import {
	getDocument,
	PDFDocumentLoadingTask,
	PDFDocumentProxy,
} from 'pdfjs-dist'
import { configurePdfWorker } from '../../../utils/pdfConfig'

// Module-level side effect: runs once when this module is first imported, before
// any extraction is attempted. Presumably this points pdf.js at its worker
// script — confirm against utils/pdfConfig.
configurePdfWorker()

/**
 * Hook-style factory exposing a helper that extracts plain text from a PDF file.
 *
 * It keeps no state of its own in this module; it simply returns the
 * `extractText` function.
 *
 * @returns An object containing `extractText`.
 */
const usePdfTextExtractor = () => {
	/**
	 * Reads `file` as a PDF and returns the text of every readable page.
	 *
	 * Text items within a page are joined with a single space; pages are joined
	 * with a newline so words at page boundaries do not run together. A page that
	 * fails to load or parse is logged and skipped rather than aborting the run.
	 *
	 * @param file - The PDF file to read.
	 * @returns The extracted text of all pages that could be processed.
	 * @throws Error when no non-whitespace text could be extracted, or rethrows
	 *   any error raised while reading or opening the document.
	 */
	const extractText = async (file: File): Promise<string> => {
		try {
			const arrayBuffer = await file.arrayBuffer()
			const uint8Array = new Uint8Array(arrayBuffer)

			const loadingTask: PDFDocumentLoadingTask = getDocument({
				data: uint8Array,
			})

			try {
				loadingTask.onProgress = progress => {
					// Guard against a zero/unknown total so the log never shows
					// `NaN%` or `Infinity%`.
					if (!(progress.total > 0)) {
						return
					}
					console.log(
						`Loading progress: ${(progress.loaded / progress.total) * 100}%`,
					)
				}

				const pdf: PDFDocumentProxy = await loadingTask.promise

				const pageTexts: string[] = []
				// pdf.js page numbers are 1-based.
				for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
					try {
						const page = await pdf.getPage(pageNum)
						const textContent = await page.getTextContent()
						// Items without a `str` field (e.g. marked-content markers)
						// contribute an empty string.
						const pageText = textContent.items
							.map(item => ('str' in item ? item.str : ''))
							.join(' ')
						pageTexts.push(pageText)
					} catch (pageError) {
						// Best effort: one bad page should not discard the others.
						console.error(`Error processing page ${pageNum}:`, pageError)
					}
				}

				// A separator between pages keeps the last word of one page from
				// fusing with the first word of the next.
				const extractedText = pageTexts.join('\n')

				// Whitespace-only output (only separators, no real text) counts as
				// "nothing extracted".
				if (extractedText.trim().length === 0) {
					throw new Error('No text could be extracted from the PDF')
				}

				return extractedText
			} finally {
				// Always release the document and its worker-side resources, on
				// success and on failure alike; otherwise every call leaks them.
				try {
					await loadingTask.destroy()
				} catch (cleanupError) {
					// A cleanup failure must not mask the real result or error.
					console.error('Error releasing PDF resources:', cleanupError)
				}
			}
		} catch (error) {
			console.error('Error in extractText:', error)
			throw error
		}
	}

	return { extractText }
}

// Default export: consumers import the extractor hook under a name of their choosing.
export default usePdfTextExtractor
