Extract text from a PDF
# Pull the embedded text layer out of the PDF at `path`, page by page.
# If no text comes back at all, fall back to OCR via textract/tesseract.
#
# The file is opened in a `with` block so the handle is always closed, even
# if PyPDF2 raises while parsing. All page reads happen inside the block;
# `pdfReader` should not be used to read pages once the file is closed.
with open(path, 'rb') as pdfFileObject:
    pdfReader = PyPDF2.PdfFileReader(pdfFileObject)
    num_pages = pdfReader.numPages
    # Join once instead of growing the string with repeated `+=`.
    text = "".join(
        pdfReader.getPage(page_index).extractText()
        for page_index in range(num_pages)
    )

# An empty result means no page yielded any text, so try OCR instead.
# NOTE(review): the fallback reads `fileurl` while the reader above opens
# `path` -- confirm both refer to the same document.
# NOTE(review): textract.process appears to return bytes, whereas the PyPDF2
# branch yields str -- confirm downstream code handles both.
if not text:
    text = textract.process(fileurl, method='tesseract', language='eng')