Spaces:

iamnew123
/

lawdocker

Runtime error

lawdocker / app.py

Update app.py

26f3eec verified 7 months ago

1.36 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	from pdf2image import convert_from_bytes
	import pytesseract
	from PIL import Image

	def extract_text_from_pdf(pdf_path):
	    """Return the embedded text of the PDF at *pdf_path*.

	    The text of every page is joined with newlines and the result is
	    stripped of surrounding whitespace. Pages for which ``extract_text()``
	    yields nothing contribute an empty string.

	    Any exception is caught and reported as a string starting with
	    ``[Error reading normal PDF]`` instead of being raised, so the caller
	    always receives a ``str``.
	    """
	    try:
	        document = PdfReader(pdf_path)
	        page_texts = []
	        for page in document.pages:
	            # extract_text() may give a falsy value; normalise it to "".
	            page_texts.append(page.extract_text() or "")
	        combined = "\n".join(page_texts)
	    except Exception as e:
	        return f"[Error reading normal PDF] {e}"
	    return combined.strip()

	def extract_text_from_scanned(pdf_path):
	    """Return OCR text for the PDF at *pdf_path*.

	    The file is read as bytes, converted to images with
	    ``convert_from_bytes``, and each image is passed through
	    ``pytesseract.image_to_string``. The per-image results are
	    concatenated and stripped of surrounding whitespace.

	    Any exception is caught and reported as a string starting with
	    ``[Error reading scanned PDF]`` instead of being raised, so the caller
	    always receives a ``str``.
	    """
	    try:
	        # Use a context manager so the file handle is closed promptly,
	        # even if reading fails part-way.
	        with open(pdf_path, "rb") as pdf_file:
	            pdf_bytes = pdf_file.read()
	        images = convert_from_bytes(pdf_bytes)
	        text = "".join(pytesseract.image_to_string(image) for image in images)
	        return text.strip()
	    except Exception as e:
	        return f"[Error reading scanned PDF] {e}"

	def process(pdf_file):
	    """Extract text from an uploaded PDF, falling back to OCR.

	    Returns a prompt message when *pdf_file* is falsy. Otherwise the
	    embedded-text extractor is tried first; if its result is blank, the
	    OCR extractor is used. If that is empty too, a failure message is
	    returned.
	    """
	    if not pdf_file:
	        return "Please upload a PDF."

	    extracted = extract_text_from_pdf(pdf_file)
	    if extracted.strip():
	        # Direct extraction produced something non-blank; use it as-is.
	        return extracted

	    extracted = extract_text_from_scanned(pdf_file)
	    if extracted:
	        return extracted
	    return "❌ Could not extract text."

	# Build the UI: a heading, a row holding the upload control and the
	# trigger button, and an editable textbox that receives the result.
	with gr.Blocks() as demo:
	    gr.Markdown("## 📚 Law PDF Formatter")
	    with gr.Row():
	        # type="filepath" makes the component hand `process` a path string.
	        file = gr.File(file_types=[".pdf"], label="Upload PDF", type="filepath")
	        btn = gr.Button("Extract Text")
	    out = gr.Textbox(label="Extracted Text", lines=20, interactive=True)
	    btn.click(fn=process, inputs=file, outputs=out)

	# `flagging_callback` is not a parameter of Blocks.launch(); passing it
	# raises TypeError at startup, so only `share` is passed here.
	demo.launch(share=True)