Spaces:

Josebert
/

JR_SmartOCR

Runtime error

App Files Files Community

Josebert committed on Mar 22, 2025

Commit

fb37c01

verified ·

1 Parent(s): 91937c7

Create app.py

Browse files

Files changed (1) hide show

app.py +72 -0

app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from vllm import LLM
+from vllm.sampling_params import SamplingParams
+from datetime import datetime, timedelta
+from huggingface_hub import hf_hub_download, login
+import os
+import gradio as gr
+from pdf2image import convert_from_path
+import easyocr
+# Initialize the OCR reader once, at import time, so that every call to
+# process_pdf_or_image reuses this single instance. ['en'] is the language
+# list handed to easyocr.
+reader = easyocr.Reader(['en'])
+# ... existing SYSTEM_PROMPT and load_system_prompt definitions ...
+def process_pdf_or_image(file_path):
+    # Handle PDF files
+    if file_path.lower().endswith('.pdf'):
+        images = convert_from_path(file_path)
+        extracted_text = ""
+        for image in images:
+            ocr_results = reader.readtext(image, detail=0)
+            extracted_text += " ".join(ocr_results) + "\n"
+    # Handle image files
+    else:
+        ocr_results = reader.readtext(file_path, detail=0)
+        extracted_text = " ".join(ocr_results)
+    return extracted_text
+def generate_response(file_path):
+    # Extract text from PDF/image
+    extracted_text = process_pdf_or_image(file_path)
+    # Prepare messages for the LLM
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}",
+                }
+            ],
+        },
+    ]
+    # Initialize the LLM
+    llm = LLM(model="mistralai/Mistral-Small-3.1-24B", tokenizer_mode="mistral")
+    # Define sampling parameters
+    sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
+    # Get the response from the LLM
+    outputs = llm.chat(messages, sampling_params=sampling_params)
+    return outputs[0].outputs[0].text
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Document Processing with Mistral")
+    file_input = gr.File(label="Upload PDF or Image")
+    output_text = gr.Textbox(label="Processed Text", lines=10)
+    submit_btn = gr.Button("Process Document")
+    submit_btn.click(
+        fn=generate_response,
+        inputs=file_input,
+        outputs=output_text
+    )
+demo.launch()