Josebert committed on
Commit
2c2d398
·
verified ·
1 Parent(s): 178bc1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -61
app.py CHANGED
@@ -1,78 +1,128 @@
1
- from vllm import LLM
2
- from vllm.sampling_params import SamplingParams
3
- from datetime import datetime, timedelta
4
- from huggingface_hub import hf_hub_download
5
  import requests
6
  import json
 
 
 
7
  from pdf2image import convert_from_path
8
  import easyocr
9
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Initialize OCR reader
11
  reader = easyocr.Reader(['en'])
12
 
13
- # ... existing SYSTEM_PROMPT and load_system_prompt definitions ...
 
14
 
15
- def process_document(file_path):
16
- # Handle PDF files
17
- if file_path.lower().endswith('.pdf'):
18
- images = convert_from_path(file_path)
19
- extracted_text = ""
20
- for image in images:
21
- ocr_results = reader.readtext(image, detail=0)
22
- extracted_text += " ".join(ocr_results) + "\n"
23
- # Handle image files
24
- else:
25
- ocr_results = reader.readtext(file_path, detail=0)
26
- extracted_text = " ".join(ocr_results)
 
 
 
 
 
27
 
28
- return extracted_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def generate_response(file_path):
31
- # Extract text from document
32
- extracted_text = process_document(file_path)
33
-
34
- # Prepare messages for the LLM
35
- messages = [
36
- {"role": "system", "content": SYSTEM_PROMPT},
37
- {
38
- "role": "user",
39
- "content": [
40
- {
41
- "type": "text",
42
- "text": f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}",
43
- }
44
- ],
45
- },
46
- ]
47
-
48
- # Initialize the LLM
49
- llm = LLM(model="mistralai/Mistral-Small-3.1-24B-Instruct-2503", tokenizer_mode="mistral")
50
-
51
- # Define sampling parameters
52
- sampling_params = SamplingParams(max_tokens=512, temperature=0.15)
53
-
54
- # Get the response from the LLM
55
- outputs = llm.chat(messages, sampling_params=sampling_params)
56
-
57
- return outputs[0].outputs[0].text
58
 
59
- # Example usage
60
- if __name__ == "__main__":
61
- document_path = "path/to/your/document.pdf" # or .jpg/.png
62
- response = generate_response(document_path)
63
- print(response)
 
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Gradio interface
66
- with gr.Blocks() as demo:
67
  gr.Markdown("# Document Processing with Mistral")
68
- file_input = gr.File(label="Upload PDF or Image")
69
- output_text = gr.Textbox(label="Processed Text", lines=10)
70
- submit_btn = gr.Button("Process Document")
71
 
72
- submit_btn.click(
73
- fn=generate_response,
74
- inputs=file_input,
75
- outputs=output_text
76
- )
 
 
 
 
 
 
77
 
78
- demo.launch()
 
 
1
+ import os
2
+ import gradio as gr
 
 
3
  import requests
4
  import json
5
+ import logging
6
+ from datetime import datetime
7
+ import random
8
  from pdf2image import convert_from_path
9
  import easyocr
10
 
11
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API configuration: the Hugging Face Inference API token must be supplied
# via the API_TOKEN environment variable (e.g. a Space secret); fail fast
# at import time if it is missing.
api_token = os.getenv("API_TOKEN")
if not api_token:
    raise ValueError("API token not found. Make sure 'API_TOKEN' is set in the Secrets.")

API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
HEADERS = {"Authorization": f"Bearer {api_token}"}

# Initialize OCR reader (English only; loads EasyOCR models once at startup)
reader = easyocr.Reader(['en'])

# Define a system prompt (example)
# NOTE(review): SYSTEM_PROMPT is defined but never sent to the API —
# make_api_call only posts the user prompt; confirm whether it should
# be prepended to the request.
SYSTEM_PROMPT = "You are a helpful assistant that corrects and enhances text."
28
 
29
def get_unique_parameters():
    """Build per-request sampling settings plus a wall-clock tag.

    Returns:
        dict with a random "temperature" in [0.7, 0.9], a random
        "top_p" in [0.85, 0.95], and "timestamp" as the current
        time formatted HHMMSS.
    """
    temperature = random.uniform(0.7, 0.9)
    top_p = random.uniform(0.85, 0.95)
    stamp = datetime.now().strftime("%H%M%S")
    return {"temperature": temperature, "top_p": top_p, "timestamp": stamp}
36
+
37
def make_api_call(prompt, params):
    """POST a text-generation request to the HF Inference API.

    Args:
        prompt: The user prompt to send.
        params: Dict with "temperature", "top_p" and "timestamp" keys
            (see get_unique_parameters); the timestamp is appended to
            the input so repeated prompts stay distinct.

    Returns:
        The decoded JSON response on success, or None on any request
        or decoding failure (the error is logged).
    """
    payload = {
        "inputs": f"{prompt} [ts:{params['timestamp']}]",
        "parameters": {
            "temperature": params["temperature"],
            "top_p": params["top_p"]
        }
    }

    try:
        # A timeout is essential: without one a stalled request blocks
        # the Gradio worker indefinitely.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/HTTP errors; ValueError
        # covers a non-JSON body from response.json().
        logger.error(f"API Error: {e}")
        return None
54
+
55
def process_document(file_path):
    """Run OCR over a PDF or image file and return the extracted text.

    Args:
        file_path: Path to a .pdf file (each page is rasterized and
            OCR'd) or to an image file readable by EasyOCR.

    Returns:
        The extracted text; "" on any error (the error is logged).
    """
    import numpy as np  # local import: only needed to feed PIL pages to EasyOCR

    try:
        # Handle PDF files
        if file_path.lower().endswith('.pdf'):
            images = convert_from_path(file_path)
            extracted_text = ""
            for image in images:
                # pdf2image yields PIL Images, but EasyOCR's readtext
                # accepts a path, bytes, or numpy array — convert first.
                ocr_results = reader.readtext(np.asarray(image), detail=0)
                extracted_text += " ".join(ocr_results) + "\n"
        # Handle image files (EasyOCR accepts the path directly)
        else:
            ocr_results = reader.readtext(file_path, detail=0)
            extracted_text = " ".join(ocr_results)

        return extracted_text
    except Exception as e:
        # Best-effort: callers treat "" as "nothing extracted".
        logger.error(f"Error processing document: {e}")
        return ""
73
 
74
def generate_response(file_path):
    """Full pipeline: OCR the uploaded document, then enhance the text via the LLM API.

    Args:
        file_path: Path to the uploaded PDF/image. Gradio's gr.File may
            pass a tempfile-like object instead of a str; its .name
            attribute is used in that case.

    Returns:
        The model's generated text, or a human-readable error message.
    """
    # Nothing uploaded yet — avoid AttributeError on None.lower() downstream.
    if file_path is None:
        return "Please upload a document."
    # Normalize a file-like object from gr.File to its filesystem path.
    file_path = getattr(file_path, "name", file_path)

    try:
        # Extract text from document
        extracted_text = process_document(file_path)

        if not extracted_text.strip():
            return "No text extracted from the document."

        params = get_unique_parameters()
        prompt = f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}"

        result = make_api_call(prompt, params)
        if result:
            # HF Inference API returns a list of {"generated_text": ...} dicts.
            return result[0].get("generated_text", "No response from the model.")
        else:
            return "Error processing the document."
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        return "Error processing the document."
 
 
 
 
 
 
 
 
 
93
 
94
# Interface styling: injected into gr.Blocks(css=...) below. Centers the
# app at max 1200px and restyles buttons and inputs.
css = """
.gradio-container {
    font-family: 'Arial', sans-serif !important;
    max-width: 1200px !important;
    margin: auto !important;
}
.gr-button {
    background-color: #2e5090 !important;
    color: white !important;
}
.gr-input {
    border: 2px solid #ddd !important;
    border-radius: 8px !important;
}
"""
110
 
111
# Create interface: a single tab with a file upload, a read-only output
# box, and a button that runs the OCR + LLM pipeline.
with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
    gr.Markdown("# Document Processing with Mistral")

    with gr.Tabs():
        with gr.Tab("Document Processing"):
            file_input = gr.File(label="Upload PDF or Image")
            output_text = gr.Textbox(label="Processed Text", lines=10)
            submit_btn = gr.Button("Process Document")

            # On click, pass the uploaded file to generate_response and
            # show its result (text or error message) in the textbox.
            submit_btn.click(
                fn=generate_response,
                inputs=file_input,
                outputs=output_text
            )
126
 
127
if __name__ == "__main__":
    # NOTE(review): share=True requests a public gradio.live tunnel when
    # run locally — confirm that exposure is intended.
    demo.launch(share=True)