Spaces:

Josebert
/

JR_SmartOCR

Runtime error

App Files Community

Josebert committed on Mar 22, 2025

Commit

8e3b552

verified ·

1 Parent(s): 924ef97

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -47

app.py CHANGED Viewed

@@ -5,11 +5,21 @@ import json
 import logging
 from datetime import datetime
 import random
-from pdf2image import convert_from_path
-import easyocr
 # Configure logging
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # API configuration
@@ -19,79 +29,104 @@ if not api_token:
 API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
 HEADERS = {"Authorization": f"Bearer {api_token}"}
-# Initialize OCR reader
-reader = easyocr.Reader(['en'])
-# Define a system prompt (example)
-SYSTEM_PROMPT = "You are a helpful assistant that corrects and enhances text."
-def get_unique_parameters():
-    """Generate unique parameters for each request"""
-    return {
-        "temperature": random.uniform(0.7, 0.9),
-        "top_p": random.uniform(0.85, 0.95),
-        "timestamp": datetime.now().strftime("%H%M%S"),
-    }
-def make_api_call(prompt, params):
-    """Unified API call handler"""
     payload = {
         "inputs": f"{prompt} [ts:{params['timestamp']}]",
         "parameters": {
             "temperature": params["temperature"],
-            "top_p": params["top_p"]
         }
     }
-    try:
-        response = requests.post(API_URL, headers=HEADERS, json=payload)
-        response.raise_for_status()
-        return response.json()
-    except Exception as e:
-        logger.error(f"API Error: {e}")
-        return None
 def process_document(file_path):
     try:
-        # Handle PDF files
         if file_path.lower().endswith('.pdf'):
             images = convert_from_path(file_path)
             extracted_text = ""
-            for image in images:
                 ocr_results = reader.readtext(image, detail=0)
                 extracted_text += " ".join(ocr_results) + "\n"
-        # Handle image files
         else:
             ocr_results = reader.readtext(file_path, detail=0)
             extracted_text = " ".join(ocr_results)
-        return extracted_text
     except Exception as e:
         logger.error(f"Error processing document: {e}")
-        return ""
 def generate_response(file_path):
     try:
-        # Extract text from document
         extracted_text = process_document(file_path)
-        if not extracted_text.strip():
-            return "No text extracted from the document."
-        params = get_unique_parameters()
-        prompt = f"Process this extracted text, correct any errors and enhance it:\n{extracted_text}"
         result = make_api_call(prompt, params)
-        if result:
-            return result[0].get("generated_text", "No response from the model.")
-        else:
-            return "Error processing the document."
     except Exception as e:
         logger.error(f"Error generating response: {e}")
-        return "Error processing the document."
-# Interface styling
 css = """
 .gradio-container {
     font-family: 'Arial', sans-serif !important;
@@ -101,28 +136,60 @@ css = """
 .gr-button {
     background-color: #2e5090 !important;
     color: white !important;
 }
 .gr-input {
     border: 2px solid #ddd !important;
     border-radius: 8px !important;
 }
 """
-# Create interface
 with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
     gr.Markdown("# Document Processing with Mistral")
     with gr.Tabs():
         with gr.Tab("Document Processing"):
-            file_input = gr.File(label="Upload PDF or Image")
-            output_text = gr.Textbox(label="Processed Text", lines=10)
-            submit_btn = gr.Button("Process Document")
             submit_btn.click(
                 fn=generate_response,
                 inputs=file_input,
-                outputs=output_text
             )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import logging
 from datetime import datetime
 import random
+# Optional imports with error handling
+try:
+    from pdf2image import convert_from_path
+    import easyocr
+    HAS_OCR = True
+except ImportError:
+    HAS_OCR = False
+    print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
 # Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 logger = logging.getLogger(__name__)
 # API configuration
 API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
 HEADERS = {"Authorization": f"Bearer {api_token}"}
+TIMEOUT = 30  # seconds
+def initialize_ocr():
+    """Initialize OCR with error handling"""
+    if not HAS_OCR:
+        return None
+    try:
+        return easyocr.Reader(['en'])
+    except Exception as e:
+        logger.error(f"Failed to initialize OCR: {e}")
+        return None
+# Initialize OCR reader
+reader = initialize_ocr()
+def make_api_call(prompt, params, retries=3):
+    """Enhanced API call handler with retries"""
     payload = {
         "inputs": f"{prompt} [ts:{params['timestamp']}]",
         "parameters": {
             "temperature": params["temperature"],
+            "top_p": params["top_p"],
+            "max_tokens": 1000
         }
     }
+    for attempt in range(retries):
+        try:
+            response = requests.post(
+                API_URL,
+                headers=HEADERS,
+                json=payload,
+                timeout=TIMEOUT
+            )
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
+            if attempt == retries - 1:
+                return None
+            continue
 def process_document(file_path):
+    """Process document with improved error handling"""
+    if not HAS_OCR or not reader:
+        return "OCR functionality is not available. Please install required packages."
+    if not os.path.exists(file_path):
+        return "File not found."
     try:
         if file_path.lower().endswith('.pdf'):
             images = convert_from_path(file_path)
             extracted_text = ""
+            for i, image in enumerate(images):
+                logger.info(f"Processing page {i+1}/{len(images)}")
                 ocr_results = reader.readtext(image, detail=0)
                 extracted_text += " ".join(ocr_results) + "\n"
         else:
             ocr_results = reader.readtext(file_path, detail=0)
             extracted_text = " ".join(ocr_results)
+        return extracted_text.strip() or "No text extracted from the document."
     except Exception as e:
         logger.error(f"Error processing document: {e}")
+        return f"Error processing document: {str(e)}"
 def generate_response(file_path):
+    """Generate response with better error handling"""
     try:
         extracted_text = process_document(file_path)
+        if not extracted_text or extracted_text.startswith("Error"):
+            return extracted_text
+        params = {
+            "temperature": random.uniform(0.7, 0.9),
+            "top_p": random.uniform(0.85, 0.95),
+            "timestamp": datetime.now().strftime("%H%M%S")
+        }
+        prompt = f"""Process and enhance this text:
+        {extracted_text}
+        Provide:
+        1. Corrected text
+        2. Summary
+        3. Key points
+        """
         result = make_api_call(prompt, params)
+        if result and isinstance(result, list):
+            return result[0].get("generated_text", "No valid response from model.")
+        return "Error: Failed to process the document."
     except Exception as e:
         logger.error(f"Error generating response: {e}")
+        return f"Error: {str(e)}"
+# Interface styling with improved CSS
 css = """
 .gradio-container {
     font-family: 'Arial', sans-serif !important;
 .gr-button {
     background-color: #2e5090 !important;
     color: white !important;
+    transition: all 0.3s ease !important;
+}
+.gr-button:hover {
+    opacity: 0.9 !important;
 }
 .gr-input {
     border: 2px solid #ddd !important;
     border-radius: 8px !important;
+    padding: 8px !important;
+}
+.gr-form {
+    background-color: #f8f9fa !important;
+    padding: 20px !important;
+    border-radius: 10px !important;
 }
 """
+# Create interface with better organization
 with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
     gr.Markdown("# Document Processing with Mistral")
     with gr.Tabs():
         with gr.Tab("Document Processing"):
+            with gr.Column():
+                file_input = gr.File(
+                    label="Upload PDF or Image",
+                    file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+                )
+                output_text = gr.Textbox(
+                    label="Processed Text",
+                    lines=15,
+                    show_copy_button=True
+                )
+                submit_btn = gr.Button(
+                    "Process Document",
+                    variant="primary"
+                )
             submit_btn.click(
                 fn=generate_response,
                 inputs=file_input,
+                outputs=output_text,
+                api_name="process_document"
             )
 if __name__ == "__main__":
+    # Check dependencies
+    if not HAS_OCR:
+        print("Warning: OCR features are disabled. Install required packages for full functionality.")
+    # Launch with optimized settings
+    demo.launch(
+        share=True,
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )