Spaces:

Josebert
/

JR_SmartOCR

Runtime error

App Files Community

Josebert committed on Mar 24, 2025

Commit

c531117

verified ·

1 Parent(s): 43dd50a

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -111

app.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import os
 import gradio as gr
-import requests
-import json
 import logging
 from datetime import datetime
-import random
 # Optional imports with error handling
 try:
@@ -23,13 +28,11 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 # API configuration
-api_token = os.getenv("API_TOKEN")
 if not api_token:
-    raise ValueError("API token not found. Make sure 'API_TOKEN' is set in the Secrets.")
-API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
-HEADERS = {"Authorization": f"Bearer {api_token}"}
-TIMEOUT = 30  # seconds
 def initialize_ocr():
     """Initialize OCR with error handling"""
@@ -41,34 +44,56 @@ def initialize_ocr():
         logger.error(f"Failed to initialize OCR: {e}")
         return None
-# Initialize OCR reader
 reader = initialize_ocr()
-def make_api_call(prompt, params, retries=3):
-    """Enhanced API call handler with retries"""
-    payload = {
-        "inputs": f"{prompt} [ts:{params['timestamp']}]",
-        "parameters": {
-            "temperature": params["temperature"],
-            "top_p": params["top_p"],
-            "max_tokens": 1000
         }
-    }
     for attempt in range(retries):
         try:
-            response = requests.post(
-                API_URL,
-                headers=HEADERS,
-                json=payload,
-                timeout=TIMEOUT
             )
-            response.raise_for_status()
-            return response.json()
-        except requests.exceptions.RequestException as e:
             logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
             if attempt == retries - 1:
-                return None
             continue
 def process_document(file_path):
@@ -85,8 +110,11 @@ def process_document(file_path):
             extracted_text = ""
             for i, image in enumerate(images):
                 logger.info(f"Processing page {i+1}/{len(images)}")
-                ocr_results = reader.readtext(image, detail=0)
                 extracted_text += " ".join(ocr_results) + "\n"
         else:
             ocr_results = reader.readtext(file_path, detail=0)
             extracted_text = " ".join(ocr_results)
@@ -96,100 +124,61 @@ def process_document(file_path):
         logger.error(f"Error processing document: {e}")
         return f"Error processing document: {str(e)}"
-def generate_response(file_path):
-    """Generate response with better error handling"""
     try:
-        extracted_text = process_document(file_path)
-        if not extracted_text or extracted_text.startswith("Error"):
             return extracted_text
-        params = {
-            "temperature": random.uniform(0.7, 0.9),
-            "top_p": random.uniform(0.85, 0.95),
-            "timestamp": datetime.now().strftime("%H%M%S")
-        }
-        prompt = f"""Process and enhance this text:
-        {extracted_text}
-        Provide:
-        1. Corrected text
-        2. Summary
-        3. Key points
-        """
-        result = make_api_call(prompt, params)
-        if result and isinstance(result, list):
-            return result[0].get("generated_text", "No valid response from model.")
-        return "Error: Failed to process the document."
     except Exception as e:
-        logger.error(f"Error generating response: {e}")
         return f"Error: {str(e)}"
-# Interface styling with improved CSS
 css = """
-.gradio-container {
-    font-family: 'Arial', sans-serif !important;
-    max-width: 1200px !important;
-    margin: auto !important;
-}
-.gr-button {
-    background-color: #2e5090 !important;
-    color: white !important;
-    transition: all 0.3s ease !important;
-}
-.gr-button:hover {
-    opacity: 0.9 !important;
-}
-.gr-input {
-    border: 2px solid #ddd !important;
-    border-radius: 8px !important;
-    padding: 8px !important;
-}
-.gr-form {
-    background-color: #f8f9fa !important;
-    padding: 20px !important;
-    border-radius: 10px !important;
-}
 """
-# Create interface with better organization
-with gr.Blocks(css=css, theme=gr.themes.Default()) as demo:
-    gr.Markdown("# Document Processing with Mistral")
-    with gr.Tabs():
-        with gr.Tab("Document Processing"):
-            with gr.Column():
-                file_input = gr.File(
-                    label="Upload PDF or Image",
-                    file_types=[".pdf", ".png", ".jpg", ".jpeg"]
-                )
-                output_text = gr.Textbox(
-                    label="Processed Text",
-                    lines=15,
-                    show_copy_button=True
-                )
-                submit_btn = gr.Button(
-                    "Process Document",
-                    variant="primary"
-                )
-            submit_btn.click(
-                fn=generate_response,
-                inputs=file_input,
-                outputs=output_text,
-                api_name="process_document"
             )
 if __name__ == "__main__":
-    # Check dependencies
-    if not HAS_OCR:
-        print("Warning: OCR features are disabled. Install required packages for full functionality.")
-    # Launch with optimized settings
-    demo.launch(
-        share=True,
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import os
+import base64
 import gradio as gr
 import logging
 from datetime import datetime
+from huggingface_hub import InferenceClient
+from PIL import Image
+import io
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
 # Optional imports with error handling
 try:
 logger = logging.getLogger(__name__)
 # API configuration
+api_token = os.getenv("HUGGINGFACE_TOKEN")
 if not api_token:
+    raise ValueError("API token not found. Set HUGGINGFACE_TOKEN in .env file or environment variables.")
+client = InferenceClient(token=api_token)
 def initialize_ocr():
     """Initialize OCR with error handling"""
         logger.error(f"Failed to initialize OCR: {e}")
         return None
 reader = initialize_ocr()
+def encode_image_to_base64(image_path):
+    """Convert image to base64 string"""
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
+def make_api_call(text_content, image_path=None, retries=3):
+    """Enhanced API call handler with retries and image support"""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": f"""Analyze this document content and provide:
+                    1. Corrected text with proper formatting
+                    2. Brief summary
+                    3. Key points or important information
+                    4. Any detected entities (dates, names, numbers)
+                    Content: {text_content}"""
+                }
+            ]
         }
+    ]
+    if image_path and os.path.exists(image_path):
+        base64_image = encode_image_to_base64(image_path)
+        messages[0]["content"].append({
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_image}"
+            }
+        })
     for attempt in range(retries):
         try:
+            response = client.text_generation(
+                model="google/gemma-7b-it",
+                prompt=str(messages),
+                max_new_tokens=1000,
+                temperature=0.7,
+                top_p=0.95,
             )
+            return response
+        except Exception as e:
             logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
             if attempt == retries - 1:
+                return f"Error processing request: {str(e)}"
             continue
 def process_document(file_path):
             extracted_text = ""
             for i, image in enumerate(images):
                 logger.info(f"Processing page {i+1}/{len(images)}")
+                temp_path = f"temp_page_{i}.jpg"
+                image.save(temp_path)
+                ocr_results = reader.readtext(temp_path, detail=0)
                 extracted_text += " ".join(ocr_results) + "\n"
+                os.remove(temp_path)
         else:
             ocr_results = reader.readtext(file_path, detail=0)
             extracted_text = " ".join(ocr_results)
         logger.error(f"Error processing document: {e}")
         return f"Error processing document: {str(e)}"
+def process_and_analyze(file):
+    """Main processing function"""
+    if not file:
+        return "Please upload a file."
     try:
+        extracted_text = process_document(file.name)
+        if extracted_text.startswith("Error") or extracted_text.startswith("OCR functionality"):
             return extracted_text
+        result = make_api_call(extracted_text, file.name)
+        return result if result else "Failed to analyze the document."
     except Exception as e:
+        logger.error(f"Error in processing: {e}")
         return f"Error: {str(e)}"
+# Interface styling
 css = """
+.gradio-container { font-family: 'Arial', sans-serif !important; max-width: 1200px !important; }
+.gr-button { background-color: #2e5090 !important; color: white !important; }
+.gr-button:hover { opacity: 0.9 !important; }
+.gr-form { background-color: #f8f9fa !important; border-radius: 10px !important; }
 """
+# Create interface
+with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📄 Smart Document Analyzer")
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(
+                label="Upload Document (PDF, PNG, JPG)",
+                file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+            )
+            process_btn = gr.Button("📝 Analyze Document", variant="primary")
+        with gr.Column():
+            output = gr.Textbox(
+                label="Analysis Results",
+                lines=15,
+                show_copy_button=True
             )
+    gr.Markdown("### 📋 Instructions\n" +
+                "1. Upload a PDF or image file\n" +
+                "2. Click 'Analyze Document'\n" +
+                "3. Wait for the analysis results\n")
+    process_btn.click(
+        fn=process_and_analyze,
+        inputs=file_input,
+        outputs=output,
+        api_name="analyze"
+    )
 if __name__ == "__main__":
+    demo.launch()