Spaces:

Josebert
/

JR_SmartOCR

Runtime error

App Files Community

Josebert committed on Mar 24, 2025

Commit

5a52e06

verified ·

1 Parent(s): f819b18

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -98

app.py CHANGED Viewed

@@ -2,11 +2,14 @@ import os
 import base64
 import gradio as gr
 import logging
-from datetime import datetime
 from huggingface_hub import InferenceClient
 from PIL import Image
 import io
 # Optional imports with error handling
 try:
     from pdf2image import convert_from_path
@@ -16,19 +19,11 @@ except ImportError:
     HAS_OCR = False
     print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
 )
-logger = logging.getLogger(__name__)
-# API configuration
-api_token = os.getenv("HUGGINGFACE_TOKEN")
-if not api_token:
-    raise ValueError("API token not found. Set HUGGINGFACE_TOKEN in .env file or environment variables.")
-client = InferenceClient(token=api_token)
 def initialize_ocr():
     """Initialize OCR with error handling"""
@@ -42,58 +37,57 @@ def initialize_ocr():
 reader = initialize_ocr()
-def encode_image_to_base64(image_path):
-    """Convert image to base64 string"""
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
 def make_api_call(text_content, image_path=None, retries=3):
     """Enhanced API call handler with retries and image support"""
-    messages = [
-        {
             "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": f"""Analyze this document content and provide:
-                    1. Corrected text with proper formatting
-                    2. Brief summary
-                    3. Key points or important information
-                    4. Any detected entities (dates, names, numbers)
-                    Content: {text_content}"""
-                }
-            ]
-        }
-    ]
-    if image_path and os.path.exists(image_path):
-        base64_image = encode_image_to_base64(image_path)
-        messages[0]["content"].append({
-            "type": "image_url",
-            "image_url": {
-                "url": f"data:image/jpeg;base64,{base64_image}"
-            }
-        })
-    for attempt in range(retries):
-        try:
-            response = client.text_generation(
-                model="google/gemma-7b-it",
-                prompt=str(messages),
-                max_new_tokens=1000,
-                temperature=0.7,
-                top_p=0.95,
-            )
-            return response
-        except Exception as e:
-            logger.error(f"API Error (attempt {attempt + 1}/{retries}): {e}")
-            if attempt == retries - 1:
-                return f"Error processing request: {str(e)}"
-            continue
 def process_document(file_path):
-    """Process document with improved error handling"""
     if not HAS_OCR or not reader:
         return "OCR functionality is not available. Please install required packages."
@@ -105,7 +99,6 @@ def process_document(file_path):
             images = convert_from_path(file_path)
             extracted_text = ""
             for i, image in enumerate(images):
-                logger.info(f"Processing page {i+1}/{len(images)}")
                 temp_path = f"temp_page_{i}.jpg"
                 image.save(temp_path)
                 ocr_results = reader.readtext(temp_path, detail=0)
@@ -137,44 +130,26 @@ def process_and_analyze(file):
         logger.error(f"Error in processing: {e}")
         return f"Error: {str(e)}"
-# Interface styling
-css = """
-.gradio-container { font-family: 'Arial', sans-serif !important; max-width: 1200px !important; }
-.gr-button { background-color: #2e5090 !important; color: white !important; }
-.gr-button:hover { opacity: 0.9 !important; }
-.gr-form { background-color: #f8f9fa !important; border-radius: 10px !important; }
-"""
-# Create interface
-with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 📄 Smart Document Analyzer")
-    with gr.Row():
-        with gr.Column():
-            file_input = gr.File(
-                label="Upload Document (PDF, PNG, JPG)",
-                file_types=[".pdf", ".png", ".jpg", ".jpeg"]
-            )
-            process_btn = gr.Button("📝 Analyze Document", variant="primary")
-        with gr.Column():
-            output = gr.Textbox(
-                label="Analysis Results",
-                lines=15,
-                show_copy_button=True
-            )
-    gr.Markdown("### 📋 Instructions\n" +
-                "1. Upload a PDF or image file\n" +
-                "2. Click 'Analyze Document'\n" +
-                "3. Wait for the analysis results\n")
-    process_btn.click(
-        fn=process_and_analyze,
-        inputs=file_input,
-        outputs=output,
-        api_name="analyze"
-    )
 if __name__ == "__main__":
     demo.launch()

 import base64
 import gradio as gr
 import logging
 from huggingface_hub import InferenceClient
 from PIL import Image
 import io
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Optional imports with error handling
 try:
     from pdf2image import convert_from_path
     HAS_OCR = False
     print("OCR features will be disabled. Install pdf2image and easyocr for full functionality.")
+# Initialize Hugging Face client with default token for Spaces
+client = InferenceClient(
+    model="google/gemma-7b-it",
+    token=None  # Will use HF_TOKEN from secrets in Spaces
 )
 def initialize_ocr():
     """Initialize OCR with error handling"""
 reader = initialize_ocr()
 def make_api_call(text_content, image_path=None, retries=3):
     """Enhanced API call handler with retries and image support"""
+    try:
+        # Prepare message for chat completion
+        messages = [{
             "role": "user",
+            "content": [{
+                "type": "text",
+                "text": f"""Analyze this document and provide:
+                1. Corrected text
+                2. Brief summary
+                3. Key points
+                4. Detected entities (dates, names, numbers)
+                Content: {text_content}"""
+            }]
+        }]
+        # Add image if available
+        if image_path and os.path.exists(image_path):
+            with open(image_path, "rb") as img_file:
+                base64_image = base64.b64encode(img_file.read()).decode('utf-8')
+                messages[0]["content"].append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+        # Make API call with retries
+        for attempt in range(retries):
+            try:
+                completion = client.chat.completions.create(
+                    model="google/gemma-7b-it",
+                    messages=messages,
+                    max_tokens=1000,
+                    temperature=0.7
+                )
+                return completion.choices[0].message.content
+            except Exception as e:
+                if attempt == retries - 1:
+                    raise e
+                logger.warning(f"Attempt {attempt + 1} failed, retrying...")
+                continue
+    except Exception as e:
+        logger.error(f"API call failed: {e}")
+        return f"Error processing request: {str(e)}"
 def process_document(file_path):
+    """Process document with OCR"""
     if not HAS_OCR or not reader:
         return "OCR functionality is not available. Please install required packages."
             images = convert_from_path(file_path)
             extracted_text = ""
             for i, image in enumerate(images):
                 temp_path = f"temp_page_{i}.jpg"
                 image.save(temp_path)
                 ocr_results = reader.readtext(temp_path, detail=0)
         logger.error(f"Error in processing: {e}")
         return f"Error: {str(e)}"
+# Create Gradio interface
+demo = gr.Interface(
+    fn=process_and_analyze,
+    inputs=gr.File(
+        label="Upload Document (PDF, PNG, JPG)",
+        file_types=[".pdf", ".png", ".jpg", ".jpeg"]
+    ),
+    outputs=gr.Textbox(
+        label="Analysis Results",
+        lines=15,
+        show_copy_button=True
+    ),
+    title="📄 Smart Document Analyzer",
+    description="Upload a document to analyze its content using AI.",
+    theme=gr.themes.Soft(),
+    css="""
+        .gradio-container { max-width: 1200px !important; }
+        .gr-button { background-color: #2e5090 !important; }
+    """
+)
 if __name__ == "__main__":
     demo.launch()