Spaces:

Abdulahad79
/

simpleOCR

Sleeping

App Files Community

Abdulahad79 committed on Jan 29

Commit

2e4035a

verified ·

1 Parent(s): 4deac87

jj

Browse files

Files changed (1) hide show

app.py +35 -27

app.py CHANGED Viewed

@@ -4,40 +4,40 @@ from docx import Document
 from PIL import Image
 import os
-# 1. API Configuration
-# In 2026, 'gemini-3-flash' is the recommended model for speed/OCR tasks.
-MODEL_ID = 'gemini-3-flash'
 api_key = os.getenv("GEMINI_API_KEY")
 if api_key:
     genai.configure(api_key=api_key)
 else:
-    print("Warning: GEMINI_API_KEY not found in environment variables.")
 def process_document(input_img):
     if input_img is None:
         return None, "Error: No image uploaded.", ""
     if not api_key:
-        return None, "Error: API Key missing in Space Secrets.", ""
     try:
-        # Use the latest stable model
         model = genai.GenerativeModel(MODEL_ID)
-        # Convert Gradio numpy array to PIL for Gemini
         pil_img = Image.fromarray(input_img)
-        # Optimized prompt for structured document extraction
         prompt = """
-        Extract all text from this image accurately.
-        - Identify titles and center them.
-        - Preserve bold and italic text where possible.
-        - Maintain the original paragraph structure.
-        - If there are tables or lists, format them clearly.
         """
-        # Generate content
         response = model.generate_content([prompt, pil_img])
         if not response or not response.text:
@@ -51,12 +51,13 @@ def process_document(input_img):
             clean_line = line.strip()
             if clean_line:
                 p = doc.add_paragraph()
-                # Basic markdown-to-docx style handling
-                run = p.add_run(clean_line.replace('**', '').replace('*', ''))
                 if '**' in line: run.bold = True
                 if '*' in line and '**' not in line: run.italic = True
-        output_path = "Handwritten_Notes_Converted.docx"
         doc.save(output_path)
         return output_path, "✅ Conversion Successful!", extracted_text
@@ -64,22 +65,29 @@ def process_document(input_img):
     except Exception as e:
         return None, f"❌ System Error: {str(e)}", ""
-# --- Gradio Professional UI ---
-with gr.Blocks(theme=gr.themes.Soft(), title="Smart OCR 2026") as demo:
-    gr.Markdown("# 🖋️ AI Document Architect")
-    gr.Markdown("Convert messy handwriting or scans into formatted Word docs using Gemini 3 Flash.")
     with gr.Row():
         with gr.Column(scale=1):
-            input_image = gr.Image(label="Upload Document Scan", type="numpy")
-            submit_btn = gr.Button("🚀 Start AI Conversion", variant="primary")
         with gr.Column(scale=1):
-            status_msg = gr.Textbox(label="System Status", interactive=False)
-            download_link = gr.File(label="📄 Download Word Document")
-    with gr.Accordion("Text Preview & Manual Edit", open=False):
-        extracted_text = gr.TextArea(label="Extracted Content", lines=12)
     submit_btn.click(
         fn=process_document,

 from PIL import Image
 import os
+# 1. API Configuration using Hugging Face Secret
+# In 2026, 'gemini-3-flash-preview' is the most stable high-speed model
+MODEL_ID = 'gemini-3-flash-preview'
 api_key = os.getenv("GEMINI_API_KEY")
 if api_key:
     genai.configure(api_key=api_key)
 else:
+    print("Warning: GEMINI_API_KEY not found in environment secrets.")
 def process_document(input_img):
     if input_img is None:
         return None, "Error: No image uploaded.", ""
     if not api_key:
+        return None, "Error: API Key missing in Space Secrets (GEMINI_API_KEY).", ""
     try:
+        # Load the 2026 stable Flash model
         model = genai.GenerativeModel(MODEL_ID)
+        # Convert Gradio numpy image to PIL for Gemini
         pil_img = Image.fromarray(input_img)
+        # Expert prompt for high-fidelity document extraction
         prompt = """
+        Extract all text from this document accurately.
+        - Identify titles and align them correctly.
+        - Preserve Bold and Italic formatting.
+        - Group lines into logical paragraphs.
+        - If there are handwritten notes, transcribe them faithfully.
         """
+        # Generate Content
         response = model.generate_content([prompt, pil_img])
         if not response or not response.text:
             clean_line = line.strip()
             if clean_line:
                 p = doc.add_paragraph()
+                # Basic cleaning of markdown tags if Gemini adds them
+                text_to_write = clean_line.replace('**', '').replace('*', '')
+                run = p.add_run(text_to_write)
                 if '**' in line: run.bold = True
                 if '*' in line and '**' not in line: run.italic = True
+        output_path = "Converted_Document.docx"
         doc.save(output_path)
         return output_path, "✅ Conversion Successful!", extracted_text
     except Exception as e:
         return None, f"❌ System Error: {str(e)}", ""
+# --- Gradio UI Setup ---
+with gr.Blocks(theme=gr.themes.Soft(), title="Gemini 3 Smart OCR") as demo:
+    gr.Markdown("# 🖋️ AI Document Architect (Gemini 3)")
+    gr.Markdown("Convert messy handwriting or document scans into formatted Word files instantly.")
     with gr.Row():
         with gr.Column(scale=1):
+            input_image = gr.Image(label="Source Image", type="numpy")
+            submit_btn = gr.Button("🚀 Convert to Word", variant="primary")
+            # --- Added Example Images ---
+            gr.Examples(
+                examples=["image1.jpg", "image2.jpg"],
+                inputs=input_image,
+                label="Sample Notes"
+            )
         with gr.Column(scale=1):
+            status_msg = gr.Textbox(label="Status", interactive=False)
+            download_link = gr.File(label="📄 Download Word File")
+    with gr.Accordion("Review Extracted Text", open=False):
+        extracted_text = gr.TextArea(label="Text Preview", lines=12)
     submit_btn.click(
         fn=process_document,