Abdulahad79 committed on
Commit
2e4035a
·
verified ·
1 Parent(s): 4deac87
Files changed (1) hide show
  1. app.py +35 -27
app.py CHANGED
@@ -4,40 +4,40 @@ from docx import Document
4
  from PIL import Image
5
  import os
6
 
7
- # 1. API Configuration
8
- # In 2026, 'gemini-3-flash' is the recommended model for speed/OCR tasks.
9
- MODEL_ID = 'gemini-3-flash'
10
  api_key = os.getenv("GEMINI_API_KEY")
11
 
12
  if api_key:
13
  genai.configure(api_key=api_key)
14
  else:
15
- print("Warning: GEMINI_API_KEY not found in environment variables.")
16
 
17
  def process_document(input_img):
18
  if input_img is None:
19
  return None, "Error: No image uploaded.", ""
20
 
21
  if not api_key:
22
- return None, "Error: API Key missing in Space Secrets.", ""
23
 
24
  try:
25
- # Use the latest stable model
26
  model = genai.GenerativeModel(MODEL_ID)
27
 
28
- # Convert Gradio numpy array to PIL for Gemini
29
  pil_img = Image.fromarray(input_img)
30
 
31
- # Optimized prompt for structured document extraction
32
  prompt = """
33
- Extract all text from this image accurately.
34
- - Identify titles and center them.
35
- - Preserve bold and italic text where possible.
36
- - Maintain the original paragraph structure.
37
- - If there are tables or lists, format them clearly.
38
  """
39
 
40
- # Generate content
41
  response = model.generate_content([prompt, pil_img])
42
 
43
  if not response or not response.text:
@@ -51,12 +51,13 @@ def process_document(input_img):
51
  clean_line = line.strip()
52
  if clean_line:
53
  p = doc.add_paragraph()
54
- # Basic markdown-to-docx style handling
55
- run = p.add_run(clean_line.replace('**', '').replace('*', ''))
 
56
  if '**' in line: run.bold = True
57
  if '*' in line and '**' not in line: run.italic = True
58
 
59
- output_path = "Handwritten_Notes_Converted.docx"
60
  doc.save(output_path)
61
 
62
  return output_path, "✅ Conversion Successful!", extracted_text
@@ -64,22 +65,29 @@ def process_document(input_img):
64
  except Exception as e:
65
  return None, f"❌ System Error: {str(e)}", ""
66
 
67
- # --- Gradio Professional UI ---
68
- with gr.Blocks(theme=gr.themes.Soft(), title="Smart OCR 2026") as demo:
69
- gr.Markdown("# 🖋️ AI Document Architect")
70
- gr.Markdown("Convert messy handwriting or scans into formatted Word docs using Gemini 3 Flash.")
71
 
72
  with gr.Row():
73
  with gr.Column(scale=1):
74
- input_image = gr.Image(label="Upload Document Scan", type="numpy")
75
- submit_btn = gr.Button("🚀 Start AI Conversion", variant="primary")
 
 
 
 
 
 
 
76
 
77
  with gr.Column(scale=1):
78
- status_msg = gr.Textbox(label="System Status", interactive=False)
79
- download_link = gr.File(label="📄 Download Word Document")
80
 
81
- with gr.Accordion("Text Preview & Manual Edit", open=False):
82
- extracted_text = gr.TextArea(label="Extracted Content", lines=12)
83
 
84
  submit_btn.click(
85
  fn=process_document,
 
4
  from PIL import Image
5
  import os
6
 
7
+ # 1. API Configuration using Hugging Face Secret
8
+ # In 2026, 'gemini-3-flash-preview' is the most stable high-speed model
9
+ MODEL_ID = 'gemini-3-flash-preview'
10
  api_key = os.getenv("GEMINI_API_KEY")
11
 
12
  if api_key:
13
  genai.configure(api_key=api_key)
14
  else:
15
+ print("Warning: GEMINI_API_KEY not found in environment secrets.")
16
 
17
  def process_document(input_img):
18
  if input_img is None:
19
  return None, "Error: No image uploaded.", ""
20
 
21
  if not api_key:
22
+ return None, "Error: API Key missing in Space Secrets (GEMINI_API_KEY).", ""
23
 
24
  try:
25
+ # Load the 2026 stable Flash model
26
  model = genai.GenerativeModel(MODEL_ID)
27
 
28
+ # Convert Gradio numpy image to PIL for Gemini
29
  pil_img = Image.fromarray(input_img)
30
 
31
+ # Expert prompt for high-fidelity document extraction
32
  prompt = """
33
+ Extract all text from this document accurately.
34
+ - Identify titles and align them correctly.
35
+ - Preserve Bold and Italic formatting.
36
+ - Group lines into logical paragraphs.
37
+ - If there are handwritten notes, transcribe them faithfully.
38
  """
39
 
40
+ # Generate Content
41
  response = model.generate_content([prompt, pil_img])
42
 
43
  if not response or not response.text:
 
51
  clean_line = line.strip()
52
  if clean_line:
53
  p = doc.add_paragraph()
54
+ # Basic cleaning of markdown tags if Gemini adds them
55
+ text_to_write = clean_line.replace('**', '').replace('*', '')
56
+ run = p.add_run(text_to_write)
57
  if '**' in line: run.bold = True
58
  if '*' in line and '**' not in line: run.italic = True
59
 
60
+ output_path = "Converted_Document.docx"
61
  doc.save(output_path)
62
 
63
  return output_path, "✅ Conversion Successful!", extracted_text
 
65
  except Exception as e:
66
  return None, f"❌ System Error: {str(e)}", ""
67
 
68
+ # --- Gradio UI Setup ---
69
+ with gr.Blocks(theme=gr.themes.Soft(), title="Gemini 3 Smart OCR") as demo:
70
+ gr.Markdown("# 🖋️ AI Document Architect (Gemini 3)")
71
+ gr.Markdown("Convert messy handwriting or document scans into formatted Word files instantly.")
72
 
73
  with gr.Row():
74
  with gr.Column(scale=1):
75
+ input_image = gr.Image(label="Source Image", type="numpy")
76
+ submit_btn = gr.Button("🚀 Convert to Word", variant="primary")
77
+
78
+ # --- Added Example Images ---
79
+ gr.Examples(
80
+ examples=["image1.jpg", "image2.jpg"],
81
+ inputs=input_image,
82
+ label="Sample Notes"
83
+ )
84
 
85
  with gr.Column(scale=1):
86
+ status_msg = gr.Textbox(label="Status", interactive=False)
87
+ download_link = gr.File(label="📄 Download Word File")
88
 
89
+ with gr.Accordion("Review Extracted Text", open=False):
90
+ extracted_text = gr.TextArea(label="Text Preview", lines=12)
91
 
92
  submit_btn.click(
93
  fn=process_document,