Spaces:
Sleeping
Sleeping
jj
Browse files
app.py
CHANGED
|
@@ -4,40 +4,40 @@ from docx import Document
|
|
| 4 |
from PIL import Image
|
| 5 |
import os
|
| 6 |
|
| 7 |
-
# 1. API Configuration
|
| 8 |
-
# In 2026, 'gemini-3-flash' is the
|
| 9 |
-
MODEL_ID = 'gemini-3-flash'
|
| 10 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 11 |
|
| 12 |
if api_key:
|
| 13 |
genai.configure(api_key=api_key)
|
| 14 |
else:
|
| 15 |
-
print("Warning: GEMINI_API_KEY not found in environment
|
| 16 |
|
| 17 |
def process_document(input_img):
|
| 18 |
if input_img is None:
|
| 19 |
return None, "Error: No image uploaded.", ""
|
| 20 |
|
| 21 |
if not api_key:
|
| 22 |
-
return None, "Error: API Key missing in Space Secrets.", ""
|
| 23 |
|
| 24 |
try:
|
| 25 |
-
#
|
| 26 |
model = genai.GenerativeModel(MODEL_ID)
|
| 27 |
|
| 28 |
-
# Convert Gradio numpy
|
| 29 |
pil_img = Image.fromarray(input_img)
|
| 30 |
|
| 31 |
-
#
|
| 32 |
prompt = """
|
| 33 |
-
Extract all text from this
|
| 34 |
-
- Identify titles and
|
| 35 |
-
- Preserve
|
| 36 |
-
-
|
| 37 |
-
- If there are
|
| 38 |
"""
|
| 39 |
|
| 40 |
-
# Generate
|
| 41 |
response = model.generate_content([prompt, pil_img])
|
| 42 |
|
| 43 |
if not response or not response.text:
|
|
@@ -51,12 +51,13 @@ def process_document(input_img):
|
|
| 51 |
clean_line = line.strip()
|
| 52 |
if clean_line:
|
| 53 |
p = doc.add_paragraph()
|
| 54 |
-
# Basic markdown
|
| 55 |
-
|
|
|
|
| 56 |
if '**' in line: run.bold = True
|
| 57 |
if '*' in line and '**' not in line: run.italic = True
|
| 58 |
|
| 59 |
-
output_path = "
|
| 60 |
doc.save(output_path)
|
| 61 |
|
| 62 |
return output_path, "✅ Conversion Successful!", extracted_text
|
|
@@ -64,22 +65,29 @@ def process_document(input_img):
|
|
| 64 |
except Exception as e:
|
| 65 |
return None, f"❌ System Error: {str(e)}", ""
|
| 66 |
|
| 67 |
-
# --- Gradio
|
| 68 |
-
with gr.Blocks(theme=gr.themes.Soft(), title="Smart OCR
|
| 69 |
-
gr.Markdown("# ποΈ AI Document Architect")
|
| 70 |
-
gr.Markdown("Convert messy handwriting or scans into formatted Word
|
| 71 |
|
| 72 |
with gr.Row():
|
| 73 |
with gr.Column(scale=1):
|
| 74 |
-
input_image = gr.Image(label="
|
| 75 |
-
submit_btn = gr.Button("π
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
with gr.Column(scale=1):
|
| 78 |
-
status_msg = gr.Textbox(label="
|
| 79 |
-
download_link = gr.File(label="π Download Word
|
| 80 |
|
| 81 |
-
with gr.Accordion("
|
| 82 |
-
extracted_text = gr.TextArea(label="
|
| 83 |
|
| 84 |
submit_btn.click(
|
| 85 |
fn=process_document,
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
import os
|
| 6 |
|
| 7 |
+
# 1. API Configuration using Hugging Face Secret
|
| 8 |
+
# In 2026, 'gemini-3-flash-preview' is the most stable high-speed model
|
| 9 |
+
MODEL_ID = 'gemini-3-flash-preview'
|
| 10 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 11 |
|
| 12 |
if api_key:
|
| 13 |
genai.configure(api_key=api_key)
|
| 14 |
else:
|
| 15 |
+
print("Warning: GEMINI_API_KEY not found in environment secrets.")
|
| 16 |
|
| 17 |
def process_document(input_img):
|
| 18 |
if input_img is None:
|
| 19 |
return None, "Error: No image uploaded.", ""
|
| 20 |
|
| 21 |
if not api_key:
|
| 22 |
+
return None, "Error: API Key missing in Space Secrets (GEMINI_API_KEY).", ""
|
| 23 |
|
| 24 |
try:
|
| 25 |
+
# Load the 2026 stable Flash model
|
| 26 |
model = genai.GenerativeModel(MODEL_ID)
|
| 27 |
|
| 28 |
+
# Convert Gradio numpy image to PIL for Gemini
|
| 29 |
pil_img = Image.fromarray(input_img)
|
| 30 |
|
| 31 |
+
# Expert prompt for high-fidelity document extraction
|
| 32 |
prompt = """
|
| 33 |
+
Extract all text from this document accurately.
|
| 34 |
+
- Identify titles and align them correctly.
|
| 35 |
+
- Preserve Bold and Italic formatting.
|
| 36 |
+
- Group lines into logical paragraphs.
|
| 37 |
+
- If there are handwritten notes, transcribe them faithfully.
|
| 38 |
"""
|
| 39 |
|
| 40 |
+
# Generate Content
|
| 41 |
response = model.generate_content([prompt, pil_img])
|
| 42 |
|
| 43 |
if not response or not response.text:
|
|
|
|
| 51 |
clean_line = line.strip()
|
| 52 |
if clean_line:
|
| 53 |
p = doc.add_paragraph()
|
| 54 |
+
# Basic cleaning of markdown tags if Gemini adds them
|
| 55 |
+
text_to_write = clean_line.replace('**', '').replace('*', '')
|
| 56 |
+
run = p.add_run(text_to_write)
|
| 57 |
if '**' in line: run.bold = True
|
| 58 |
if '*' in line and '**' not in line: run.italic = True
|
| 59 |
|
| 60 |
+
output_path = "Converted_Document.docx"
|
| 61 |
doc.save(output_path)
|
| 62 |
|
| 63 |
return output_path, "✅ Conversion Successful!", extracted_text
|
|
|
|
| 65 |
except Exception as e:
|
| 66 |
return None, f"❌ System Error: {str(e)}", ""
|
| 67 |
|
| 68 |
+
# --- Gradio UI Setup ---
|
| 69 |
+
with gr.Blocks(theme=gr.themes.Soft(), title="Gemini 3 Smart OCR") as demo:
|
| 70 |
+
gr.Markdown("# ποΈ AI Document Architect (Gemini 3)")
|
| 71 |
+
gr.Markdown("Convert messy handwriting or document scans into formatted Word files instantly.")
|
| 72 |
|
| 73 |
with gr.Row():
|
| 74 |
with gr.Column(scale=1):
|
| 75 |
+
input_image = gr.Image(label="Source Image", type="numpy")
|
| 76 |
+
submit_btn = gr.Button("π Convert to Word", variant="primary")
|
| 77 |
+
|
| 78 |
+
# --- Added Example Images ---
|
| 79 |
+
gr.Examples(
|
| 80 |
+
examples=["image1.jpg", "image2.jpg"],
|
| 81 |
+
inputs=input_image,
|
| 82 |
+
label="Sample Notes"
|
| 83 |
+
)
|
| 84 |
|
| 85 |
with gr.Column(scale=1):
|
| 86 |
+
status_msg = gr.Textbox(label="Status", interactive=False)
|
| 87 |
+
download_link = gr.File(label="π Download Word File")
|
| 88 |
|
| 89 |
+
with gr.Accordion("Review Extracted Text", open=False):
|
| 90 |
+
extracted_text = gr.TextArea(label="Text Preview", lines=12)
|
| 91 |
|
| 92 |
submit_btn.click(
|
| 93 |
fn=process_document,
|