# Spaces: Sleeping  (hosting-page status banner captured along with this file; not part of the script)
"""Streamlit app that transcribes handwritten text from an image with Gemini.

Flow, top to bottom:

1. Ask for a Gemini API key and, once one is entered, configure the client
   and create the model.
2. Let the user upload an image (jpg / jpeg / png / webp) and preview it.
3. On "Extract & Refine Text", make two model calls: an initial extraction,
   then a refinement pass that re-checks that extraction against the image.
4. Show the refined text, warn when it mentions diagrams, graphs, tables or
   equations, and offer the text as a Markdown download.
"""
import re

import streamlit as st
import google.generativeai as genai
from PIL import Image

# Model identifier passed to genai.GenerativeModel.
MODEL_NAME = 'gemini-2.5-pro-exp-03-25'

# Instructions for the first (extraction) pass.
INITIAL_PROMPT = """You are an expert in extracting handwritten text from scanned student answer scripts.
Your task is to accurately extract the handwritten content while ensuring fidelity to the original writing style and structure.
Follow these strict guidelines:
1. **Accurate Extraction:** Extract the handwritten text exactly as it appears, preserving original spacing, punctuation, and line breaks.
2. **Handling Scratched-Out Text:**
- If a word or phrase is visibly scratched out (e.g., crossed out using lines or scribbles):
- Either omit it entirely from the extracted text **OR**
- Retain it but apply strikethrough formatting using `~~word~~` (Markdown format).
- Do not attempt to guess or reconstruct scratched-out words.
Excluding Diagrams, Graphs, Tables, and Equations:
- **Diagrams & Flowcharts:** Detect and exclude them, replacing them with '[Diagram Detected: This section contains a diagram or flowchart illustrating a concept or process.]'. Ensure that the surrounding text structure remains intact.
- **Graphs:** Identify and omit graphs (e.g., bar charts, line graphs, scatter plots), replacing them with '[Graph Detected: This section contains a visual representation of data, such as trends, distributions, or comparisons.]'. Ensure that surrounding text remains structured and readable.
- **Tables:** Do not extract table contents. Instead, insert '[Table Detected: A structured table with numerical or categorical data is present.]' in the extracted text to indicate omitted tabular data while maintaining text alignment.
- **Equations & Expressions:** Omit standalone mathematical formulas while maintaining appropriate spacing. If a mathematical expression is detected, replace it with '[Equation Detected: A mathematical formula or expression is present.]'.
Whenever a table, diagram, figure, or flowchart is encountered in the document, **explicitly mention its presence with a relevant description**, but do not attempt to extract or reproduce its contents. The extracted text should remain structured, and no part of a diagram, table, or equation should be reconstructed in any form.
4. **Preserving Annotations:**
- If additional notes, comments, or margin annotations are present, extract them separately and label them as **"Annotations"**.
5. **No Grammar or Content Correction:**
- Do not modify spelling mistakes, grammar, or factual content. Extract the text exactly as written, correcting only **recognition errors** (e.g., misidentified characters).
6. **Ensure High Accuracy:**
- Cross-check extracted text to prevent common OCR errors, such as misreading '1' as 'l' or 'O' as '0'.
"""

# Instructions for the second (refinement) pass. The first-pass text is sent
# as a separate content part next to this prompt, not interpolated into it.
REFINE_PROMPT = """You are an expert in handwritten text recognition and refinement, specializing in ensuring the highest accuracy in extracted text from student answer scripts.
Your task is to carefully compare the initially extracted text with the handwritten content in the provided image and make precise corrections to eliminate any OCR recognition errors.
**Guidelines:**
1. **Exact Character Matching:**
- Ensure that every character, symbol, and special notation (e.g., mathematical symbols like ∫, λ, π, Σ, ∂, Greek letters, and subscript/superscript text) is accurately extracted.
- Pay close attention to case sensitivity, ensuring that uppercase and lowercase letters are correctly identified.
2. **Handling of Scratched-Out Text:**
- If any word or phrase is scratched out, either:
- Completely **omit it** from the refined text.
- OR retain it with **strikethrough formatting** using `~~scratched text~~` (Markdown format).
- Do **not** attempt to reconstruct or infer words that have been heavily scratched out beyond recognition.
Excluding Diagrams, Graphs, Tables, and Equations:
- **Diagrams & Flowcharts:** Detect and exclude them, replacing them with '[Diagram Detected: This section contains a diagram or flowchart illustrating a concept or process.]'. Ensure that the surrounding text structure remains intact.
- **Graphs:** Identify and omit graphs (e.g., bar charts, line graphs, scatter plots), replacing them with '[Graph Detected: This section contains a visual representation of data, such as trends, distributions, or comparisons.]'. Ensure that surrounding text remains structured and readable.
- **Tables:** Do not extract table contents. Instead, insert '[Table Detected: A structured table with numerical or categorical data is present.]' in the extracted text to indicate omitted tabular data while maintaining text alignment.
- **Equations & Expressions:** Omit standalone mathematical formulas while maintaining appropriate spacing. If a mathematical expression is detected, replace it with '[Equation Detected: A mathematical formula or expression is present.]'.
Whenever a table, diagram, figure, or flowchart is encountered in the document, **explicitly mention its presence with a relevant description**, but do not attempt to extract or reproduce its contents. The extracted text should remain structured, and no part of a diagram, table, or equation should be reconstructed in any form.
4. **Preserving Formatting & Spacing:**
- Maintain the **exact structure** of the handwritten text, including spacing, punctuation, and line breaks.
- If words are separated by unusual spacing, preserve that spacing in the extracted text.
5. **Correction of OCR Recognition Errors:**
- Identify and fix **common OCR mistakes**, such as:
- Misinterpreting **numbers and letters** (e.g., ‘1’ vs. ‘l’, ‘0’ vs. ‘O’).
- Incorrectly recognizing **special characters** (e.g., ‘∑’ mistaken as ‘E’).
- Missing **accents or diacritics** (e.g., é, ü, ñ).
- If a character is ambiguous, cross-check it against the handwriting to determine the most likely correct representation.
6. **No Grammar or Content Modification:**
- Do **not** alter spelling, grammar, or factual content.
- Only correct recognition errors—do not "fix" perceived mistakes made by the writer.
7. **Final Validation:**
- Before finalizing the output, perform a second verification pass to ensure that **every** character exactly matches the handwritten content in the image.
**Final Output Format:**
- The **refined text** should be provided in **plain text or Markdown format**, ensuring readability while preserving structure and accuracy.
- Clearly indicate any corrections made compared to the initially extracted text.
"""

# Whole-word, case-insensitive patterns per warning category. Word boundaries
# keep "paragraph" from counting as "graph" and "suitable" as "table", while
# the '[Diagram Detected: ...]'-style placeholders requested by the prompts
# still match through their first word.
FLAG_PATTERNS = {
    "Diagrams": re.compile(r"\b(?:diagram|flowchart)s?\b", re.IGNORECASE),
    "Graphs": re.compile(r"\b(?:graph|chart)s?\b", re.IGNORECASE),
    "Tables": re.compile(r"\btables?\b", re.IGNORECASE),
    "Equations": re.compile(r"\b(?:equations?|formula(?:s|e)?)\b", re.IGNORECASE),
}


def _find_flagged_elements(text):
    """Return the FLAG_PATTERNS category names whose pattern occurs in *text*."""
    return [label for label, pattern in FLAG_PATTERNS.items() if pattern.search(text)]


# Streamlit UI
st.title("📝 OCR with Gemini (Accurate Extraction & Refinement)")

# API Key Input
GEMINI_API_KEY = st.text_input("Enter your Gemini API Key:", type="password")

# Stays None until a key is entered, so the extraction step can report a
# missing key explicitly instead of failing on an undefined name.
model = None
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel(MODEL_NAME)

# Upload image
uploaded_file = st.file_uploader(
    "Upload an image of handwritten text",
    type=["jpg", "jpeg", "png", "webp"],
)

if uploaded_file:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # NOTE(review): everything below renders only on the run in which this
    # button was clicked; interacting with another widget afterwards (such as
    # the download button) reruns the script and is expected to clear the
    # results. Persisting them in st.session_state would avoid that - confirm
    # the desired behaviour before changing.
    if st.button("Extract & Refine Text"):
        if model is None:
            st.error("Please enter your Gemini API key before extracting text.")
        else:
            with st.spinner("Extracting text..."):
                # Broad catch on purpose: this is the UI boundary, and any
                # failure from either model call (including reading `.text`)
                # is shown to the user rather than crashing the app.
                try:
                    # Initial Extraction
                    response_initial = model.generate_content([image, INITIAL_PROMPT])
                    extracted_text = response_initial.text

                    # Refinement Pass: hand the model the first-pass text so
                    # it has something to compare against the image.
                    response_refine = model.generate_content([
                        image,
                        REFINE_PROMPT,
                        f"**Initially extracted text:**\n{extracted_text}",
                    ])
                    refined_text = response_refine.text

                    flagged_elements = _find_flagged_elements(refined_text)

                    # Display results
                    st.subheader("✅ Final Extracted Text:")
                    # Rendered as Markdown only; raw HTML in model output is
                    # not trusted, so unsafe_allow_html is left off.
                    st.markdown(refined_text)
                    st.code(refined_text, language="text")

                    # Display warning if flagged elements were found
                    if flagged_elements:
                        st.warning(f"⚠️ The extracted text contains: {', '.join(flagged_elements)}. Please review these sections manually.")

                    # Allow Download
                    st.download_button(
                        "⬇️ Download Markdown",
                        refined_text,
                        file_name="extracted_text.md",
                        mime="text/markdown",
                    )
                except Exception as e:
                    st.error(f"Error: {e}")