Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| import torchvision.transforms as transforms | |
| from PIL import Image | |
| from pillow_heif import register_heif_opener | |
| import numpy as np | |
| import os | |
| from io import BytesIO | |
| from googletrans import Translator, LANGUAGES | |
| from gtts import gTTS | |
| # Register HEIC support for PIL | |
| register_heif_opener() | |
| from streamlit_cropper import st_cropper | |
| import easyocr | |
# Browser-tab title, icon and wide layout for the app.
st.set_page_config(
    page_title="INK VISION",
    page_icon="β¨",
    layout="wide",
)

# Stylesheet (animated gradient background, glass header card, button /
# uploader / text-area skins) followed by the header banner markup.
# Kept as one literal because it is injected verbatim as raw HTML below.
_PAGE_CHROME_HTML = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@500;700&family=Poppins:wght@300;400;600&display=swap');
/* Animated Gradient Background */
.stApp {
background: linear-gradient(-45deg, #ee7752, #e73c7e, #23a6d5, #23d5ab);
background-size: 400% 400%;
animation: gradientBG 15s ease infinite;
font-family: 'Poppins', sans-serif;
}
@keyframes gradientBG {
0% { background-position: 0% 50%; }
50% { background-position: 100% 50%; }
100% { background-position: 0% 50%; }
}
/* Base text color to white for contrast against dark/bright backgrounds */
h1, h2, h3, p, label {
color: #ffffff !important;
text-shadow: 1px 1px 4px rgba(0,0,0,0.4);
}
/* Glassmorphism wrapper for header */
.glass-container {
background: rgba(255, 255, 255, 0.1);
border-radius: 16px;
box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
backdrop-filter: blur(8.5px);
-webkit-backdrop-filter: blur(8.5px);
border: 1px solid rgba(255, 255, 255, 0.18);
padding: 2rem;
margin-top: 1rem;
margin-bottom: 2rem;
}
/* Fancy Header Font */
h1 {
font-family: 'Orbitron', sans-serif !important;
font-size: 3rem !important;
text-align: center;
background: -webkit-linear-gradient(#fff, #f0f0f0);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.5rem;
}
/* Stylish buttons */
div.stButton > button:first-child {
background: linear-gradient(90deg, #ff007f 0%, #7928ca 100%);
color: white;
border: none;
border-radius: 50px;
padding: 10px 24px;
font-weight: 600;
font-size: 1.1rem;
cursor: pointer;
transition: all 0.3s ease;
box-shadow: 0 4px 15px rgba(0,0,0,0.2);
}
div.stButton > button:first-child:hover {
transform: translateY(-2px);
box-shadow: 0 6px 20px rgba(0,0,0,0.3);
background: linear-gradient(90deg, #7928ca 0%, #ff007f 100%);
color: #ffffff !important;
}
/* File Uploader styling */
.stFileUploader > div > div {
background: rgba(255, 255, 255, 0.05);
border: 2px dashed rgba(255, 255, 255, 0.5);
border-radius: 10px;
}
/* Text area styling */
.stTextArea textarea {
background-color: rgba(255, 255, 255, 0.9) !important;
color: #333333 !important;
font-size: 1.5rem !important;
font-weight: 600 !important;
font-family: 'Poppins', sans-serif !important;
border-radius: 10px !important;
border: 2px solid transparent !important;
}
.stTextArea textarea:focus {
border-color: #ff007f !important;
box-shadow: 0 0 10px rgba(255,0,127,0.5) !important;
}
</style>
<div class="glass-container">
<h1>β¨ Handwritten Text Recognition System for Document Digitalization β¨</h1>
<p style="text-align: center; font-size: 1.2rem;">..H..T..R..</p>
</div>
"""

# unsafe_allow_html is required so the <style> and <div> tags are rendered
# rather than escaped.
st.markdown(_PAGE_CHROME_HTML, unsafe_allow_html=True)
| from pipeline.preprocessor import DocumentPreprocessor | |
| from pipeline.ocr_engine import HTREngine | |
| from pipeline.postprocessor import NLPCorrector | |
# Module-level googletrans client used by the translate button further down.
translator = Translator()

# Seed every session-state slot the UI reads, leaving existing values alone.
_SESSION_DEFAULTS = (
    ("extracted_text", ""),
    ("translated_text", ""),
    ("target_lang", "en"),
)
for _slot, _initial in _SESSION_DEFAULTS:
    if _slot not in st.session_state:
        st.session_state[_slot] = _initial
@st.cache_resource
def load_pipeline():
    """Construct the three stages of the recognition pipeline.

    Decorated with ``st.cache_resource`` so the stages are built once and the
    same objects are handed back on later calls, instead of being rebuilt
    every time Streamlit re-executes the script after a widget interaction.

    Returns:
        tuple: ``(DocumentPreprocessor, HTREngine, NLPCorrector)`` instances,
        in that order. The engine is created with ``languages=['en']`` and the
        corrector with ``use_ml=True``.
    """
    preprocessor_stage = DocumentPreprocessor()
    ocr_stage = HTREngine(languages=['en'])
    corrector_stage = NLPCorrector(use_ml=True)
    return preprocessor_stage, ocr_stage, corrector_stage
def _load_input_image(source, crop_toggle_key, cropper_key, caption, target_width=700):
    """Decode an uploaded or captured file, normalise its width, optionally crop.

    Shared by the upload and camera branches so both behave identically.

    Args:
        source: File-like object from ``st.file_uploader`` or ``st.camera_input``.
        crop_toggle_key: Widget key for the "Crop Image" checkbox.
        cropper_key: Widget key for the ``st_cropper`` component.
        caption: Caption for the preview shown when cropping is switched off.
        target_width: Width in pixels the image is scaled to before display.

    Returns:
        The PIL image to feed to OCR, or ``None`` when the file cannot be
        decoded (an error is shown to the user in that case).
    """
    try:
        image = Image.open(source).convert("RGB")
    except OSError as exc:
        # Pillow reports unreadable/unsupported files as OSError subclasses;
        # surface that to the user instead of crashing the script run.
        st.error(f"Could not read the image: {exc}")
        return None

    # Resize to a standard width so the cropper and st.image match in size.
    if image.width != target_width:
        ratio = target_width / float(image.width)
        # Clamp to 1px: a very wide, short image would otherwise round to a
        # zero height, which resize() rejects.
        scaled_height = max(1, int(image.height * ratio))
        image = image.resize((target_width, scaled_height))

    if st.checkbox("β¨ Crop Image", key=crop_toggle_key):
        st.markdown("β¨ **Crop the word below:**")
        return st_cropper(image, realtime_update=True, box_color='#ff007f', key=cropper_key)

    # NOTE(review): the preview is rendered only when cropping is off (the
    # cropper shows the image itself) - confirm against the intended layout.
    st.image(image, caption=caption)
    return image


preprocessor, engine, nlp_corrector = load_pipeline()
col1, col2 = st.columns(2)

# Image chosen for OCR; stays None until the user supplies a readable image.
target_image = None

with col1:
    st.markdown("### πΈ Please Upload an Image")
    input_method = st.radio("Choose Input Method", ["Upload Image", "Take a Photo"], horizontal=True)
    if input_method == "Upload Image":
        uploaded_file = st.file_uploader(
            "Upload a handwritten word image",
            type=["png", "jpg", "jpeg", "heic", "webp"],
        )
        if uploaded_file is not None:
            target_image = _load_input_image(
                uploaded_file, "crop_upload", "upload_crop", "Uploaded Image"
            )
    else:
        camera_photo = st.camera_input("Take a picture of a handwritten word")
        if camera_photo is not None:
            target_image = _load_input_image(
                camera_photo, "crop_camera", "camera_crop", "Captured Image"
            )
def _run_ocr_stream(produce_text):
    """Run one OCR stream and report failure instead of hiding it.

    Args:
        produce_text: Zero-argument callable returning the stream's text.

    Returns:
        tuple: ``(text, error)``. On success ``error`` is ``None``; on failure
        ``text`` is ``""`` and ``error`` is the exception that was raised.
    """
    try:
        return produce_text(), None
    except Exception as exc:
        # Deliberately broad and best-effort: one failing stream must not
        # stop the other from running. The caller decides how to report it.
        return "", exc


def _speak(text, lang, label):
    """Synthesise ``text`` with gTTS in ``lang`` and render an audio player.

    Args:
        text: Text to read aloud.
        lang: Language code passed to gTTS.
        label: Word used in the error message ("original" or "translated").
    """
    try:
        audio_buffer = BytesIO()
        gTTS(text=text, lang=lang).write_to_fp(audio_buffer)
        st.audio(audio_buffer.getvalue(), format="audio/mp3")
    except Exception as e:
        st.error(f"Failed to generate audio for {label} text: {e}")


with col2:
    st.markdown("### πͺ Magic Result")
    if target_image is not None:
        if st.button("β¨ Extract Text"):
            with st.spinner("Please wait while extracting"):
                if engine is None:
                    st.error("Pipeline failed to initialize.")
                else:
                    # --- STREAM A: RAW OCR (No Preprocessing) ---
                    raw_stream_text, raw_error = _run_ocr_stream(
                        lambda: nlp_corrector.correct_spelling(
                            engine.extract_text(np.array(target_image))
                        )
                    )
                    # --- STREAM B: 3-STEP PIPELINE (Pre-Processed) ---
                    # preprocess -> OCR -> spelling correction
                    clean_stream_text, clean_error = _run_ocr_stream(
                        lambda: nlp_corrector.correct_spelling(
                            engine.extract_text(preprocessor.process(target_image))
                        )
                    )
                    # Tell the user when a stream actually failed, so a crash
                    # is not mistaken for "no text in the image".
                    for stream_name, stream_error in (
                        ("Stream A (Raw Image)", raw_error),
                        ("Stream B (Cleaned Image)", clean_error),
                    ):
                        if stream_error is not None:
                            st.warning(f"{stream_name} failed: {stream_error}")

                    # --- THE ENSEMBLE JUDGE ---
                    # judge_best_output chooses between the two stream outputs;
                    # its selection criteria live in NLPCorrector.
                    extracted_text = nlp_corrector.judge_best_output(raw_stream_text, clean_stream_text)
                    if extracted_text.strip() == "":
                        st.warning("Oops! I couldn't find any text. Try a clearer image.")
                        extracted_text = ""
                    else:
                        st.success("Ensemble Magic! Winner selected from Dual-Stream analysis.")
                        with st.expander(""):
                            st.write(f"**Stream A (Raw Image):** {raw_stream_text}")
                            st.write(f"**Stream B (Cleaned Image):** {clean_stream_text}")
                    st.session_state["extracted_text"] = extracted_text
                    # A fresh extraction invalidates any earlier translation.
                    st.session_state["translated_text"] = ""

        # Editable original text
        st.session_state["extracted_text"] = st.text_area(
            "You can edit the result here:",
            value=st.session_state.get("extracted_text", ""),
            height=150,
        )

        st.markdown("### π Translation & Voice")
        # Language selection
        lang_keys = sorted(LANGUAGES.keys())
        saved_lang = st.session_state.get("target_lang", "en")
        if saved_lang not in LANGUAGES:
            # Guard against an unexpected stored code so index() cannot raise.
            saved_lang = "en"
        target_lang = st.selectbox(
            "Choose target language",
            options=lang_keys,
            index=lang_keys.index(saved_lang),
            format_func=lambda k: LANGUAGES[k].title(),
        )
        st.session_state["target_lang"] = target_lang
        with st.expander("Show available languages"):
            st.write(", ".join(f"{code} β {name.title()}" for code, name in LANGUAGES.items()))

        col_translate, col_speak = st.columns(2)
        with col_translate:
            if st.button("π Translate into other language"):
                if st.session_state["extracted_text"].strip():
                    try:
                        result = translator.translate(
                            st.session_state["extracted_text"],
                            dest=target_lang,
                        )
                    except Exception as e:
                        st.error(f"Translation failed: {e}")
                    else:
                        st.session_state["translated_text"] = result.text
                        # Remember which language this translation is in, so
                        # speech still matches it if the selectbox changes later.
                        st.session_state["translated_lang"] = target_lang
                else:
                    st.warning("Please extract or type some text first.")

        with col_speak:
            if st.button("π Speak text (original & translated)"):
                original = st.session_state.get("extracted_text", "").strip()
                translated = st.session_state.get("translated_text", "").strip()
                if not original and not translated:
                    st.warning("Nothing to speak. Please extract or translate text first.")
                else:
                    # Speak original (English assumed)
                    if original:
                        _speak(original, "en", "original")
                    # Speak translated, in the language it was translated into.
                    if translated:
                        translated_lang = st.session_state.get("translated_lang", target_lang)
                        _speak(translated, translated_lang, "translated")

        if st.session_state.get("translated_text", "").strip():
            st.text_area(
                "Translated text:",
                value=st.session_state["translated_text"],
                height=150,
            )
    else:
        st.info("Waiting for an image to work my magic...")