Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from ocr_cpu import extract_text_got, extract_text_qwen, extract_text_llama, clean_extracted_text | |
| import json | |
# Configure the page (title, layout, icon) before drawing any content.
st.set_page_config(
    page_title="MultiModel OCR Fusion",
    layout="centered",
    page_icon="π",
)

# Custom stylesheet injected as raw HTML: background colours for the report
# view and the sidebar, a blue <h1>, and an .upload-btn button class.
_page_css = """
<style>
.reportview-container { background: #f4f4f4; }
.sidebar .sidebar-content { background: #e0e0e0; }
h1 { color: #007BFF; }
.upload-btn { background-color: #007BFF; color: white; padding: 10px; border-radius: 5px; text-align: center; }
</style>
"""
st.markdown(_page_css, unsafe_allow_html=True)

# --- Title Section ---
# Heading followed by a one-line description of what the app does.
st.title("π MultiModel OCR Fusion")
st.write(
    "Upload an image to extract and clean text using multiple OCR models (GOT, Qwen, LLaMA)."
)
# --- Image Upload Section ---
# The uploader accepts only jpg / jpeg / png files; the result is None until
# the user has provided a file.
uploaded_file = st.file_uploader(
    label="Upload an image file",
    type=["jpg", "jpeg", "png"],
)

# Model selection (rendered in the sidebar): one of the three OCR back ends.
st.sidebar.title("Model Selection")
model_choice = st.sidebar.selectbox(
    label="Choose OCR Model",
    options=("GOT", "Qwen", "LLaMA"),
)
# --- Process the uploaded image ---
# Runs only once a file has been uploaded: preview the image, run the selected
# OCR model, show the raw and cleaned text, save the raw text to a JSON file,
# and offer a keyword search over the cleaned text.
if uploaded_file is not None:
    # NOTE(review): recent Streamlit releases appear to deprecate
    # `use_column_width` in favour of `use_container_width` -- confirm against
    # the pinned Streamlit version before changing it.
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Map each sidebar choice to the extractor imported from ocr_cpu.
    extractors = {
        "GOT": extract_text_got,
        "Qwen": extract_text_qwen,
        "LLaMA": extract_text_llama,
    }

    # Initialise both results up front. When the model yields only whitespace
    # the cleaning step is skipped, yet the display and keyword-search code
    # below still reads `cleaned_text`; without this default that path raised
    # NameError.
    extracted_text, cleaned_text = "", ""

    # Extract text from the image based on the selected model.
    with st.spinner(f"Extracting text using the {model_choice} model..."):
        try:
            extracted_text = extractors[model_choice](uploaded_file)

            if not extracted_text.strip():
                # Nothing usable came back; cleaned_text stays empty.
                st.warning(f"No text extracted using {model_choice}.")
            else:
                # Clean the extracted text.
                cleaned_text = clean_extracted_text(extracted_text)
        except Exception as e:
            # Deliberately broad: any failure in extraction or cleaning is
            # reported in the UI and both texts are blanked so the rest of the
            # page still renders.
            st.error(f"Error during text extraction: {str(e)}")
            extracted_text, cleaned_text = "", ""

    # --- Display Extracted and Cleaned Text ---
    st.subheader(f"Extracted Text using {model_choice}")
    st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)

    st.subheader("Cleaned Text (AI-processed)")
    st.text_area("Cleaned Text", cleaned_text, height=200)

    # Save the raw extracted text for further use (skipped when empty).
    if extracted_text:
        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
            json.dump({"text": extracted_text}, json_file)

    # --- Keyword Search ---
    # Case-insensitive substring match against the cleaned text.
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the extracted text")

    if keyword:
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the cleaned text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")