Spaces:

UniquePratham
/

DualTextOCRFusion

Sleeping

App Files Files Community

DualTextOCRFusion / app.py

UniquePratham

Update app.py

a0652de verified about 1 year ago

raw

history blame

2.86 kB

	import streamlit as st
	from ocr_cpu import extract_text_got, extract_text_qwen, extract_text_llama, clean_extracted_text
	import json

	# Page chrome: browser-tab title and icon, with a centered content column.
	st.set_page_config(page_title="MultiModel OCR Fusion", layout="centered", page_icon="📄")

	# Custom CSS, injected as raw HTML through st.markdown below.
	_PAGE_CSS = """
	<style>
	.reportview-container { background: #f4f4f4; }
	.sidebar .sidebar-content { background: #e0e0e0; }
	h1 { color: #007BFF; }
	.upload-btn { background-color: #007BFF; color: white; padding: 10px; border-radius: 5px; text-align: center; }
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# --- Title Section ---
	st.title("📄 MultiModel OCR Fusion")
	st.write("Upload an image to extract and clean text using multiple OCR models (GOT, Qwen, LLaMA).")

	# --- Image Upload Section ---
	# Only these image extensions are offered by the upload widget.
	_IMAGE_TYPES = ["jpg", "jpeg", "png"]
	uploaded_file = st.file_uploader("Upload an image file", type=_IMAGE_TYPES)

	# --- Model Selection (sidebar) ---
	# The selected label decides which extractor runs on the uploaded image.
	_MODEL_NAMES = ("GOT", "Qwen", "LLaMA")
	st.sidebar.title("Model Selection")
	model_choice = st.sidebar.selectbox("Choose OCR Model", _MODEL_NAMES)

	# Main workflow: runs only once the user has uploaded an image. It shows the
	# image, extracts text with the selected model, cleans it, displays both
	# versions, saves the raw text to disk and offers a keyword search.
	if uploaded_file is not None:
	    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

	    # Bind both results up front so every later use (text areas, keyword
	    # search) has a defined value even when extraction yields no text:
	    # the "no text" branch below only warns and never assigns cleaned_text.
	    extracted_text, cleaned_text = "", ""

	    # Sidebar label -> extractor function imported from ocr_cpu.
	    extractors = {
	        "GOT": extract_text_got,
	        "Qwen": extract_text_qwen,
	        "LLaMA": extract_text_llama,
	    }

	    # Extract text from the image based on selected model
	    with st.spinner(f"Extracting text using the {model_choice} model..."):
	        try:
	            extracted_text = extractors[model_choice](uploaded_file)

	            if not extracted_text.strip():
	                # Nothing usable came back; cleaned_text stays "".
	                st.warning(f"No text extracted using {model_choice}.")
	            else:
	                # Clean the extracted text
	                cleaned_text = clean_extracted_text(extracted_text)
	        except Exception as e:
	            # Top-level UI boundary: report the failure to the user and
	            # fall back to empty results instead of crashing the page.
	            st.error(f"Error during text extraction: {e}")
	            extracted_text, cleaned_text = "", ""

	    # --- Display Extracted and Cleaned Text ---
	    st.subheader(f"Extracted Text using {model_choice}")
	    st.text_area(f"Raw Text ({model_choice})", extracted_text, height=200)

	    st.subheader("Cleaned Text (AI-processed)")
	    st.text_area("Cleaned Text", cleaned_text, height=200)

	    # Save extracted text for further use (skipped when nothing was extracted)
	    if extracted_text:
	        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
	            json.dump({"text": extracted_text}, json_file)

	    # --- Keyword Search ---
	    st.subheader("Search for Keywords")
	    keyword = st.text_input("Enter a keyword to search in the extracted text")

	    if keyword:
	        # Case-insensitive substring match against the cleaned text.
	        if keyword.lower() in cleaned_text.lower():
	            st.success(f"Keyword '{keyword}' found in the cleaned text!")
	        else:
	            st.error(f"Keyword '{keyword}' not found.")