Spaces:

agnixcode
/

pitp_project_text_extract_from_image

Sleeping

pitp_project_text_extract_from_image / app.py

Dua Rajper

Create app.py

e9ed06c verified about 1 year ago

2.88 kB

	import io
	import os

	import easyocr
	import numpy as np
	import openai # Using OpenAI GPT (or replace with GROQ API)
	import streamlit as st
	import torch
	from PIL import Image
	from transformers import CLIPModel, CLIPImageProcessor

	# Page configuration is issued ahead of every other Streamlit command,
	# because set_page_config() must be the first one executed.
	st.set_page_config(
	    page_title="Multimodal AI Assistant",
	    layout="wide",
	)

	# ---- Load CLIP Model (Vision Only) ---- #
	@st.cache_resource
	def load_clip_model():
	    """Load the CLIP model and its image processor.

	    Both objects come from the same ``fxmarty/clip-vision-model-tiny``
	    checkpoint. The function is decorated with ``st.cache_resource`` so
	    the loaded objects are reused instead of being rebuilt on each run.

	    Returns:
	        A ``(model, processor)`` tuple.
	    """
	    checkpoint = "fxmarty/clip-vision-model-tiny"
	    # ignore_mismatched_sizes lets loading proceed when checkpoint weight
	    # shapes differ from what CLIPModel expects.
	    clip_model = CLIPModel.from_pretrained(checkpoint, ignore_mismatched_sizes=True)
	    image_processor = CLIPImageProcessor.from_pretrained(checkpoint)
	    return clip_model, image_processor


	model, processor = load_clip_model()

	# ---- Load OCR (EasyOCR) ---- #
	@st.cache_resource
	def load_ocr():
	    """Build the EasyOCR reader for English text.

	    Decorated with ``st.cache_resource`` so the reader is reused instead
	    of being recreated on each run.

	    Returns:
	        The ``easyocr.Reader`` instance.
	    """
	    ocr_reader = easyocr.Reader(["en"])
	    return ocr_reader


	reader = load_ocr()

	# ---- LLM helper ---- #
	def _ask_llm(context_text, question, api_key):
	    """Ask the chat model a question about text extracted from an image.

	    Works with both generations of the ``openai`` package: the 1.x client
	    interface when ``openai.OpenAI`` exists, otherwise the legacy
	    ``openai.ChatCompletion`` interface (which 1.x removed).

	    Args:
	        context_text: Text extracted from the uploaded image.
	        question: The user's question about that text.
	        api_key: OpenAI API key used to authenticate the request.

	    Returns:
	        The content of the first choice returned by the model.

	    Raises:
	        Exception: Whatever the ``openai`` package raises on failure
	            (authentication, network, rate limits, ...).
	    """
	    messages = [
	        {"role": "system", "content": "You are an AI assistant helping answer questions based on extracted text from an image."},
	        {"role": "user", "content": f"Extracted text: {context_text}\n\nQuestion: {question}"},
	    ]

	    if hasattr(openai, "OpenAI"):
	        # openai>=1.0: explicit client object, attribute-style response.
	        client = openai.OpenAI(api_key=api_key)
	        response = client.chat.completions.create(
	            model="gpt-3.5-turbo",
	            messages=messages,
	        )
	        return response.choices[0].message.content

	    # openai<1.0: module-level key, dict-style response.
	    openai.api_key = api_key
	    response = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        messages=messages,
	    )
	    return response["choices"][0]["message"]["content"]


	# ---- Streamlit UI ---- #
	st.title("🖼️ Multimodal AI Assistant")
	st.write("Upload an image, extract text, and ask questions!")

	# ---- Upload Image ---- #
	uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "png", "jpeg"])

	extracted_text = None # Stays None until an image has been processed

	if uploaded_file is not None:
	    # Decode the upload; a corrupt or mislabeled file ends this run with a
	    # visible error instead of an unhandled traceback.
	    try:
	        image = Image.open(uploaded_file).convert("RGB")
	    except OSError as exc:
	        st.error(f"Could not read the uploaded image: {exc}")
	        st.stop()

	    st.image(image, caption="Uploaded Image", use_container_width=True)

	    # The OCR reader is given the image as a NumPy array.
	    image_np = np.array(image)

	    with st.spinner("🔍 Extracting text from image..."):
	        # detail=0 is requested so the result can be joined as plain strings.
	        extracted_text_list = reader.readtext(image_np, detail=0)

	    extracted_text = " ".join(extracted_text_list)

	    st.write("### 📝 Extracted Text:")
	    if extracted_text:
	        st.success(extracted_text)
	    else:
	        st.warning("No readable text found in the image.")

	# ---- Question Answering Section ---- #
	if extracted_text:
	    user_question = st.text_input("💡 Ask a question about the extracted text:")

	    if user_question:
	        # The key comes from the environment rather than being hard-coded
	        # in the source file.
	        api_key = os.environ.get("OPENAI_API_KEY")

	        if not api_key:
	            st.error("OPENAI_API_KEY is not set. Add it to the environment to enable question answering.")
	        else:
	            answer = None
	            with st.spinner("🤖 Thinking..."):
	                try:
	                    answer = _ask_llm(extracted_text, user_question, api_key)
	                except Exception as exc:  # UI boundary: report, don't crash
	                    st.error(f"The language model request failed: {exc}")

	            if answer is not None:
	                st.write("### 🤖 AI Answer:")
	                st.success(answer)