# final_v1 / app.py
# Author: amritn8 (commit e40bfd2, verified)
# --- Standard library ---
import logging
import os
from time import time

# --- Third-party ---
import docx  # python-docx: .docx paragraph extraction
import fitz  # PyMuPDF: PDF text extraction
import streamlit as st
import torch
from transformers import pipeline

# Configure root logging once at startup; module-level logger per convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ----------------------------
# SETUP & MODEL LOAD
# ----------------------------
# set_page_config must be the first Streamlit command executed on a rerun.
st.set_page_config(page_title="Fast QA App", layout="wide")
st.title("🧠 Instant Question Answering")
# Set cache directory for downloaded model weights (created under the CWD
# so it survives reruns within the same container/session).
cache_dir = os.path.join(os.getcwd(), "model_cache")
os.makedirs(cache_dir, exist_ok=True)
# NOTE(review): TRANSFORMERS_CACHE is assigned *after* `transformers` was
# imported above; the library may have already read this variable at import
# time, in which case this assignment has no effect — TODO confirm.
os.environ["TRANSFORMERS_CACHE"] = cache_dir
# Load model with progress indicator; cache_resource makes this run once per
# process — Streamlit reruns reuse the same pipeline object.
@st.cache_resource(show_spinner="Loading AI model...")
def load_qa_model():
    """Create the extractive question-answering pipeline.

    Returns:
        A transformers QA pipeline backed by
        ``distilbert-base-uncased-distilled-squad`` (a distilled, faster
        SQuAD model), placed on the first CUDA GPU when available.
    """
    # Lazy %-style args: the message is only rendered if INFO is enabled,
    # unlike the original f-string which always formatted eagerly.
    logger.info("Loading model at %s", time())
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",  # Faster alternative
        device=0 if torch.cuda.is_available() else -1,  # 0 = first GPU, -1 = CPU
    )
# Build (or fetch from Streamlit's resource cache) the QA pipeline at startup
# so the first question does not pay the model-load cost.
qa_pipeline = load_qa_model()
st.success("Model loaded successfully!")
# ----------------------------
# TEXT EXTRACTION FUNCTIONS
# ----------------------------
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page in an uploaded PDF, space-joined."""
    pdf_bytes = uploaded_file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf:
        page_texts = [page.get_text() for page in pdf]
    return " ".join(page_texts)
def extract_text_from_docx(uploaded_file):
    """Return the non-empty paragraphs of an uploaded .docx, newline-joined."""
    document = docx.Document(uploaded_file)
    non_empty = (paragraph.text for paragraph in document.paragraphs if paragraph.text)
    return "\n".join(non_empty)
# ----------------------------
# STREAMLIT UI
# ----------------------------
# Extension -> extractor dispatch table; adding a format is a one-line change.
_EXTRACTORS = {"pdf": extract_text_from_pdf, "docx": extract_text_from_docx}

with st.form("qa_form"):
    st.subheader("📄 Document Input")
    uploaded_file = st.file_uploader("Upload PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)
    st.subheader("❓ Question Input")
    question = st.text_input("Enter your question:")
    submit_btn = st.form_submit_button("Get Answer")

if submit_btn:
    context = ""
    if uploaded_file:
        # An uploaded document takes precedence over pasted text.
        extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
        extractor = _EXTRACTORS.get(extension)
        if extractor is not None:
            context = extractor(uploaded_file)
    else:
        context = manual_text

    if not context:
        st.warning("Please provide either a document or text input")
    elif not question:
        st.warning("Please enter a question")
    else:
        with st.spinner("Analyzing content..."):
            try:
                # Cap the context to keep inference latency bounded; the
                # model truncates over-long inputs anyway.
                result = qa_pipeline(question=question, context=context[:10000])
                st.markdown(f"### ✅ Answer: {result['answer']}")
                st.progress(result["score"])  # confidence score (expected 0-1)
                st.caption(f"Confidence: {result['score']:.0%}")
            except Exception as e:  # surface any pipeline failure to the user
                st.error(f"Error processing request: {str(e)}")
# ----------------------------
# ADVANCED SECTION
# ----------------------------
with st.expander("⚙️ Advanced Options"):
    st.subheader("Model Information")
    # Plain string literal: nothing to interpolate (original had a stray f-prefix).
    st.code("Using: distilbert-base-uncased-distilled-squad")
    st.caption("Optimized for fast inference on limited resources")