# final_v1 / app.py
# Author: amritn8 (commit e40bfd2, verified)
# --- Standard library ---
import logging
import os
from time import time

# --- Third-party ---
import docx  # python-docx: .docx paragraph extraction
import fitz  # PyMuPDF: PDF text extraction
import streamlit as st
import torch
from transformers import pipeline

# Configure root logging once at startup; module-level logger per convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# ----------------------------
# SETUP & MODEL LOAD
# ----------------------------
# set_page_config must be the first Streamlit command executed on a rerun.
st.set_page_config(page_title="Fast QA App", layout="wide")
st.title("🧠 Instant Question Answering")
# Set cache directory for downloaded model weights (created under the CWD
# so it survives reruns within the same container/session).
cache_dir = os.path.join(os.getcwd(), "model_cache")
os.makedirs(cache_dir, exist_ok=True)
# NOTE(review): TRANSFORMERS_CACHE is assigned *after* `transformers` was
# imported above; the library may have already read this variable at import
# time, in which case this assignment has no effect — TODO confirm.
os.environ["TRANSFORMERS_CACHE"] = cache_dir
# Load model with progress indicator; cache_resource makes this run once per
# process — Streamlit reruns reuse the same pipeline object.
@st.cache_resource(show_spinner="Loading AI model...")
def load_qa_model():
    """Create the extractive question-answering pipeline.

    Returns:
        A transformers QA pipeline backed by
        ``distilbert-base-uncased-distilled-squad`` (a distilled, faster
        SQuAD model), placed on the first CUDA GPU when available.
    """
    # Lazy %-style args: the message is only rendered if INFO is enabled,
    # unlike the original f-string which always formatted eagerly.
    logger.info("Loading model at %s", time())
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",  # Faster alternative
        device=0 if torch.cuda.is_available() else -1,  # 0 = first GPU, -1 = CPU
    )
# Build (or fetch from Streamlit's resource cache) the QA pipeline at startup
# so the first question does not pay the model-load cost.
qa_pipeline = load_qa_model()
st.success("Model loaded successfully!")
# ----------------------------
# TEXT EXTRACTION FUNCTIONS
# ----------------------------
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page in an uploaded PDF, space-joined."""
    pdf_bytes = uploaded_file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf:
        page_texts = [page.get_text() for page in pdf]
    return " ".join(page_texts)
def extract_text_from_docx(uploaded_file):
    """Return the non-empty paragraphs of an uploaded .docx, newline-joined."""
    document = docx.Document(uploaded_file)
    non_empty = (paragraph.text for paragraph in document.paragraphs if paragraph.text)
    return "\n".join(non_empty)
# ----------------------------
# STREAMLIT UI
# ----------------------------
# Extension -> extractor dispatch table; adding a format is a one-line change.
_EXTRACTORS = {"pdf": extract_text_from_pdf, "docx": extract_text_from_docx}

with st.form("qa_form"):
    st.subheader("📄 Document Input")
    uploaded_file = st.file_uploader("Upload PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)
    st.subheader("❓ Question Input")
    question = st.text_input("Enter your question:")
    submit_btn = st.form_submit_button("Get Answer")

if submit_btn:
    context = ""
    if uploaded_file:
        # An uploaded document takes precedence over pasted text.
        extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
        extractor = _EXTRACTORS.get(extension)
        if extractor is not None:
            context = extractor(uploaded_file)
    else:
        context = manual_text

    if not context:
        st.warning("Please provide either a document or text input")
    elif not question:
        st.warning("Please enter a question")
    else:
        with st.spinner("Analyzing content..."):
            try:
                # Cap the context to keep inference latency bounded; the
                # model truncates over-long inputs anyway.
                result = qa_pipeline(question=question, context=context[:10000])
                st.markdown(f"### ✅ Answer: {result['answer']}")
                st.progress(result["score"])  # confidence score (expected 0-1)
                st.caption(f"Confidence: {result['score']:.0%}")
            except Exception as e:  # surface any pipeline failure to the user
                st.error(f"Error processing request: {str(e)}")
# ----------------------------
# ADVANCED SECTION
# ----------------------------
with st.expander("⚙️ Advanced Options"):
    st.subheader("Model Information")
    # Plain string literal: nothing to interpolate (original had a stray f-prefix).
    st.code("Using: distilbert-base-uncased-distilled-squad")
    st.caption("Optimized for fast inference on limited resources")