"""Streamlit app: instant extractive question answering over PDF/DOCX/pasted text.

Runs a distilled SQuAD QA pipeline over user-supplied context and displays the
best answer span together with the model's confidence score.
"""
import logging
import os
from time import time

import docx
import fitz  # PyMuPDF
import streamlit as st
import torch
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ----------------------------
# SETUP & MODEL LOAD
# ----------------------------
st.set_page_config(page_title="Fast QA App", layout="wide")
st.title("🧠 Instant Question Answering")

# Keep downloaded model weights inside the project directory so repeated
# runs reuse them instead of re-downloading.
cache_dir = os.path.join(os.getcwd(), "model_cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = cache_dir

# Single source of truth for the model name — also shown in the
# "Advanced Options" panel, so the two can never drift apart.
MODEL_NAME = "distilbert-base-uncased-distilled-squad"

# Cap on raw context characters passed to the pipeline; bounds tokenization
# and inference time on very large documents.
MAX_CONTEXT_CHARS = 10_000


# Load model with progress indicator
@st.cache_resource(show_spinner="Loading AI model...")
def load_qa_model():
    """Load the extractive QA pipeline once per Streamlit session.

    Returns:
        transformers.Pipeline: a question-answering pipeline placed on GPU
        (device=0) when CUDA is available, otherwise CPU (device=-1).
    """
    # Lazy %-style args: the timestamp is only formatted if INFO is enabled.
    logger.info("Loading model at %s", time())
    return pipeline(
        "question-answering",
        model=MODEL_NAME,  # distilled model: faster alternative to full BERT
        device=0 if torch.cuda.is_available() else -1,
    )


qa_pipeline = load_qa_model()
st.success("Model loaded successfully!")


# ----------------------------
# TEXT EXTRACTION FUNCTIONS
# ----------------------------
def extract_text_from_pdf(uploaded_file):
    """Return the space-joined text of every page in an uploaded PDF."""
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return " ".join(page.get_text() for page in doc)


def extract_text_from_docx(uploaded_file):
    """Return the non-empty paragraphs of an uploaded DOCX, newline-joined."""
    doc = docx.Document(uploaded_file)
    return "\n".join(para.text for para in doc.paragraphs if para.text)


# ----------------------------
# STREAMLIT UI
# ----------------------------
with st.form("qa_form"):
    st.subheader("📄 Document Input")
    uploaded_file = st.file_uploader("Upload PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

    st.subheader("❓ Question Input")
    question = st.text_input("Enter your question:")
    submit_btn = st.form_submit_button("Get Answer")

if submit_btn:
    # An uploaded document takes precedence over pasted text.
    context = ""
    if uploaded_file:
        file_type = uploaded_file.name.split(".")[-1].lower()
        if file_type == "pdf":
            context = extract_text_from_pdf(uploaded_file)
        elif file_type == "docx":
            context = extract_text_from_docx(uploaded_file)
    else:
        context = manual_text

    if not context:
        st.warning("Please provide either a document or text input")
    elif not question:
        st.warning("Please enter a question")
    else:
        with st.spinner("Analyzing content..."):
            try:
                # Limit context length to keep inference responsive.
                result = qa_pipeline(
                    question=question, context=context[:MAX_CONTEXT_CHARS]
                )
                st.markdown(f"### ✅ Answer: {result['answer']}")
                st.progress(result["score"])  # confidence score in [0, 1]
                st.caption(f"Confidence: {result['score']:.0%}")
            except Exception as e:
                # Surface the failure in the UI rather than crashing the app.
                st.error(f"Error processing request: {str(e)}")

# ----------------------------
# ADVANCED SECTION
# ----------------------------
with st.expander("⚙️ Advanced Options"):
    st.subheader("Model Information")
    st.code(f"Using: {MODEL_NAME}")
    st.caption("Optimized for fast inference on limited resources")