|
|
import streamlit as st |
|
|
import torch |
|
|
import os |
|
|
from transformers import pipeline |
|
|
import fitz |
|
|
import docx |
|
|
from time import time |
|
|
|
|
|
|
|
|
import logging |
|
|
logging.basicConfig(level=logging.INFO) |
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Page chrome and model-cache configuration (runs once per script execution).
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Fast QA App", layout="wide")
st.title("π§ Instant Question Answering")

# Keep downloaded weights next to the app so container restarts reuse them.
# NOTE(review): TRANSFORMERS_CACHE is honored but deprecated in newer
# transformers releases in favor of HF_HOME — confirm before upgrading.
model_cache_path = os.path.join(os.getcwd(), "model_cache")
os.makedirs(model_cache_path, exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = model_cache_path
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner="Loading AI model...")
def load_qa_model():
    """Build the extractive question-answering pipeline.

    Wrapped in ``st.cache_resource`` so the model is downloaded and
    initialized once per server process rather than on every Streamlit
    rerun.

    Returns:
        A ``transformers`` question-answering pipeline placed on the
        first CUDA device when available, otherwise on CPU (device -1).
    """
    # Lazy %-args: the message is only formatted if INFO is enabled.
    logger.info("Loading model at %s", time())
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "question-answering",
        model="distilbert-base-uncased-distilled-squad",
        device=device,
    )
|
|
|
|
|
# Eagerly create (or fetch the cached) pipeline at script start so the
# first question does not pay the model-loading cost inside the form.
qa_pipeline = load_qa_model()
st.success("Model loaded successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_text_from_pdf(uploaded_file):
    """Return the plain text of every page of *uploaded_file*, space-joined.

    The file-like object is read fully into memory and parsed with PyMuPDF.
    """
    pdf_bytes = uploaded_file.read()
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        # Materialize page text before the document is closed.
        page_texts = [page.get_text() for page in pdf_document]
    return " ".join(page_texts)
|
|
|
|
|
def extract_text_from_docx(uploaded_file):
    """Return the non-empty paragraphs of a .docx file joined by newlines."""
    document = docx.Document(uploaded_file)
    non_empty_paragraphs = (p.text for p in document.paragraphs if p.text)
    return "\n".join(non_empty_paragraphs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Input form: document (upload or paste) plus the question. Using a form
# batches all widget changes into a single rerun on submit instead of
# rerunning the script on every keystroke.
with st.form("qa_form"):
    st.subheader("π Document Input")
    # Upload takes precedence over pasted text (see the submit handler).
    uploaded_file = st.file_uploader("Upload PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

    st.subheader("β Question Input")
    question = st.text_input("Enter your question:")
    submit_btn = st.form_submit_button("Get Answer")
|
|
|
|
|
# Submit handler: resolve the context (uploaded file wins over pasted
# text), validate inputs, then run the QA pipeline and render the answer.
if submit_btn:
    context = ""
    if uploaded_file:
        # Dispatch on the file extension; the uploader already restricts
        # choices to pdf/docx, so no third branch is needed.
        file_type = uploaded_file.name.split(".")[-1].lower()
        if file_type == "pdf":
            context = extract_text_from_pdf(uploaded_file)
        elif file_type == "docx":
            context = extract_text_from_docx(uploaded_file)
    else:
        context = manual_text

    if not context:
        st.warning("Please provide either a document or text input")
    elif not question:
        st.warning("Please enter a question")
    else:
        with st.spinner("Analyzing content..."):
            try:
                # Cap very long documents: DistilBERT's usable context is
                # limited and huge inputs only slow inference down.
                result = qa_pipeline(question=question, context=context[:10000])
                # Fix: the original f-string literal was split across two
                # physical lines (a SyntaxError); rejoined onto one line.
                st.markdown(f"### ✅ Answer: {result['answer']}")
                st.progress(result["score"])
                st.caption(f"Confidence: {result['score']:.0%}")
            except Exception as e:
                # Keep the traceback in the server log while showing a
                # friendly message in the UI.
                logger.exception("QA pipeline failed")
                st.error(f"Error processing request: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Collapsible footer with static model information.
with st.expander("βοΈ Advanced Options"):
    st.subheader("Model Information")
    # Plain string literal: the original f-prefix had no placeholders (F541).
    st.code("Using: distilbert-base-uncased-distilled-squad")
    st.caption("Optimized for fast inference on limited resources")