"""Streamlit app: instant extractive question answering over PDF/DOCX/pasted text.

Runs a distilled SQuAD QA pipeline over user-supplied context and displays the
best answer span together with the model's confidence score.
"""
import logging
import os
from time import time

import docx
import fitz  # PyMuPDF
import streamlit as st
import torch
from transformers import pipeline

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ----------------------------
# SETUP & MODEL LOAD
# ----------------------------
st.set_page_config(page_title="Fast QA App", layout="wide")
st.title("🧠 Instant Question Answering")

# Keep downloaded model weights inside the project directory so repeated
# runs reuse them instead of re-downloading.
cache_dir = os.path.join(os.getcwd(), "model_cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["TRANSFORMERS_CACHE"] = cache_dir

# Single source of truth for the model name — also shown in the
# "Advanced Options" panel, so the two can never drift apart.
MODEL_NAME = "distilbert-base-uncased-distilled-squad"

# Cap on raw context characters passed to the pipeline; bounds tokenization
# and inference time on very large documents.
MAX_CONTEXT_CHARS = 10_000


# Load model with progress indicator
@st.cache_resource(show_spinner="Loading AI model...")
def load_qa_model():
    """Load the extractive QA pipeline once per Streamlit session.

    Returns:
        transformers.Pipeline: a question-answering pipeline placed on GPU
        (device=0) when CUDA is available, otherwise CPU (device=-1).
    """
    # Lazy %-style args: the timestamp is only formatted if INFO is enabled.
    logger.info("Loading model at %s", time())
    return pipeline(
        "question-answering",
        model=MODEL_NAME,  # distilled model: faster alternative to full BERT
        device=0 if torch.cuda.is_available() else -1,
    )


qa_pipeline = load_qa_model()
st.success("Model loaded successfully!")


# ----------------------------
# TEXT EXTRACTION FUNCTIONS
# ----------------------------
def extract_text_from_pdf(uploaded_file):
    """Return the space-joined text of every page in an uploaded PDF."""
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        return " ".join(page.get_text() for page in doc)


def extract_text_from_docx(uploaded_file):
    """Return the non-empty paragraphs of an uploaded DOCX, newline-joined."""
    doc = docx.Document(uploaded_file)
    return "\n".join(para.text for para in doc.paragraphs if para.text)


# ----------------------------
# STREAMLIT UI
# ----------------------------
with st.form("qa_form"):
    st.subheader("📄 Document Input")
    uploaded_file = st.file_uploader("Upload PDF/DOCX", type=["pdf", "docx"])
    manual_text = st.text_area("Or paste text here:", height=150)

    st.subheader("❓ Question Input")
    question = st.text_input("Enter your question:")
    submit_btn = st.form_submit_button("Get Answer")

if submit_btn:
    # An uploaded document takes precedence over pasted text.
    context = ""
    if uploaded_file:
        file_type = uploaded_file.name.split(".")[-1].lower()
        if file_type == "pdf":
            context = extract_text_from_pdf(uploaded_file)
        elif file_type == "docx":
            context = extract_text_from_docx(uploaded_file)
    else:
        context = manual_text

    if not context:
        st.warning("Please provide either a document or text input")
    elif not question:
        st.warning("Please enter a question")
    else:
        with st.spinner("Analyzing content..."):
            try:
                # Limit context length to keep inference responsive.
                result = qa_pipeline(
                    question=question, context=context[:MAX_CONTEXT_CHARS]
                )
                st.markdown(f"### ✅ Answer: {result['answer']}")
                st.progress(result["score"])  # confidence score in [0, 1]
                st.caption(f"Confidence: {result['score']:.0%}")
            except Exception as e:
                # Surface the failure in the UI rather than crashing the app.
                st.error(f"Error processing request: {str(e)}")

# ----------------------------
# ADVANCED SECTION
# ----------------------------
with st.expander("⚙️ Advanced Options"):
    st.subheader("Model Information")
    st.code(f"Using: {MODEL_NAME}")
    st.caption("Optimized for fast inference on limited resources")