"""Streamlit front end for OTT Bot.

Embeds the user's question, fetches context chunks through ``rag.retrieve``,
asks a Groq-hosted chat model to answer from that context, and offers the
answer as a PDF download.
"""

import os

import numpy as np
import streamlit as st
from groq import APIError, Groq
from sentence_transformers import SentenceTransformer

from config import EMBEDDING_MODEL
from ingest import build_index
from pdf_utils import answer_to_pdf
from rag import retrieve

# ---------------- CONFIG ----------------
st.set_page_config(page_title="OTT Bot", layout="wide")

# Chat model used for answer generation.
CHAT_MODEL = "llama-3.1-8b-instant"


# ---------------- LOAD MODELS ----------------
@st.cache_resource
def load_embedder() -> SentenceTransformer:
    """Return the sentence-embedding model named by ``EMBEDDING_MODEL``.

    Decorated with ``st.cache_resource`` so the model object is created once
    and reused instead of being rebuilt on every script rerun.
    """
    return SentenceTransformer(EMBEDDING_MODEL)


embedder = load_embedder()

# ---------------- GROQ CLIENT ----------------
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

if not GROQ_API_KEY:
    # Without a key no request can succeed, so halt the script run here.
    st.error("❌ GROQ_API_KEY not set in HF Secrets")
    st.stop()

client = Groq(api_key=GROQ_API_KEY)


# ---------------- PROMPT ----------------
def _build_prompt(context_text: str, question: str) -> str:
    """Return the user message sent to the chat model.

    Args:
        context_text: Retrieved passages, already joined into one string.
        question: The user's question.

    Returns:
        The full prompt with the context and question interpolated.
    """
    # The closing instruction asks for thoroughness so that it agrees with the
    # "Do NOT keep the answer short" guideline rather than contradicting it.
    return f"""
You are an expert tutor.
Using ONLY the information in the context below, provide a detailed and well-structured answer.

Guidelines:
- Explain step by step
- Use headings or bullet points where useful
- Give examples
- Do NOT keep the answer short
- If information is missing, say so clearly

Context:
{context_text}

Question:
{question}

Answer clearly and thoroughly.
"""


# ---------------- UI ----------------
st.title("📚 OTT Bot")
st.markdown("Upload PDFs via HF Dataset and query them using AI")

# ---------------- INGESTION ----------------
if st.button("🔨 Build Index"):
    with st.spinner("Building FAISS index from dataset..."):
        build_index()
    st.success("✅ Index built successfully")

st.divider()

# ---------------- QUESTION INPUT ----------------
# Strip surrounding whitespace so a blank-looking input is treated as empty
# and never reaches the embedder or the LLM.
question = st.text_input(
    "Ask a question about the document",
    placeholder="e.g. What is the main concept discussed?",
).strip()

# ---------------- ANSWER ----------------
if question:
    with st.spinner("Searching document..."):
        query_embedding = embedder.encode([question]).astype("float32")
        contexts = retrieve(query_embedding)

    context_text = "\n\n".join(c["text"] for c in contexts)

    if not context_text.strip():
        # The prompt restricts the model to the supplied context, so with no
        # context there is nothing it can legitimately answer from.
        st.warning(
            "⚠️ No relevant content was found for this question. "
            "Build the index or try rephrasing the question."
        )
        st.stop()

    prompt = _build_prompt(context_text, question)

    try:
        with st.spinner("Thinking..."):
            response = client.chat.completions.create(
                model=CHAT_MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            )
    except APIError as exc:
        # Show API failures (connection, rate limit, HTTP status errors) as a
        # readable message instead of a raw traceback.
        st.error(f"❌ The language model request failed: {exc}")
        st.stop()

    answer_text = response.choices[0].message.content

    if not answer_text:
        # An empty completion would yield a blank page and a blank PDF.
        st.warning("⚠️ The model returned an empty answer. Please try again.")
        st.stop()

    st.subheader("Answer")
    st.write(answer_text)

    # ---------------- PDF DOWNLOAD ----------------
    pdf_buffer = answer_to_pdf(question, answer_text)

    st.download_button(
        label="📄 Download Answer as PDF",
        data=pdf_buffer,
        file_name="OTT_Bot_Answer.pdf",
        mime="application/pdf",
    )