File size: 2,661 Bytes
f648189
bd90524
f648189
 
f6e6c81
f648189
 
 
87fa998
dc425e6
f648189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f129d48
f648189
 
 
f129d48
f648189
42d0898
f648189
dc425e6
f129d48
dc425e6
f648189
 
 
 
 
 
 
 
 
9de5fcf
 
f648189
9de5fcf
f648189
9de5fcf
87f3e40
 
 
 
 
 
 
 
 
 
 
f648189
 
 
 
 
 
 
 
 
 
 
 
fb3f467
f648189
 
 
 
 
 
9de5fcf
d943582
f648189
9de5fcf
d943582
9de5fcf
 
d943582
 
 
 
 
 
9de5fcf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os
import streamlit as st
import numpy as np
from sentence_transformers import SentenceTransformer
from ingest import build_index
from rag import retrieve
from groq import Groq
from config import EMBEDDING_MODEL
from pdf_utils import answer_to_pdf

# ---------------- CONFIG ----------------
# Streamlit page setup: wide layout so long answers use the full viewport.
st.set_page_config(
    page_title="OTT Bot",
    layout="wide",
)

# ---------------- LOAD MODELS ----------------
@st.cache_resource
def load_embedder():
    """Load the sentence-embedding model once and cache it for the app's lifetime.

    ``st.cache_resource`` ensures the (expensive) model load happens a single
    time per process rather than on every Streamlit rerun.
    """
    model = SentenceTransformer(EMBEDDING_MODEL)
    return model

embedder = load_embedder()

# ---------------- GROQ CLIENT ----------------
# The API key is injected through the environment (HF Spaces secrets).
# Fail fast on this rerun if it is missing so no later API call can break
# mid-request.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("❌ GROQ_API_KEY not set in HF Secrets")
    st.stop()

client = Groq(api_key=GROQ_API_KEY)

# ---------------- UI ----------------
# Page header and a one-line usage hint for the user.
st.title("πŸ“š OTT Bot")
st.markdown("Upload PDFs via HF Dataset and query them using AI")

# ---------------- INGESTION ----------------
# One-click (re)build of the FAISS index from the dataset's PDFs.
build_requested = st.button("πŸ”¨ Build Index")
if build_requested:
    with st.spinner("Building FAISS index from dataset..."):
        build_index()
    st.success("βœ… Index built successfully")

st.divider()

# ---------------- QUESTION INPUT ----------------
# Free-text query box; `question` is "" until the user submits something.
question = st.text_input(
    "Ask a question about the document",
    placeholder="e.g. What is the main concept discussed?",
)

# ---------------- ANSWER ----------------
# Full RAG round-trip: embed the question, retrieve matching chunks, build a
# grounded prompt, query the LLM, render the answer, and offer a PDF export.
if question:
    # Step 1: embed the query and search the index.
    with st.spinner("Searching document..."):
        # FAISS-style retrieval expects float32 vectors; encode() on a
        # single-element list yields a (1, dim) array.
        query_embedding = embedder.encode([question]).astype("float32")
        # NOTE(review): retrieve() is defined in rag.py; behavior when the
        # index has not been built yet is not visible here — confirm it
        # fails gracefully rather than raising.
        contexts = retrieve(query_embedding)

    # Step 2: assemble the grounded prompt (pure string work — no need to
    # keep the search spinner up for this).
    context_text = "\n\n".join(c["text"] for c in contexts)

    # BUGFIX: the original prompt demanded "Do NOT keep the answer short"
    # in the guidelines but then closed with "Answer clearly and concisely",
    # contradicting itself. The closing line now matches the guidelines.
    prompt = f"""
You are an expert tutor.

Using ONLY the information in the context below,
provide a detailed and well-structured answer.

Guidelines:
- Explain step by step
- Use headings or bullet points where useful
- Give examples
- Do NOT keep the answer short
- If information is missing, say so clearly

Context:
{context_text}

Question:
{question}

Answer clearly and in detail.
"""

    # Step 3: query the LLM with the grounded prompt.
    with st.spinner("Thinking..."):
        response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )

    answer_text = response.choices[0].message.content

    # Step 4: render the answer.
    st.subheader("Answer")
    st.write(answer_text)

    # ---------------- PDF DOWNLOAD ----------------
    # Render the Q&A pair to an in-memory PDF and expose it for download.
    pdf_buffer = answer_to_pdf(question, answer_text)

    st.download_button(
        label="πŸ“„ Download Answer as PDF",
        data=pdf_buffer,
        file_name="OTT_Bot_Answer.pdf",
        mime="application/pdf"
    )