File size: 2,158 Bytes
1549d01
 
 
 
 
 
 
 
 
 
9072935
b843db7
1549d01
 
 
 
 
 
 
 
 
 
9072935
1549d01
 
 
 
 
 
 
 
 
 
 
 
 
9072935
1549d01
 
 
 
 
9072935
 
1549d01
 
 
9072935
1549d01
9072935
1549d01
 
9072935
1549d01
 
 
9072935
1549d01
9072935
1549d01
 
 
9072935
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import streamlit as st
import os
import faiss
import numpy as np
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq

# The Groq API key is read from the process environment (e.g. populated from
# a Hugging Face secret) so it never has to be hard-coded in this file.
# The value is None when the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts; the caller in this file
            passes the object returned by ``st.file_uploader``.

    Returns:
        str: The text of all pages joined in page order with no separator.
        Pages that yield no text contribute nothing, so the result may be
        an empty string.
    """
    reader = PdfReader(pdf_file)
    # A page with no text layer (e.g. a scanned image) may produce a falsy
    # result; `or ""` keeps such a page from breaking the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)

def split_text_into_chunks(text, chunk_size=500, chunk_overlap=100):
    """Split ``text`` into overlapping chunks on newline boundaries.

    Args:
        text: The full document text to split.
        chunk_size: Size limit handed to the splitter, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with
            ``len``.

    Returns:
        The value of ``CharacterTextSplitter.split_text`` for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)

def store_embeddings(chunks):
    """Embed ``chunks`` and index them in a FAISS vector store.

    Args:
        chunks: The texts to embed and index.

    Returns:
        The vector store built by ``FAISS.from_texts`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    embedder = HuggingFaceEmbeddings(model_name=model_id)
    return FAISS.from_texts(chunks, embedder)

def query_groq(prompt, model="llama-3.3-70b-versatile"):
    """Send ``prompt`` to the Groq chat-completions API and return the reply.

    Args:
        prompt: Text sent as a single ``user`` message.
        model: Groq model ID to query. The previous hard-coded value,
            ``"llama-3-70b-8192"``, does not match Groq's model naming.
            NOTE(review): confirm the default against Groq's current model
            list before deploying.

    Returns:
        The message content of the first choice in the response.
    """
    client = Groq(api_key=GROQ_API_KEY)
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return response.choices[0].message.content

def ask_question(query, vectorstore):
    """Answer ``query`` using the three most similar chunks as context.

    Args:
        query: The user's question.
        vectorstore: Object exposing ``similarity_search(query, k=...)``
            whose results carry a ``page_content`` attribute.

    Returns:
        Whatever ``query_groq`` returns for the assembled prompt.
    """
    top_matches = vectorstore.similarity_search(query, k=3)
    passages = (match.page_content for match in top_matches)
    context = "\n\n".join(passages)
    return query_groq(f"Context:\n{context}\n\nQuestion: {query}")

# --- Streamlit UI: upload a PDF, index it, then answer questions about it ---
st.set_page_config(page_title="RAG PDF Chatbot", layout="centered")
st.title("📄 RAG Chatbot (PDF + FAISS + Groq API)")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    # Streamlit re-executes this script on every widget interaction, so the
    # index is kept in session state and rebuilt only when a different file
    # is uploaded, instead of re-parsing and re-embedding for each question.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_key:
        with st.spinner("Reading and indexing..."):
            text = extract_text_from_pdf(uploaded_file)
            chunks = split_text_into_chunks(text)
            # An empty chunk list (e.g. a PDF without a text layer) is
            # recorded as None rather than handed to the vector store.
            st.session_state["vectorstore"] = (
                store_embeddings(chunks) if chunks else None
            )
            st.session_state["indexed_file"] = file_key

    vectorstore = st.session_state["vectorstore"]
    if vectorstore is None:
        st.error("No extractable text was found in this PDF.")
    else:
        st.success("PDF indexed!")

        query = st.text_input("Ask something about the document:")
        if query:
            with st.spinner("Generating answer..."):
                response = ask_question(query, vectorstore)
                st.markdown(f"**Answer:** {response}")