# ragcchatBot / app.py
# taha-18's picture
# Update app.py
# b843db7 verified
import streamlit as st
import os
import faiss
import numpy as np
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq
# Read the Groq API key from the environment (e.g. a Hugging Face Space
# secret) so it is never hard-coded in the source. The value is None when the
# GROQ_API_KEY variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF source handed directly to ``PdfReader`` (the app
            passes the object returned by Streamlit's file uploader).

    Returns:
        A single string with the extracted text of all pages. A page that
        yields no text contributes an empty string.
    """
    reader = PdfReader(pdf_file)
    # ``or ""`` keeps a page with no extractable text (e.g. a scanned image)
    # from breaking the concatenation; ``join`` avoids rebuilding the string
    # once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)
def split_text_into_chunks(text, chunk_size=500, chunk_overlap=100):
    """Split ``text`` on newlines into overlapping chunks.

    Args:
        text: The full document text.
        chunk_size: Size limit passed to the splitter, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            splitter.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` produces for ``text``.
    """
    return CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    ).split_text(text)
def store_embeddings(chunks):
    """Embed ``chunks`` and build a FAISS vector store from them.

    Args:
        chunks: The text chunks to index.

    Returns:
        The vector store created by ``FAISS.from_texts``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(chunks, embedder)
def query_groq(prompt, model="llama-3.3-70b-versatile"):
    """Send ``prompt`` to the Groq chat-completions API and return the reply.

    Args:
        prompt: Text sent as a single user message.
        model: Groq model id to query. The previous hard-coded value,
            "llama-3-70b-8192", is not an id Groq serves, so the default is
            now a currently listed 70B Llama model; callers may override it.

    Returns:
        The message content of the first completion choice.
    """
    client = Groq(api_key=GROQ_API_KEY)
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
    )
    return completion.choices[0].message.content
def ask_question(query, vectorstore):
    """Answer ``query`` using the three most similar chunks as context.

    Args:
        query: The user's question.
        vectorstore: Store whose ``similarity_search`` supplies the context
            documents.

    Returns:
        The answer text returned by ``query_groq``.
    """
    matches = vectorstore.similarity_search(query, k=3)
    context = "\n\n".join(match.page_content for match in matches)
    return query_groq(f"Context:\n{context}\n\nQuestion: {query}")
# --- Streamlit UI: upload a PDF, index it once, then answer questions -------
st.set_page_config(page_title="RAG PDF Chatbot", layout="centered")
st.title("📄 RAG Chatbot (PDF + FAISS + Groq API)")

uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
if uploaded_file:
    # Streamlit re-executes this script on every interaction (including each
    # submitted question), so keep the index in session state and rebuild it
    # only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("indexed_file") != file_key:
        with st.spinner("Reading and indexing..."):
            text = extract_text_from_pdf(uploaded_file)
            chunks = split_text_into_chunks(text)
            # An index cannot be built from zero chunks (e.g. a scanned PDF
            # with no text layer); record None and report it below.
            st.session_state["vectorstore"] = (
                store_embeddings(chunks) if chunks else None
            )
            st.session_state["indexed_file"] = file_key

    vectorstore = st.session_state["vectorstore"]
    if vectorstore is None:
        st.error("No extractable text was found in this PDF.")
    else:
        st.success("PDF indexed!")
        query = st.text_input("Ask something about the document:")
        if query:
            with st.spinner("Generating answer..."):
                response = ask_question(query, vectorstore)
            st.markdown(f"**Answer:** {response}")