Sazzz02 committed on
Commit
f56d8ed
·
verified ·
1 Parent(s): a5f24dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -4,12 +4,13 @@ import hashlib
4
  import pickle
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.document_loaders import PyPDFLoader
7
- from langchain_openai import OpenAIEmbeddings, ChatOpenAI
8
  from langchain_community.vectorstores import FAISS
9
  from langchain.chains import RetrievalQA
 
10
 
11
- # Load/OpenAI key from Hugging Face Secrets
12
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
13
 
14
  # Directory to cache vectorstores
15
  CACHE_DIR = "vector_cache"
@@ -28,7 +29,7 @@ def build_vectorstore(pdf_path: str):
28
  loader = PyPDFLoader(pdf_path)
29
  documents = loader.load()
30
 
31
- # Chunking strategy (important for assignment accuracy)
32
  text_splitter = RecursiveCharacterTextSplitter(
33
  chunk_size=1000,
34
  chunk_overlap=200,
@@ -36,7 +37,7 @@ def build_vectorstore(pdf_path: str):
36
  )
37
  chunks = text_splitter.split_documents(documents)
38
 
39
- embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
40
  vectorstore = FAISS.from_documents(chunks, embeddings)
41
  return vectorstore
42
 
@@ -67,12 +68,14 @@ def rag_bot(question: str, pdf_path: str):
67
  vectorstore = get_vectorstore(pdf_path)
68
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
69
 
 
 
 
 
 
 
70
  qa = RetrievalQA.from_chain_type(
71
- llm=ChatOpenAI(
72
- model="gpt-3.5-turbo",
73
- temperature=0,
74
- openai_api_key=OPENAI_API_KEY
75
- ),
76
  chain_type="stuff",
77
  retriever=retriever,
78
  )
@@ -84,7 +87,7 @@ def rag_bot(question: str, pdf_path: str):
84
 
85
  # ------------------ Gradio UI ------------------
86
  with gr.Blocks() as demo:
87
- gr.Markdown("## 📖 RAG Q&A Bot – Upload a PDF and Ask Questions")
88
 
89
  with gr.Row():
90
  pdf_file = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
 
4
  import pickle
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain_community.vectorstores import FAISS
9
  from langchain.chains import RetrievalQA
10
+ from langchain_groq import ChatGroq
11
 
12
+ # Load Groq API key
13
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
14
 
15
  # Directory to cache vectorstores
16
  CACHE_DIR = "vector_cache"
 
29
  loader = PyPDFLoader(pdf_path)
30
  documents = loader.load()
31
 
32
+ # Chunking strategy
33
  text_splitter = RecursiveCharacterTextSplitter(
34
  chunk_size=1000,
35
  chunk_overlap=200,
 
37
  )
38
  chunks = text_splitter.split_documents(documents)
39
 
40
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
41
  vectorstore = FAISS.from_documents(chunks, embeddings)
42
  return vectorstore
43
 
 
68
  vectorstore = get_vectorstore(pdf_path)
69
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
70
 
71
+ # Use Groq LLM
72
+ llm = ChatGroq(
73
+ groq_api_key=GROQ_API_KEY,
74
+ model_name="mixtral-8x7b-32768", # or "llama2-70b-4096"
75
+ )
76
+
77
  qa = RetrievalQA.from_chain_type(
78
+ llm=llm,
 
 
 
 
79
  chain_type="stuff",
80
  retriever=retriever,
81
  )
 
87
 
88
  # ------------------ Gradio UI ------------------
89
  with gr.Blocks() as demo:
90
+ gr.Markdown("## 📖 RAG Q&A Bot – Powered by Groq + HuggingFace Embeddings")
91
 
92
  with gr.Row():
93
  pdf_file = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])