AnwinMJ commited on
Commit
06d67dc
·
verified ·
1 Parent(s): b63b782

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -32
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  from langchain_community.embeddings import HuggingFaceEmbeddings
3
  from langchain_community.vectorstores import Chroma
@@ -5,23 +6,15 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.document_loaders import PyPDFLoader
6
  from langchain.chains import RetrievalQA
7
  from langchain.llms.base import LLM
8
- from groq import Groq
9
  from typing import List, Optional
 
 
 
10
 
11
- # Step 1: Load PDF and prepare vector store
12
- loader = PyPDFLoader("ivas103.pdf")
13
- documents = loader.load()
14
-
15
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
16
- docs = text_splitter.split_documents(documents)
17
-
18
- embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
19
- vectorstore = Chroma.from_documents(docs, embedding, persist_directory="rag_chroma_groq")
20
-
21
- # Step 2: Define custom LLM class using Groq
22
  class GroqLLM(LLM):
23
  model: str = "llama3-8b-8192"
24
- api_key: str = "<REDACTED_GROQ_API_KEY>" # leaked secret removed from diff view — revoke this key
25
  temperature: float = 0.7
26
 
27
  def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
@@ -41,30 +34,64 @@ class GroqLLM(LLM):
41
  def _llm_type(self) -> str:
42
  return "groq-llm"
43
 
44
- # Step 3: Create RetrievalQA chain
45
- retriever = vectorstore.as_retriever()
46
- groq_llm = GroqLLM(api_key="<REDACTED_GROQ_API_KEY>") # leaked secret removed from diff view — revoke this key
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- qa_chain = RetrievalQA.from_chain_type(
49
- llm=groq_llm,
50
- retriever=retriever,
51
- return_source_documents=True
52
- )
 
 
 
 
 
 
53
 
54
- # Step 4: Gradio interface function
55
  def ask_question(query):
 
 
 
 
 
 
 
 
 
56
  result = qa_chain({"query": query})
57
  answer = result["result"]
58
- sources = "\n\n".join([doc.metadata.get("source", "Unknown") for doc in result["source_documents"]])
59
  return f"### Answer:\n{answer}\n\n### Sources:\n{sources}"
60
 
61
- # Step 5: Launch Gradio UI
62
- iface = gr.Interface(
63
- fn=ask_question,
64
- inputs=gr.Textbox(label="Ask a question", placeholder="e.g., What is a chassis?"),
65
- outputs=gr.Markdown(),
66
- title="📄 PDF RAG Chatbot (Groq + LangChain)",
67
- description="Ask questions based on the content of the PDF file."
68
- )
 
 
 
 
 
 
 
69
 
70
- iface.launch()
 
1
+ import os
2
  import gradio as gr
3
  from langchain_community.embeddings import HuggingFaceEmbeddings
4
  from langchain_community.vectorstores import Chroma
 
6
  from langchain.document_loaders import PyPDFLoader
7
  from langchain.chains import RetrievalQA
8
  from langchain.llms.base import LLM
 
9
  from typing import List, Optional
10
+ from groq import Groq
11
+ import tempfile
12
+ import shutil
13
 
14
+ # Custom LLM using Groq
 
 
 
 
 
 
 
 
 
 
15
  class GroqLLM(LLM):
16
  model: str = "llama3-8b-8192"
17
+ api_key: str = os.environ.get("GROQ_API_KEY") # Use env var for security
18
  temperature: float = 0.7
19
 
20
  def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
 
34
  def _llm_type(self) -> str:
35
  return "groq-llm"
36
 
37
# Global cache so the vectorstore built at upload time is reusable
# by ask_question() for the rest of the session.
vectorstore_cache = {}

def process_pdf(file_obj):
    """Index an uploaded PDF into an in-memory Chroma vectorstore.

    Parameters
    ----------
    file_obj : the value emitted by ``gr.File`` — either a tempfile-like
        object exposing ``.name`` or a plain filepath string, depending on
        the Gradio version/config.

    Returns a human-readable status string for the UI.
    """
    if file_obj is None:
        return "Please select a PDF file first."

    # Gradio has already saved the upload to disk; resolve its path instead
    # of copying the bytes. ``.read()`` on a filepath string would crash.
    pdf_path = getattr(file_obj, "name", file_obj)

    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(documents)

    embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # Keep the store purely in memory. The previous code persisted into a
    # TemporaryDirectory, which is deleted as soon as the `with` block
    # exits — the on-disk data never survived the function call anyway.
    vectorstore = Chroma.from_documents(docs, embedding)

    # Cache the retriever for use by ask_question() during this session.
    vectorstore_cache["retriever"] = vectorstore.as_retriever()

    return "PDF processed and ready. You can now ask questions."
65
 
 
66
  def ask_question(query):
67
+ if "retriever" not in vectorstore_cache:
68
+ return "Please upload a PDF first."
69
+
70
+ llm = GroqLLM()
71
+ qa_chain = RetrievalQA.from_chain_type(
72
+ llm=llm,
73
+ retriever=vectorstore_cache["retriever"],
74
+ return_source_documents=True
75
+ )
76
  result = qa_chain({"query": query})
77
  answer = result["result"]
78
+ sources = "\n".join([doc.metadata.get("source", "No metadata") for doc in result["source_documents"]])
79
  return f"### Answer:\n{answer}\n\n### Sources:\n{sources}"
80
 
81
+ with gr.Blocks() as demo:
82
+ gr.Markdown("## 📄 PDF Question Answering Bot (Groq + HuggingFace + LangChain)")
83
+
84
+ with gr.Row():
85
+ pdf_file = gr.File(label="Upload your PDF")
86
+ upload_btn = gr.Button("Process PDF")
87
+
88
+ upload_output = gr.Textbox(label="Status", interactive=False)
89
+ upload_btn.click(process_pdf, inputs=pdf_file, outputs=upload_output)
90
+
91
+ query = gr.Textbox(label="Ask a question")
92
+ answer_output = gr.Markdown()
93
+ query_btn = gr.Button("Get Answer")
94
+
95
+ query_btn.click(ask_question, inputs=query, outputs=answer_output)
96
 
97
+ demo.launch()