Yatheshr committed on
Commit
d0da0f6
·
verified ·
1 Parent(s): ba67be3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -85
app.py CHANGED
@@ -1,90 +1,89 @@
1
- import os
2
  import gradio as gr
3
- from typing import List
 
 
4
  from langchain_community.document_loaders import PyPDFLoader
5
- from langchain_text_splitters import RecursiveCharacterTextSplitter
6
- from langchain_community.embeddings import HuggingFaceEmbeddings
7
  from langchain_community.vectorstores import FAISS
8
- import google.generativeai as genai
9
-
10
- # Path to save vector index
11
- INDEX_DIR = "rag_multi_pdf_index"
12
-
13
# Step 1: Create knowledge base from PDFs
def create_knowledge_base(pdf_files: List) -> str:
    """Build and persist a FAISS index from the uploaded PDF files.

    Splits every PDF into overlapping chunks, embeds them with a local
    sentence-transformer, and saves the index to INDEX_DIR.

    Returns a human-readable status string (success or error) for the UI.
    """
    if not pdf_files:
        return "❌ No PDFs uploaded."

    all_chunks = []
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)

    for file in pdf_files:
        try:
            # Loader construction can itself raise (missing file, bad
            # permissions), so it belongs inside the try block too.
            loader = PyPDFLoader(file.name)
            docs = loader.load()
            chunks = splitter.split_documents(docs)
            all_chunks.extend(chunks)
        except Exception as e:
            return f"❌ Error reading {file.name}: {str(e)}"

    if not all_chunks:
        return "❌ No content extracted from PDFs."

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(all_chunks, embeddings)
    vectorstore.save_local(INDEX_DIR)

    return f"βœ… Knowledge base created with {len(all_chunks)} chunks from {len(pdf_files)} PDFs."
38
-
39
# Step 2: Load vectorstore
def load_vectorstore() -> FAISS:
    """Reload the locally saved FAISS index with the same embedding model.

    allow_dangerous_deserialization is required by recent langchain-community
    releases to unpickle a local index; it is safe here because the index is
    always produced by this app itself, never by untrusted input.
    """
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    return FAISS.load_local(INDEX_DIR, embeddings, allow_dangerous_deserialization=True)
43
-
44
# Step 3: Ask question via Gemini using retrieved context
def chat_with_rag(api_key: str, question: str) -> str:
    """Answer *question* using the top retrieved chunks and Gemini."""
    # Gemini keys begin with "AI"; reject anything else before any API call.
    if not (api_key and api_key.startswith("AI")):
        return "❌ Invalid Gemini API Key. It should start with 'AI'."

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel("gemini-pro")
    except Exception as e:
        return f"❌ Gemini configuration error: {str(e)}"

    try:
        store = load_vectorstore()
        hits = store.similarity_search(question, k=3)
        context = "\n\n".join(doc.page_content for doc in hits)
    except Exception as e:
        return f"❌ Error loading vectorstore or retrieving context: {str(e)}"

    prompt = f"Use the following context to answer the question:\n\n{context}\n\nQuestion: {question}"

    try:
        # Return the model's text directly from the chained call.
        return model.generate_content(prompt).text
    except Exception as e:
        return f"❌ Gemini error: {str(e)}"
69
-
70
# Step 4: Gradio UI (component creation order defines layout order)
with gr.Blocks(title="πŸ“š RAG Q&A with Gemini") as demo:
    gr.Markdown("## πŸ“„ Upload multiple PDFs β†’ 🧠 Build Knowledge Base β†’ πŸ€– Ask Questions with Gemini")

    # Credentials first, then the knowledge-base builder controls.
    api_key_box = gr.Textbox(label="πŸ” Gemini API Key", placeholder="Enter your Gemini API Key", type="password")

    pdf_upload = gr.File(label="πŸ“‚ Upload PDFs", file_types=[".pdf"], file_count="multiple")
    build_btn = gr.Button("πŸ“„ Create Knowledge Base")
    status_box = gr.Textbox(label="πŸ“¦ Knowledge Base Status", interactive=False)

    build_btn.click(fn=create_knowledge_base, inputs=[pdf_upload], outputs=[status_box])

    # Q&A section.
    question_box = gr.Textbox(label="❓ Ask a Question")
    answer_box = gr.Textbox(label="πŸ’¬ Gemini Answer", lines=10, interactive=False)
    ask_button = gr.Button("πŸš€ Ask")

    ask_button.click(fn=chat_with_rag, inputs=[api_key_box, question_box], outputs=[answer_box])

# Step 5: Launch app
if __name__ == "__main__":
    demo.launch()
 
 
 
1
  import gradio as gr
2
+ import tempfile
3
+ import os
4
+
5
  from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
8
  from langchain_community.vectorstores import FAISS
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.callbacks.base import BaseCallbackHandler
11
+
12
# Shared mutable state: populated by create_kb, consumed by ask_question.
kb = None
retriever = None
qa = None
16
+
17
class StreamHandler(BaseCallbackHandler):
    """Accumulates streamed LLM tokens and forwards the running text."""

    def __init__(self, update_fn):
        # update_fn receives the full text-so-far on every new token.
        self.update_fn = update_fn
        self.text = ""

    def on_llm_new_token(self, token: str, **kwargs):
        """Append *token* and push the accumulated text to the consumer."""
        self.text = self.text + token
        self.update_fn(self.text)
25
+
26
def save_pdfs(pdf_list):
    """Persist uploaded PDFs to disk and return their filesystem paths.

    Gradio may hand back plain path strings, tempfile wrappers exposing
    ``.name``, or raw file-like objects. The original unconditional
    ``pdf.read()`` crashed on the first two forms, which are exactly what
    current Gradio versions deliver for ``gr.File`` inputs.
    """
    paths = []
    for pdf in pdf_list:
        if isinstance(pdf, (str, os.PathLike)):
            # Already a path on disk (gradio >= 4 default).
            paths.append(os.fspath(pdf))
            continue
        name = getattr(pdf, "name", None)
        if name and os.path.exists(name):
            # Tempfile wrapper: reuse the file gradio already wrote.
            paths.append(name)
            continue
        # Fallback: a real file-like object we must copy ourselves.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
        tmp.write(pdf.read())
        tmp.close()
        paths.append(tmp.name)
    return paths
34
+
35
def create_kb(api_key, pdf_list):
    """Build a FAISS knowledge base from uploaded PDFs.

    Stores the retriever in module-level state for ask_question and
    returns a status string for the UI.

    Bug fixed: the original called RetrievalQA.from_chain_type(llm=None, ...),
    which raises at construction time, so KB creation always fell through to
    the error branch. The chain is now built per-question with the live LLM.
    """
    global retriever, qa
    if not pdf_list:
        return "❌ Error creating KB: no PDFs uploaded"
    try:
        pdf_paths = save_pdfs(pdf_list)
        docs = []
        for path in pdf_paths:
            loader = PyPDFLoader(path)
            docs.extend(loader.load())
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(docs)
        embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004", google_api_key=api_key)
        db = FAISS.from_documents(chunks, embeddings)
        retriever = db.as_retriever(search_kwargs={"k": 3})
        qa = None  # chain is (re)built in ask_question with the live LLM
        return "βœ… Knowledge base created."
    except Exception as e:
        return f"❌ Error creating KB: {e}"
52
+
53
def ask_question(api_key, question, chat_history, set_stream):
    """Run a RAG query against the KB and append a user/assistant turn.

    Returns (updated chat history, empty string to clear the input box).

    Bugs fixed: RetrievalQA has no assignable ``llm`` attribute, so the
    original ``qa.llm = llm`` never wired the model into the chain; and
    ``set_stream`` arrives as a gr.State *value* (a plain string), so
    calling it crashed on the first streamed token.
    """
    global retriever, qa
    if retriever is None:
        return chat_history, "❌ Create KB first."

    # Only forward tokens when the caller actually supplied a callable.
    push = set_stream if callable(set_stream) else (lambda _text: None)
    handler = StreamHandler(push)

    llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-pro-latest",
                                 google_api_key=api_key,
                                 streaming=True,
                                 callbacks=[handler])
    # Build the chain here with the live LLM instead of `qa.llm = llm`.
    qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

    chat_history = chat_history or []
    chat_history.append({"role": "user", "content": question})
    result = qa.invoke({"query": question})
    # Prefer the streamed text; fall back to the chain's final answer.
    answer = handler.text or result.get("result", "")
    chat_history.append({"role": "assistant", "content": answer})
    return chat_history, ""
68
+
69
with gr.Blocks() as demo:
    gr.Markdown("# πŸ“š Multi‑PDF RAG Chat with Gemini")

    with gr.Column():
        api_key = gr.Textbox(show_label=False, placeholder="Enter your Gemini API Key", type="password")
        pdfs = gr.File(file_types=[".pdf"], label="Upload PDFs", file_count="multiple")
        kb_status = gr.Textbox(label="Status")
        create_btn = gr.Button("▢️ Create Knowledge Base")

    create_btn.click(create_kb, inputs=[api_key, pdfs], outputs=kb_status)

    chatbot = gr.Chatbot(label="🧠 Assistant", type="messages")
    question = gr.Textbox(show_label=False, placeholder="Ask a question")
    stream_output = gr.State("")  # to capture stream text
    send = gr.Button("πŸ” Ask")

    # Bug fixed: the second output must be a component, not the literal "";
    # wiring the question box here clears it with the "" ask_question returns.
    send.click(fn=ask_question,
               inputs=[api_key, question, chatbot, stream_output],
               outputs=[chatbot, question])

# Launch only when run as a script (matches the previous version's guard).
if __name__ == "__main__":
    demo.launch()