Yatheshr committed on
Commit 792970f · verified · 1 Parent(s): ff8bd89

Update app.py

Files changed (1)
  1. app.py +41 -58
app.py CHANGED
@@ -1,91 +1,74 @@
-import os
 import gradio as gr
-
+import os
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
 from langchain_community.vectorstores import Pinecone
 from langchain.chains import RetrievalQA
-
 import pinecone
 
-# Constants
+# Global flag to avoid multiple index creations
 INDEX_NAME = "rag-demo-index"
 
-def process_rag(pdf_file, user_question):
-    # Load secrets from Hugging Face (or env manually)
-    pinecone_api_key = os.environ.get("PINECONE_API_KEY")
-    pinecone_env = os.environ.get("PINECONE_ENVIRONMENT") # Example: "gcp-starter"
-    google_api_key = os.environ.get("GOOGLE_API_KEY")
-
-    if not all([pinecone_api_key, pinecone_env, google_api_key]):
-        return "❌ Missing API key(s). Please check Pinecone & Google Gemini keys."
+def process_rag(api_key_gemini, api_key_pinecone, pinecone_env, pdf_file, user_question):
+    if not api_key_gemini or not api_key_pinecone:
+        return "❌ Please provide both Gemini and Pinecone API keys."
 
     if not pdf_file:
         return "❌ Please upload a PDF file."
 
     try:
-        # 1. Load and split PDF
+        # Step 1: Load and chunk the PDF
         loader = PyPDFLoader(pdf_file.name)
         documents = loader.load()
-
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        docs = text_splitter.split_documents(documents)
-
-        # 2. Gemini Embeddings
-        embeddings = GoogleGenerativeAIEmbeddings(
-            model="models/embedding-001",
-            google_api_key=google_api_key
-        )
+        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+        docs = splitter.split_documents(documents)
+
+        # Step 2: Init Gemini Embeddings
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key_gemini)
 
-        # 3. Init Pinecone (old client)
-        pinecone.init(api_key=pinecone_api_key, environment=pinecone_env)
+        # Step 3: Init Pinecone
+        pinecone.init(api_key=api_key_pinecone, environment=pinecone_env)
 
         if INDEX_NAME not in pinecone.list_indexes():
            pinecone.create_index(name=INDEX_NAME, dimension=768, metric="cosine")
 
-        # 4. Create LangChain-compatible Vector DB
-        vectorstore = Pinecone.from_documents(
-            documents=docs,
-            embedding=embeddings,
-            index_name=INDEX_NAME
-        )
-
-        retriever = vectorstore.as_retriever()
-
-        # 5. Gemini LLM
-        llm = ChatGoogleGenerativeAI(
-            model="gemini-pro",
-            google_api_key=google_api_key,
-            temperature=0
-        )
-
-        # 6. Retrieval QA chain
-        qa_chain = RetrievalQA.from_chain_type(
-            llm=llm,
-            retriever=retriever,
-            return_source_documents=False
-        )
-
-        # 7. Ask question
+        # Step 4: Store docs in Pinecone
+        vectordb = Pinecone.from_documents(docs, embedding=embeddings, index_name=INDEX_NAME)
+
+        # Step 5: Create retriever
+        retriever = vectordb.as_retriever()
+
+        # Step 6: Use Gemini for generation
+        llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=api_key_gemini, temperature=0)
+        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)
+
+        # Step 7: Ask question
         result = qa_chain({"query": user_question})
         return result["result"]
 
     except Exception as e:
         return f"❌ Error: {str(e)}"
 
-# Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("## 🔍 Ask Questions from PDF using Gemini + Pinecone (LangChain RAG)")
-
-    with gr.Row():
-        pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
-        question_input = gr.Textbox(label="❓ Ask your question")
-
-    answer_output = gr.Textbox(label="🤖 Gemini Answer", lines=8)
-
-    ask_button = gr.Button("🔎 Run RAG")
-
-    ask_button.click(fn=process_rag, inputs=[pdf_input, question_input], outputs=answer_output)
-
-demo.launch()
+# Gradio Interface
+with gr.Blocks() as app:
+    gr.Markdown("## 📄🔍 PDF Question Answering using Pinecone + Gemini (RAG)")
+
+    with gr.Row():
+        gemini_key = gr.Textbox(label="🔐 Gemini API Key", type="password")
+        pinecone_key = gr.Textbox(label="🌲 Pinecone API Key", type="password")
+        pinecone_env = gr.Textbox(label="🌍 Pinecone Environment (e.g., us-east1-gcp)")
+
+    pdf_file = gr.File(label="📄 Upload your PDF", file_types=[".pdf"])
+    user_question = gr.Textbox(label="❓ Ask your question")
+    answer_output = gr.Textbox(label="🤖 Gemini Answer", lines=10)
+
+    submit_btn = gr.Button("🔍 Ask")
+
+    submit_btn.click(
+        fn=process_rag,
+        inputs=[gemini_key, pinecone_key, pinecone_env, pdf_file, user_question],
+        outputs=answer_output
+    )
+
+app.launch()
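Note that the updated chain is built with return_source_documents=True, yet the handler still returns only result["result"], so the retrieved chunks are discarded. A minimal sketch of how they could be surfaced, assuming the qa_chain defined in the diff above; answer_with_sources is a hypothetical helper (not part of this commit), and the page numbers rely on the "page" metadata that PyPDFLoader attaches to each chunk:

```python
# Hypothetical helper: return the answer plus the PDF pages the supporting chunks came from.
def answer_with_sources(qa_chain, user_question):
    result = qa_chain({"query": user_question})
    answer = result["result"]
    # Populated only because the chain was created with return_source_documents=True
    sources = result.get("source_documents", [])
    # PyPDFLoader stores a zero-based "page" index in each chunk's metadata
    pages = sorted({doc.metadata.get("page") for doc in sources if doc.metadata.get("page") is not None})
    return f"{answer}\n\nSource pages: {pages}" if pages else answer
```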