Yatheshr committed on
Commit
69a30c8
·
verified ·
1 Parent(s): 0df463b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -37
app.py CHANGED
@@ -1,91 +1,77 @@
1
- import gradio as gr
2
  import os
 
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
6
  from langchain_community.vectorstores import Pinecone as LangchainPinecone
7
  from langchain.chains import RetrievalQA
8
- from pinecone import Pinecone, ServerlessSpec
9
 
10
- # Global constants
11
  INDEX_NAME = "rag-demo-index"
12
 
13
  def process_rag(api_key_gemini, api_key_pinecone, pinecone_env, pdf_file, user_question):
14
  if not api_key_gemini or not api_key_pinecone:
15
  return "❌ Please provide both Gemini and Pinecone API keys."
16
-
17
  if not pdf_file:
18
  return "❌ Please upload a PDF file."
19
 
20
  try:
21
- # Step 1: Load and split PDF
22
  loader = PyPDFLoader(pdf_file.name)
23
  documents = loader.load()
24
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
25
  docs = splitter.split_documents(documents)
26
 
27
- # Step 2: Gemini Embeddings
28
  embeddings = GoogleGenerativeAIEmbeddings(
29
- model="models/embedding-001",
30
  google_api_key=api_key_gemini
31
  )
32
 
33
- # Step 3: Pinecone setup (new SDK)
34
- pc = Pinecone(api_key=api_key_pinecone)
35
 
36
- # Create index if it doesn't exist
37
- if INDEX_NAME not in pc.list_indexes().names():
38
- pc.create_index(
39
- name=INDEX_NAME,
40
- dimension=768,
41
- metric="cosine",
42
- spec=ServerlessSpec(
43
- cloud="aws", # or "gcp"
44
- region=pinecone_env # example: "us-east-1"
45
- )
46
- )
47
 
48
- index = pc.Index(INDEX_NAME)
49
-
50
- # Step 4: Store documents in Pinecone
51
  vectordb = LangchainPinecone.from_documents(
52
- docs,
53
- embedding=embeddings,
54
- index=index
55
  )
56
 
57
- # Step 5: Create retriever
58
  retriever = vectordb.as_retriever()
59
-
60
- # Step 6: Use Gemini LLM
61
  llm = ChatGoogleGenerativeAI(
62
- model="gemini-pro",
63
  google_api_key=api_key_gemini,
64
  temperature=0
65
  )
66
  qa_chain = RetrievalQA.from_chain_type(
67
- llm=llm,
68
- retriever=retriever,
69
  return_source_documents=True
70
  )
71
 
72
- # Step 7: Ask the question
73
  result = qa_chain({"query": user_question})
74
  return result["result"]
75
 
76
  except Exception as e:
77
  return f"❌ Error: {str(e)}"
78
 
79
- # Gradio interface
80
  with gr.Blocks() as app:
81
- gr.Markdown("## πŸ“„πŸ” PDF Question Answering using Pinecone + Gemini (RAG)")
82
 
83
  with gr.Row():
84
  gemini_key = gr.Textbox(label="πŸ” Gemini API Key", type="password")
85
  pinecone_key = gr.Textbox(label="🌲 Pinecone API Key", type="password")
86
- pinecone_env = gr.Textbox(label="🌍 Pinecone Region (e.g., us-east-1)", value="us-east-1")
87
 
88
- pdf_file = gr.File(label="πŸ“„ Upload your PDF", file_types=[".pdf"])
89
  user_question = gr.Textbox(label="❓ Ask your question")
90
  answer_output = gr.Textbox(label="πŸ€– Gemini Answer", lines=10)
91
 
 
 
1
  import os
2
+ import gradio as gr
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
6
  from langchain_community.vectorstores import Pinecone as LangchainPinecone
7
  from langchain.chains import RetrievalQA
8
+ import pinecone # OLD SDK (pinecone-client==2.2.4)
9
 
 
10
  INDEX_NAME = "rag-demo-index"
11
 
12
def process_rag(api_key_gemini, api_key_pinecone, pinecone_env, pdf_file, user_question):
    """Answer a question about an uploaded PDF via a RAG pipeline.

    The PDF is loaded and split into overlapping chunks, embedded with
    Gemini embeddings, stored in a Pinecone index (created on first use),
    and queried through a LangChain RetrievalQA chain backed by the
    ``gemini-pro`` chat model.

    Parameters:
        api_key_gemini: Google Generative AI API key.
        api_key_pinecone: Pinecone API key.
        pinecone_env: Pinecone environment/region string (old-SDK style,
            e.g. "us-east-1").
        pdf_file: Uploaded PDF — either a filesystem path (str) or a
            tempfile-like object exposing ``.name``, depending on the
            Gradio version.
        user_question: Natural-language question to answer.

    Returns:
        The model's answer string, or a "❌ ..."-prefixed message on
        invalid input or failure (the function never raises; errors are
        reported back to the UI).
    """
    if not api_key_gemini or not api_key_pinecone:
        return "❌ Please provide both Gemini and Pinecone API keys."

    if not pdf_file:
        return "❌ Please upload a PDF file."

    # Avoid running the whole embed/upsert pipeline for a blank question.
    if not user_question or not user_question.strip():
        return "❌ Please enter a question."

    try:
        # Gradio's gr.File yields a tempfile-like object (with .name) on
        # older versions but a plain path string on newer ones — accept both.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Step 1: Load and split the PDF into overlapping chunks.
        documents = PyPDFLoader(pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = splitter.split_documents(documents)

        # Step 2: Gemini embeddings (768-dim — must match the index below).
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=api_key_gemini,
        )

        # Step 3: Initialize Pinecone (old SDK, pinecone-client 2.x) and
        # create the index on first use.
        pinecone.init(api_key=api_key_pinecone, environment=pinecone_env)
        if INDEX_NAME not in pinecone.list_indexes():
            # dimension=768 matches models/embedding-001 output vectors.
            pinecone.create_index(name=INDEX_NAME, dimension=768, metric="cosine")

        # Step 4: Upsert the chunks through the LangChain wrapper.
        vectordb = LangchainPinecone.from_documents(
            docs,
            embedding=embeddings,
            index_name=INDEX_NAME,
        )

        # Step 5: Build the retrieval-augmented QA chain.
        retriever = vectordb.as_retriever()
        llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            google_api_key=api_key_gemini,
            temperature=0,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            return_source_documents=True,
        )

        # Step 6: Run the query. NOTE(review): calling the chain directly is
        # deprecated in newer LangChain releases (prefer .invoke) but matches
        # the versions this app appears to pin — confirm before upgrading.
        result = qa_chain({"query": user_question})
        return result["result"]

    except Exception as e:
        # Surface any failure to the UI rather than crashing the app.
        return f"❌ Error: {e}"
64
 
65
+ # πŸŽ›οΈ Gradio UI
66
  with gr.Blocks() as app:
67
+ gr.Markdown("## πŸ“„πŸ” PDF Q&A using Pinecone + Gemini (RAG)")
68
 
69
  with gr.Row():
70
  gemini_key = gr.Textbox(label="πŸ” Gemini API Key", type="password")
71
  pinecone_key = gr.Textbox(label="🌲 Pinecone API Key", type="password")
72
+ pinecone_env = gr.Textbox(label="🌍 Pinecone Environment (e.g., us-east-1)")
73
 
74
+ pdf_file = gr.File(label="πŸ“„ Upload PDF", file_types=[".pdf"])
75
  user_question = gr.Textbox(label="❓ Ask your question")
76
  answer_output = gr.Textbox(label="πŸ€– Gemini Answer", lines=10)
77