Yatheshr committed on
Commit
138ff5d
Β·
verified Β·
1 Parent(s): 4bfe2fa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from langchain_community.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
6
+ from langchain_community.vectorstores import Pinecone
7
+ from langchain.chains import RetrievalQA
8
+ import pinecone
9
# Name of the single Pinecone index shared by every request. process_rag
# checks pinecone.list_indexes() before creating it, so the index is only
# created on the first run rather than once per question.
INDEX_NAME = "rag-demo-index"
13
def process_rag(api_key_gemini, api_key_pinecone, pinecone_env, pdf_file, user_question):
    """Answer a question about an uploaded PDF using a Pinecone + Gemini RAG pipeline.

    Args:
        api_key_gemini: Google Gemini API key, used for both embeddings and generation.
        api_key_pinecone: Pinecone API key.
        pinecone_env: Pinecone environment name (e.g. "us-east1-gcp").
        pdf_file: Uploaded file object exposing a filesystem path as ``.name``
            (the object Gradio's File component passes in).
        user_question: Natural-language question to answer from the PDF.

    Returns:
        The generated answer string, or a "❌ ..."-prefixed message when a
        required input is missing or any step of the pipeline raises.
    """
    # Fail fast on every required input so the user gets a clear message
    # instead of a cryptic client error — and so we never run the paid
    # embedding/indexing steps for a request that cannot succeed.
    if not api_key_gemini or not api_key_pinecone:
        return "❌ Please provide both Gemini and Pinecone API keys."

    # pinecone.init() requires the environment; previously an empty value
    # fell through to a raw client exception.
    if not pinecone_env:
        return "❌ Please provide the Pinecone environment."

    if not pdf_file:
        return "❌ Please upload a PDF file."

    # An empty question would still index the whole PDF before failing.
    if not user_question or not user_question.strip():
        return "❌ Please enter a question."

    try:
        # Step 1: Load and chunk the PDF. Overlapping chunks keep context
        # that straddles a chunk boundary retrievable.
        loader = PyPDFLoader(pdf_file.name)
        documents = loader.load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = splitter.split_documents(documents)

        # Step 2: Gemini embeddings. embedding-001 emits 768-dim vectors,
        # which must match the index dimension created below.
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001", google_api_key=api_key_gemini
        )

        # Step 3: Init Pinecone and create the shared index only if missing.
        pinecone.init(api_key=api_key_pinecone, environment=pinecone_env)
        if INDEX_NAME not in pinecone.list_indexes():
            pinecone.create_index(name=INDEX_NAME, dimension=768, metric="cosine")

        # Step 4: Upsert the document chunks into the index.
        vectordb = Pinecone.from_documents(docs, embedding=embeddings, index_name=INDEX_NAME)

        # Step 5: Retriever over the freshly indexed chunks.
        retriever = vectordb.as_retriever()

        # Step 6: Gemini as the generator; temperature=0 keeps answers deterministic.
        llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=api_key_gemini, temperature=0)
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm, retriever=retriever, return_source_documents=True
        )

        # Step 7: Run the chain and surface only the answer text to the UI.
        result = qa_chain({"query": user_question})
        return result["result"]

    except Exception as e:
        # Surface any pipeline failure (network, auth, parsing) to the UI
        # rather than crashing the Gradio handler.
        return f"❌ Error: {str(e)}"
52
+
53
# Gradio front end: collect the API credentials, a PDF upload, and a
# question, route them through process_rag, and display the answer.
with gr.Blocks() as app:
    gr.Markdown("## πŸ“„πŸ” PDF Question Answering using Pinecone + Gemini (RAG)")

    # Credential row — both API keys are masked; the environment is plain text.
    with gr.Row():
        gemini_key_box = gr.Textbox(label="πŸ” Gemini API Key", type="password")
        pinecone_key_box = gr.Textbox(label="🌲 Pinecone API Key", type="password")
        pinecone_env_box = gr.Textbox(label="🌍 Pinecone Environment (e.g., us-east1-gcp)")

    # Document and question inputs, plus the read-only answer area.
    pdf_upload = gr.File(label="πŸ“„ Upload your PDF", file_types=[".pdf"])
    question_box = gr.Textbox(label="❓ Ask your question")
    answer_box = gr.Textbox(label="πŸ€– Gemini Answer", lines=10)

    ask_button = gr.Button("πŸ” Ask")

    # Wire the button to the RAG pipeline. The inputs list must stay in the
    # same order as process_rag's parameters.
    ask_button.click(
        fn=process_rag,
        inputs=[gemini_key_box, pinecone_key_box, pinecone_env_box, pdf_upload, question_box],
        outputs=answer_box,
    )

app.launch()