import logging
import os

import gradio as gr
import pinecone  # OLD SDK (pinecone-client==2.2.4)
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Pinecone as LangchainPinecone
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

logger = logging.getLogger(__name__)

INDEX_NAME = "rag-demo-index"
# Dimension the Pinecone index is created with; it has to match the size of
# the vectors produced by the embedding model configured in process_rag.
EMBEDDING_DIMENSION = 768


def _load_pdf_chunks(pdf_path):
    """Load the PDF at *pdf_path* and split it into overlapping text chunks."""
    pages = PyPDFLoader(pdf_path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(pages)


def _ensure_pinecone_index(api_key, environment):
    """Initialise the (old-SDK) Pinecone client and create INDEX_NAME if absent."""
    pinecone.init(api_key=api_key, environment=environment)
    if INDEX_NAME not in pinecone.list_indexes():
        pinecone.create_index(
            name=INDEX_NAME, dimension=EMBEDDING_DIMENSION, metric="cosine"
        )


def process_rag(
    api_key_gemini, api_key_pinecone, pinecone_env, pdf_file, user_question
) -> str:
    """Answer *user_question* about an uploaded PDF with a retrieval-QA chain.

    The PDF is chunked, embedded with Gemini embeddings, stored in the
    Pinecone index ``INDEX_NAME`` and then queried through a ``RetrievalQA``
    chain backed by a Gemini chat model.

    Args:
        api_key_gemini: Google Generative AI API key.
        api_key_pinecone: Pinecone API key.
        pinecone_env: Pinecone environment name passed to ``pinecone.init``.
        pdf_file: Value delivered by the Gradio ``File`` component; either an
            object exposing the temp-file path as ``.name`` or a path string.
        user_question: Question to ask about the document.

    Returns:
        The model's answer, or a message prefixed with "❌" describing why
        the request could not be served. Exceptions are never propagated,
        because the return value is rendered directly in the UI.
    """
    if not api_key_gemini or not api_key_pinecone:
        return "❌ Please provide both Gemini and Pinecone API keys."
    if not pdf_file:
        return "❌ Please upload a PDF file."

    question = user_question.strip() if user_question else ""
    if not question:
        return "❌ Please enter a question."

    # Depending on the Gradio version/config the upload arrives as a
    # temp-file wrapper (path in `.name`) or as a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        # Step 1: Load and split the PDF
        docs = _load_pdf_chunks(pdf_path)
        if not docs:
            return "❌ No extractable text was found in the PDF."

        # Step 2: Set up embeddings using Gemini
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=api_key_gemini,
        )

        # Step 3: Initialize Pinecone (old SDK)
        _ensure_pinecone_index(api_key_pinecone, pinecone_env)

        # Step 4: Store docs in Pinecone using LangChain wrapper
        # NOTE(review): every request adds its chunks to the same shared
        # index, so earlier uploads remain retrievable — confirm whether a
        # per-upload namespace or cleanup is wanted.
        vectordb = LangchainPinecone.from_documents(
            docs,
            embedding=embeddings,
            index_name=INDEX_NAME,
        )

        # Step 5: Create retriever and chain
        llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            google_api_key=api_key_gemini,
            temperature=0,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectordb.as_retriever(),
            return_source_documents=True,
        )

        # Step 6: Ask question (invoke replaces the deprecated chain __call__)
        result = qa_chain.invoke({"query": question})
        return result["result"]
    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message.
        logger.exception("RAG pipeline failed")
        return f"❌ Error: {str(e)}"


# 🎛️ Gradio UI
with gr.Blocks() as app:
    gr.Markdown("## 📄🔍 PDF Q&A using Pinecone + Gemini (RAG)")

    with gr.Row():
        gemini_key = gr.Textbox(label="🔐 Gemini API Key", type="password")
        pinecone_key = gr.Textbox(label="🌲 Pinecone API Key", type="password")

    pinecone_env = gr.Textbox(label="🌍 Pinecone Environment (e.g., us-east-1)")
    pdf_file = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
    user_question = gr.Textbox(label="❓ Ask your question")
    answer_output = gr.Textbox(label="🤖 Gemini Answer", lines=10)
    submit_btn = gr.Button("🔍 Ask")

    submit_btn.click(
        fn=process_rag,
        inputs=[gemini_key, pinecone_key, pinecone_env, pdf_file, user_question],
        outputs=answer_output,
    )

app.launch()