Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI | |
| from langchain_community.vectorstores import Pinecone as LangchainPinecone | |
| from langchain.chains import RetrievalQA | |
| import pinecone # OLD SDK (pinecone-client==2.2.4) | |
# Name of the Pinecone index that every request reads from and writes to.
INDEX_NAME = "rag-demo-index"


def process_rag(
    api_key_gemini: str,
    api_key_pinecone: str,
    pinecone_env: str,
    pdf_file,
    user_question: str,
) -> str:
    """Answer ``user_question`` from an uploaded PDF via Pinecone retrieval and Gemini.

    The PDF is split into overlapping chunks, embedded with Gemini embeddings,
    stored in the Pinecone index ``INDEX_NAME`` (created on first use), and a
    ``RetrievalQA`` chain is then run over that index.

    Args:
        api_key_gemini: Google Generative AI API key.
        api_key_pinecone: Pinecone API key (legacy ``pinecone.init`` SDK).
        pinecone_env: Pinecone environment passed to ``pinecone.init``.
        pdf_file: The upload from ``gr.File``; either an object exposing the
            file path as ``.name`` or the path string itself.
        user_question: The question to ask about the document.

    Returns:
        The chain's answer text, or a user-facing message when input
        validation fails or any step raises.
    """
    # Validate cheap inputs first so no embedding/upsert work is done for a
    # request that cannot succeed.
    gemini_key = (api_key_gemini or "").strip()
    pinecone_key = (api_key_pinecone or "").strip()
    if not gemini_key or not pinecone_key:
        return "β Please provide both Gemini and Pinecone API keys."
    if not pdf_file:
        return "β Please upload a PDF file."
    question = (user_question or "").strip()
    if not question:
        return "β Please enter a question."

    # Accept both a tempfile-like upload (path in ``.name``) and a bare path.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    environment = pinecone_env.strip() if pinecone_env else pinecone_env

    try:
        # Step 1: Load and split the PDF
        pages = PyPDFLoader(pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_documents(pages)

        # Step 2: Set up embeddings using Gemini
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=gemini_key,
        )

        # Step 3: Initialize Pinecone (old SDK) and make sure the index exists.
        # The dimension must match the vector size of the embedding model above.
        pinecone.init(api_key=pinecone_key, environment=environment)
        if INDEX_NAME not in pinecone.list_indexes():
            pinecone.create_index(name=INDEX_NAME, dimension=768, metric="cosine")

        # Step 4: Store docs in Pinecone using the LangChain wrapper.
        # NOTE(review): every call upserts this PDF's chunks into the same
        # shared index, so repeated questions re-embed the document and
        # retrieval presumably also sees chunks from earlier uploads --
        # consider a per-document namespace.
        vectordb = LangchainPinecone.from_documents(
            chunks,
            embedding=embeddings,
            index_name=INDEX_NAME,
        )

        # Step 5: Create retriever and chain
        llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            google_api_key=gemini_key,
            temperature=0,
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=vectordb.as_retriever(),
            return_source_documents=True,
        )

        # Step 6: Ask the question (``invoke`` replaces the deprecated
        # direct call on the chain object).
        result = qa_chain.invoke({"query": question})
        return result["result"]
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"β Error: {e}"
# Gradio UI: credential inputs, PDF upload, question box and answer display.
# NOTE(review): this file's indentation was not available when the layout was
# written; the three credential boxes are assumed to share one row -- confirm.
with gr.Blocks() as app:
    gr.Markdown("## ππ PDF Q&A using Pinecone + Gemini (RAG)")

    with gr.Row():
        gemini_key = gr.Textbox(label="π Gemini API Key", type="password")
        pinecone_key = gr.Textbox(label="π² Pinecone API Key", type="password")
        # NOTE(review): the legacy Pinecone SDK typically expects values such as
        # "us-east-1-aws" or "gcp-starter"; verify the example in this label.
        pinecone_env = gr.Textbox(label="π Pinecone Environment (e.g., us-east-1)")

    pdf_file = gr.File(label="π Upload PDF", file_types=[".pdf"])
    user_question = gr.Textbox(label="β Ask your question")
    answer_output = gr.Textbox(label="π€ Gemini Answer", lines=10)
    submit_btn = gr.Button("π Ask")

    # Input order must match the positional parameters of process_rag.
    submit_btn.click(
        fn=process_rag,
        inputs=[gemini_key, pinecone_key, pinecone_env, pdf_file, user_question],
        outputs=answer_output,
    )

# Start the server only when run as a script, so importing this module
# (tests, tooling) does not launch a web app as a side effect.
if __name__ == "__main__":
    app.launch()