Spaces:

Satyam0077
/

rag-qa-bot

Runtime error

App Files Files Community

Satyam0077 commited on Oct 23, 2024

Commit

9b255cc

verified ·

1 Parent(s): 213aeb3

Upload app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import gradio as gr
+import PyPDF2
+import cohere
+from pinecone import Pinecone
+from sentence_transformers import SentenceTransformer
+import io
+# Initialize Pinecone and connect to the index
+pc = Pinecone(api_key="0f78bc1b-81f7-4a15-9af3-0fbcf0acdb4e")
+index = pc.Index("quickstart")
+# Load the sentence transformer model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Initialize Cohere with your API key
+co = cohere.Client("CxIrucBVA8NNJJOBUnxwRWq488MVydBku1DlqP1u")
+def extract_text_from_pdf(pdf_file):
+    """Extracts text from the uploaded PDF, with error handling."""
+    try:
+        if pdf_file is None:
+            return "No file uploaded."
+        # Read the PDF content from the file path
+        with open(pdf_file.name, 'rb') as f:
+            pdf_reader = PyPDF2.PdfReader(f)
+            text = ""
+            for page_num in range(len(pdf_reader.pages)):
+                text += pdf_reader.pages[page_num].extract_text() or ""
+        if not text.strip():
+            return "The uploaded PDF is empty or has no readable content."
+        return text
+    except PyPDF2.errors.PdfReadError:
+        return "The uploaded PDF is encrypted or unreadable."
+    except Exception as e:
+        return f"Error reading PDF: {str(e)}"
+def store_pdf_embeddings(pdf_text):
+    """Generate and store embeddings for the uploaded PDF content."""
+    segments = [pdf_text[i:i + 512] for i in range(0, len(pdf_text), 512)]
+    embeddings = model.encode(segments)
+    vectors = [(f"seg-{i}", embed.tolist()) for i, embed in enumerate(embeddings)]
+    index.upsert(vectors=vectors)
+    return "PDF uploaded and stored successfully!"
+def ask_question(query):
+    """Handle user questions and generate answers based on the PDF content."""
+    query_embedding = model.encode(query).tolist()
+    # Retrieve the most relevant segment from Pinecone
+    result = index.query(top_k=1, vector=query_embedding)
+    retrieved_seg_id = result['matches'][0]['id']
+    segment_text = f"Segment: {retrieved_seg_id}"
+    # Generate the answer using the retrieved segment as context
+    prompt = f"{segment_text}\nQuestion: {query}\nAnswer:"
+    response = co.generate(
+        model="command-xlarge-nightly",
+        prompt=prompt,
+        max_tokens=50
+    )
+    # Return both the segment and the answer
+    answer = response.generations[0].text.strip()
+    return segment_text, answer
+# Gradio Interface Setup
+with gr.Blocks() as demo:
+    gr.Markdown("# Interactive QA Bot with PDF Support")
+    # PDF Upload Section
+    pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
+    upload_status = gr.Textbox(label="Upload Status", interactive=False)
+    upload_button = gr.Button("Upload and Store")
+    # Handle PDF Upload
+    upload_button.click(
+        lambda pdf: store_pdf_embeddings(extract_text_from_pdf(pdf))
+        if pdf is not None else "Please upload a valid PDF.",
+        inputs=pdf_input, outputs=upload_status
+    )
+    # Question and Answer Section
+    query_input = gr.Textbox(label="Enter your question")
+    segment_output = gr.Textbox(label="Retrieved Segment", interactive=False)
+    answer_output = gr.Textbox(label="Answer", interactive=False)
+    query_button = gr.Button("Ask")
+    # Handle User Questions
+    query_button.click(
+        ask_question, inputs=query_input, outputs=[segment_output, answer_output]
+    )
+demo.launch(share=True)  # Set share=True if you want a public link