Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import PyPDF2 | |
| import cohere | |
| from pinecone import Pinecone | |
| from sentence_transformers import SentenceTransformer | |
| import io | |
# Credentials are read from the environment (for example, secrets configured
# for the deployment) so that no API key is committed with the source.
# Any key that was previously committed in this file should be revoked.
# A missing variable raises KeyError at startup, naming the variable.
import os

# Initialize Pinecone and connect to the index
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
index = pc.Index("quickstart")

# Load the sentence transformer model used for both documents and queries
# NOTE(review): the index dimension must match this model's embedding size;
# confirm against the "quickstart" index configuration.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize the Cohere client used to generate answers
co = cohere.Client(os.environ["COHERE_API_KEY"])
def extract_text_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF, with error handling.

    Args:
        pdf_file: The upload to read. Either a filesystem path given as a
            ``str`` (what ``gr.File(type="filepath")`` passes to handlers),
            an object exposing the path as ``.name``, or ``None``.

    Returns:
        The concatenated text of every page on success. On failure a
        human-readable message is returned instead of raising:
        "No file uploaded.", "The uploaded PDF is empty or has no readable
        content.", "The uploaded PDF is encrypted or unreadable.", or a
        string starting with "Error reading PDF:".
    """
    if pdf_file is None:
        return "No file uploaded."
    try:
        # A plain string is already the path; only file-like wrappers carry
        # it in ``.name``. Calling ``.name`` on a str raised AttributeError.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        with open(pdf_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # extract_text() may yield None for pages without a text layer.
            text = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
    except PyPDF2.errors.PdfReadError:
        return "The uploaded PDF is encrypted or unreadable."
    except Exception as e:
        # Best-effort boundary: the message is shown to the user by the UI.
        return f"Error reading PDF: {e}"
    if not text.strip():
        return "The uploaded PDF is empty or has no readable content."
    return text
def store_pdf_embeddings(pdf_text, segment_size=512, batch_size=100):
    """Generate and store embeddings for the uploaded PDF content.

    The text is cut into fixed-size character segments, each segment is
    embedded with the module-level ``model``, and the vectors are upserted
    into the module-level Pinecone ``index`` under the ids ``seg-0``,
    ``seg-1``, ... Each vector carries its source text as ``text`` metadata
    so that a later query can return the passage itself, not just its id.

    Args:
        pdf_text: The document text to index.
        segment_size: Number of characters per segment.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        A status message for display in the UI.
    """
    # Nothing to embed: avoid calling the model and sending an empty upsert.
    if not pdf_text or not pdf_text.strip():
        return "No text found in the PDF; nothing was stored."

    segments = [
        pdf_text[start:start + segment_size]
        for start in range(0, len(pdf_text), segment_size)
    ]
    embeddings = model.encode(segments)
    vectors = [
        (f"seg-{i}", embedding.tolist(), {"text": segment})
        for i, (segment, embedding) in enumerate(zip(segments, embeddings))
    ]
    # Send in batches so a large document does not produce one oversized
    # request.
    for start in range(0, len(vectors), batch_size):
        index.upsert(vectors=vectors[start:start + batch_size])
    return "PDF uploaded and stored successfully!"
def ask_question(query):
    """Answer a user question using the most relevant stored PDF segment.

    The query is embedded with the module-level ``model``, the closest
    vector is fetched from the Pinecone ``index``, and the Cohere client
    ``co`` generates an answer with that segment as context.

    Args:
        query: The user's question.

    Returns:
        A ``(segment_text, answer)`` tuple. ``segment_text`` is the text of
        the retrieved segment when it was stored as ``text`` metadata,
        otherwise ``"Segment: <id>"``. When the query is empty or nothing
        has been indexed, ``segment_text`` is ``""`` and ``answer`` explains
        why.
    """
    if not query or not query.strip():
        return "", "Please enter a question."

    query_embedding = model.encode(query).tolist()

    # Retrieve the most relevant segment, including its stored text.
    result = index.query(
        top_k=1, vector=query_embedding, include_metadata=True
    )
    matches = result['matches']
    if not matches:
        return "", "No document content is stored yet. Please upload a PDF first."

    best_match = matches[0]
    retrieved_seg_id = best_match['id']
    # Vectors written without metadata have no text; fall back to the id.
    try:
        metadata = best_match['metadata'] or {}
    except (KeyError, AttributeError):
        metadata = {}
    segment_text = metadata.get("text") or f"Segment: {retrieved_seg_id}"

    # Generate the answer using the retrieved segment as context
    prompt = f"Context: {segment_text}\nQuestion: {query}\nAnswer:"
    response = co.generate(
        model="command-xlarge-nightly",
        prompt=prompt,
        max_tokens=50
    )

    # Return both the segment and the answer
    answer = response.generations[0].text.strip()
    return segment_text, answer
# Messages (or message prefixes) that extract_text_from_pdf returns in place
# of document text when extraction fails. They must be shown to the user,
# not embedded and stored as if they were the PDF's content.
_EXTRACTION_FAILURES = (
    "No file uploaded.",
    "The uploaded PDF is empty or has no readable content.",
    "The uploaded PDF is encrypted or unreadable.",
    "Error reading PDF:",
)


def _handle_upload(pdf):
    """Extract text from the uploaded PDF and index it.

    Returns the status message to display: the extraction failure message
    if the PDF could not be read, otherwise the result of storing it.
    """
    if pdf is None:
        return "Please upload a valid PDF."
    text = extract_text_from_pdf(pdf)
    if text.startswith(_EXTRACTION_FAILURES):
        return text
    return store_pdf_embeddings(text)


# Gradio Interface Setup
with gr.Blocks() as demo:
    gr.Markdown("# Interactive QA Bot with PDF Support")

    # PDF Upload Section
    pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
    upload_status = gr.Textbox(label="Upload Status", interactive=False)
    upload_button = gr.Button("Upload and Store")

    # Handle PDF Upload
    upload_button.click(
        _handle_upload, inputs=pdf_input, outputs=upload_status
    )

    # Question and Answer Section
    query_input = gr.Textbox(label="Enter your question")
    segment_output = gr.Textbox(label="Retrieved Segment", interactive=False)
    answer_output = gr.Textbox(label="Answer", interactive=False)
    query_button = gr.Button("Ask")

    # Handle User Questions
    query_button.click(
        ask_question, inputs=query_input, outputs=[segment_output, answer_output]
    )

demo.launch(share=True)  # Set share=True if you want a public link