# PDF Q&A — Gradio app: upload a PDF, index it with FAISS, ask questions via Groq.
| import os | |
| import gradio as gr | |
| import faiss | |
| import numpy as np | |
| from PyPDF2 import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| from groq import Groq | |
# 🔑 Groq API key — read from the environment (e.g. a Space secret named
# GROQ_API_KEY). SECURITY: never hard-code API keys in source; a key committed
# here is effectively public and must be rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# 📦 Embedding model, used for both document chunks and incoming queries.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# 🌐 Module-level state shared between the two Gradio callbacks:
# `chunks` holds the text pieces, `index` the FAISS index over their embeddings.
chunks = []
index = None
# 📄 PDF Text Extraction & Processing
def process_pdf(file):
    """Extract text from an uploaded PDF, split it into overlapping word
    chunks, embed the chunks, and build a FAISS L2 index over them.

    Args:
        file: Gradio file object; ``file.name`` is the temp path on disk.

    Returns:
        A human-readable status string (success or failure reason).
    """
    global chunks, index
    # Reset shared state up front so a failed upload cannot leave a stale
    # index behind — otherwise ask_question() would silently keep answering
    # from the previously processed PDF.
    chunks, index = [], None
    try:
        reader = PdfReader(file.name)
        # extract_text() may return None (e.g. image-only pages); coalesce to "".
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not text.strip():
            return "❌ No text found in the PDF. Please upload a different file."

        # 🔍 Sliding-window chunking: 300 words per chunk, 50-word overlap,
        # so context isn't cut mid-thought at chunk boundaries.
        chunk_size = 300
        chunk_overlap = 50
        step = chunk_size - chunk_overlap
        words = text.split()
        chunks = [
            " ".join(words[i:i + chunk_size])
            for i in range(0, len(words), step)
        ]

        # 🔎 Embed every chunk and index the vectors for L2 nearest-neighbour
        # search. FAISS requires float32 input, so cast explicitly.
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.asarray(embeddings, dtype=np.float32))
        return f"✅ Processed {len(chunks)} chunks from uploaded PDF. You can now ask questions."
    except Exception as e:
        chunks = []  # keep state consistent after a partial failure
        return f"❌ Error processing the PDF: {str(e)}"
# ❓ Ask a Question
def ask_question(query):
    """Answer *query* using the most relevant indexed PDF chunks as context.

    Retrieves up to 3 nearest chunks from the FAISS index, then asks the
    Groq chat model to answer from that context.

    Returns:
        The model's answer, or a warning/error string.
    """
    if not chunks or index is None:
        return "⚠️ Please upload and process a PDF first."

    query_embedding = model.encode([query])
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with index -1, which would wrongly select chunks[-1].
    k = min(3, len(chunks))
    _, indices = index.search(np.asarray(query_embedding, dtype=np.float32), k=k)
    context = "\n".join(chunks[i] for i in indices[0] if i >= 0)

    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
    try:
        # Client construction is inside the try so a missing/invalid API key
        # surfaces as a readable error message instead of an unhandled raise.
        client = Groq(api_key=GROQ_API_KEY)
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-70b-8192",
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"❌ Error from Groq API: {str(e)}"
# 🎛️ Gradio interface: two tabs — one to upload/index a PDF, one to ask
# questions against it. (Labels repaired from mojibake in the original.)
file_input = gr.File(label="📄 Upload PDF")
question_input = gr.Textbox(label="❓ Ask a Question about the PDF")
answer_output = gr.Textbox(label="📝 Answer")

pdf_processor = gr.Interface(fn=process_pdf, inputs=file_input, outputs="text")
pdf_qa = gr.Interface(fn=ask_question, inputs=question_input, outputs=answer_output)

# Tabbed container: "Upload PDF" first, then "Ask a Question".
app = gr.TabbedInterface(
    [pdf_processor, pdf_qa],
    tab_names=["Upload PDF", "Ask a Question"],
)

# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    app.launch()