Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import faiss | |
| import numpy as np | |
| from groq import Groq | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| # ----------------------------- | |
| # Load Groq Client | |
| # ----------------------------- | |
# Read the API key from the environment and fail fast when it is missing or
# empty, so the problem surfaces at startup rather than on the first request.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("GROQ_API_KEY not found in environment variables")

# Single Groq client reused by every request.
client = Groq(api_key=GROQ_API_KEY)
| # ----------------------------- | |
| # Load Embedding Model (Open Source) | |
| # ----------------------------- | |
# Sentence-embedding model used for both the PDF chunks and the questions.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Module-level state shared by the handlers:
#   index     -- FAISS index over the chunk embeddings; None until a PDF is processed
#   documents -- text chunks, positionally aligned with the vectors in ``index``
index = None
documents = []
| # ----------------------------- | |
| # PDF Processing | |
| # ----------------------------- | |
def process_pdf(file):
    """Extract the text of an uploaded PDF, chunk it, and build a FAISS index.

    On success the module-level ``documents`` list and ``index`` are replaced
    with the chunks and index built from this PDF. When no file is supplied,
    or the PDF yields no text, both globals are left untouched and an
    explanatory message is returned instead.

    Args:
        file: The uploaded file, either an object exposing its path as
            ``.name`` or a plain path string. ``None`` when nothing was
            uploaded.

    Returns:
        A status message suitable for display in the UI.
    """
    global documents, index

    # Nothing was uploaded: report it instead of failing on ``file.name``.
    if file is None:
        return "Please upload a PDF file first."

    # Accept both file wrappers exposing ``.name`` and bare path strings.
    pdf_path = getattr(file, "name", file)
    reader = PdfReader(pdf_path)

    # Join once instead of repeated ``+=``; ``or ""`` tolerates pages that
    # produce no text at all.
    text = "".join(page.extract_text() or "" for page in reader.pages)
    if not text.strip():
        # An empty chunk list would yield an embedding array without a
        # second axis, so ``shape[1]`` below would raise.
        return "No extractable text was found in this PDF."

    # Chunking: fixed-size, non-overlapping character windows.
    chunk_size = 500
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    # Create embeddings as a contiguous float32 matrix for FAISS.
    embeddings = np.ascontiguousarray(
        embedding_model.encode(chunks), dtype="float32"
    )

    # Create FAISS index
    dimension = embeddings.shape[1]
    new_index = faiss.IndexFlatL2(dimension)
    new_index.add(embeddings)

    # Publish both globals together, only after indexing succeeded, so a
    # failure above never leaves ``documents`` and ``index`` out of sync.
    documents = chunks
    index = new_index
    return "β PDF processed and indexed successfully!"
| # ----------------------------- | |
| # Question Answering | |
| # ----------------------------- | |
def answer_question(question):
    """Answer a question using the most similar chunks of the indexed PDF.

    The question is embedded with the same model as the chunks, up to three
    nearest chunks are retrieved from the FAISS index, and they are sent to
    the Groq chat model as context together with the question.

    Args:
        question: The user's question as text.

    Returns:
        The model's answer, or a short instruction when no PDF has been
        indexed yet or the question is empty.
    """
    if index is None or not documents:
        return "Please upload and process a PDF first."
    if not question or not question.strip():
        return "Please enter a question."

    # Embed question
    question_embedding = np.ascontiguousarray(
        embedding_model.encode([question]), dtype="float32"
    )

    # Search in FAISS. Never request more neighbours than there are chunks:
    # FAISS pads missing results with -1, which would otherwise be read as
    # ``documents[-1]`` and silently duplicate the last chunk.
    k = min(3, len(documents))
    _, indices = index.search(question_embedding, k)
    context = "".join(
        documents[idx] + "\n\n"
        for idx in indices[0]
        if 0 <= idx < len(documents)
    )

    prompt = (
        "\n"
        "Answer the question based only on the context below.\n"
        "Context:\n"
        f"{context}\n"
        "Question:\n"
        f"{question}\n"
        "Answer:\n"
    )

    # Send to Groq LLM
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.3-70b-versatile",
    )
    return chat_completion.choices[0].message.content
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
# Assemble the interface. Components are created in display order: the PDF
# upload controls first, then the question/answer controls.
with gr.Blocks() as app:
    gr.Markdown("# π RAG PDF Chatbot (Groq + FAISS)")

    # PDF upload and indexing controls.
    file_input = gr.File(label="Upload PDF")
    upload_button = gr.Button("Process PDF")
    upload_output = gr.Textbox()

    # Question and answer controls.
    question_input = gr.Textbox(label="Ask a Question")
    answer_button = gr.Button("Get Answer")
    answer_output = gr.Textbox()

    # Wire each button to its handler.
    upload_button.click(
        fn=process_pdf,
        inputs=[file_input],
        outputs=[upload_output],
    )
    answer_button.click(
        fn=answer_question,
        inputs=[question_input],
        outputs=[answer_output],
    )

app.launch()