# Hugging Face Space page residue (status header: "Spaces: Sleeping") —
# not part of the application code; kept here as a comment so the file parses.
# --- Imports ----------------------------------------------------------------
import os

import requests
import PyPDF2
import gradio as gr

# --- Configuration ----------------------------------------------------------
# Groq chat-completions endpoint; the API key is read from the environment so
# it is never hard-coded in the source.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_MODEL = "llama-3.1-8b-instant"
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"

# In-memory document store filled by preview_documents:
# maps file name -> list of text chunks.
processed_texts = {}
# ================= PDF HANDLING =================
def extract_pdf_text(file):
    """Return the concatenated text of every page of a PDF.

    *file* is whatever PyPDF2.PdfReader accepts (a path or a file object).
    Pages with no extractable text contribute only a newline, so the result
    always has one trailing "\n" per page.
    """
    pages = PyPDF2.PdfReader(file).pages
    return "".join((page.extract_text() or "") + "\n" for page in pages)
def chunk_text(text, chunk_size=400, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: the document text to split.
        chunk_size: maximum number of words per chunk.
        overlap: number of words shared between consecutive chunks.

    Returns:
        A list of chunk strings; empty list for empty/whitespace-only text.

    Raises:
        ValueError: if chunk_size <= 0 or overlap >= chunk_size.  The
            original loop advanced by ``chunk_size - overlap`` each
            iteration, so those inputs made the step non-positive and the
            loop never terminated — fail fast instead.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    step = chunk_size - overlap
    return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]
def preview_documents(files):
    """Extract, chunk, and cache the text of each uploaded PDF.

    Fills the module-level ``processed_texts`` cache (clearing any previous
    contents first) and returns one row per file for the preview DataFrame:
    [name, word count, first 300 characters, "<n> chunks"].

    Fix: clicking Preview with no files uploaded passed ``files=None`` and
    the ``for`` loop raised TypeError; now returns an empty table instead.
    """
    processed_texts.clear()
    if not files:
        return []
    rows = []
    for f in files:
        text = extract_pdf_text(f)
        # NOTE(review): assumes gradio delivers temp-file *paths* here
        # (os.path.basename works on str) — confirm against the installed
        # gradio version, which may hand back file wrapper objects.
        name = os.path.basename(f)
        chunks = chunk_text(text)
        processed_texts[name] = chunks
        rows.append([
            name,
            len(text.split()),
            text[:300],            # short preview of the document body
            f"{len(chunks)} chunks",
        ])
    return rows
def process_documents(files):
    """Return a status string describing whether documents are ready.

    *files* is unused; the parameter only exists to satisfy the gradio
    click-handler signature.  The real work happens in preview_documents,
    which populates ``processed_texts``.
    """
    if processed_texts:
        return f"β {len(processed_texts)} document(s) processed."
    return "β Preview documents first."
# ================= GROQ CALL =================
def query_groq(prompt):
    """Send *prompt* to the Groq chat-completions API and return the reply.

    Always returns a string — either the model's answer or a human-readable
    error message — so the chat UI can display failures inline.

    Fix: ``requests.post`` raises (timeout, DNS, connection refused) instead
    of returning a status code; previously that exception propagated and
    crashed the chat handler.  Catch it and report inline like the other
    failure paths.
    """
    if not GROQ_API_KEY:
        return "β GROQ_API_KEY not set."
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "Answer ONLY from the provided documents."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,   # low temperature: stay close to the documents
        "max_tokens": 400,
    }
    try:
        r = requests.post(GROQ_URL, headers=headers, json=payload, timeout=30)
    except requests.RequestException as e:
        return f"β Network error: {e}"
    if r.status_code == 200:
        return r.json()["choices"][0]["message"]["content"]
    return f"β Groq Error {r.status_code}: {r.text}"
# ================= RAG =================
def retrieve_context(question, corpus=None):
    """Return the single chunk with the greatest word overlap with *question*.

    Scoring is a plain set intersection of lowercased whitespace tokens.

    Args:
        question: the user's question.
        corpus: optional ``{name: [chunk, ...]}`` mapping to search.  Defaults
            to the module-level ``processed_texts`` cache, so existing
            one-argument callers are unchanged; passing it explicitly makes
            the retrieval logic independently testable.

    Returns:
        The best-matching chunk truncated to 1000 characters, or "" when no
        chunk shares any word with the question.
    """
    if corpus is None:
        corpus = processed_texts
    question_words = set(question.lower().split())
    best_chunk = ""
    best_score = 0
    for chunks in corpus.values():
        for chunk in chunks:
            score = len(question_words & set(chunk.lower().split()))
            if score > best_score:
                best_score = score
                best_chunk = chunk
    # Cap the context so the prompt stays within the model's budget.
    return best_chunk[:1000] if best_chunk else ""
def answer_question(question, history):
    """Chat handler: append the user's turn, retrieve context, query Groq.

    Returns ``(updated_history, "")`` — the empty string clears the input
    textbox.  Error states are written into the last history entry so they
    appear inline in the chat window.
    """
    if history is None:
        history = []

    # Show the user's message immediately with an empty placeholder reply.
    history.append((question, ""))

    # Guard: nothing to answer from until PDFs have been previewed.
    if not processed_texts:
        history[-1] = (question, "β οΈ Upload and process PDFs first.")
        return history, ""

    context = retrieve_context(question)
    if not context:
        history[-1] = (question, "β No relevant information found in documents.")
        return history, ""

    prompt = f"""
DOCUMENT CONTEXT:
{context}
QUESTION:
{question}
Answer clearly using the document context only.
"""
    # Replace the placeholder with the model's actual reply.
    history[-1] = (question, query_groq(prompt))
    return history, ""
# ================= UI =================
# NOTE: component creation order is layout order inside gr.Blocks, so the
# order below is preserved exactly.
with gr.Blocks(title="RAG PDF Chatbot") as demo:
    gr.Markdown("# π RAG PDF Chatbot (Groq)")
    gr.Markdown("*Upload PDFs β Preview β Ask questions*")

    with gr.Row():
        pdf_input = gr.File(file_types=[".pdf"], file_count="multiple")
        preview_button = gr.Button("π Preview")
        process_button = gr.Button("π Process")

    status_box = gr.Textbox(label="Status")
    preview_table = gr.DataFrame(
        headers=["File", "Words", "Preview", "Chunks"],
        interactive=False,
    )

    chat_window = gr.Chatbot(height=420)
    question_box = gr.Textbox(
        placeholder="Ask a question from the documents...",
        lines=2,
    )
    send_button = gr.Button("Send")

    # Event wiring: preview fills the table, process reports status, and both
    # the Send button and Enter in the textbox submit a question.
    preview_button.click(preview_documents, pdf_input, preview_table)
    process_button.click(process_documents, pdf_input, status_box)
    send_button.click(answer_question, [question_box, chat_window], [chat_window, question_box])
    question_box.submit(answer_question, [question_box, chat_window], [chat_window, question_box])

demo.launch()