Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import requests | |
| import numpy as np | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# ---------------- CONFIG ----------------
# Groq chat-completions endpoint and the model requested from it.
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
MODEL_NAME = "llama-3.1-8b-instant"

# The API key is read from the environment; it must be added to the
# Hugging Face Space secrets under this name. It is None when unset.
GROQ_API_KEY = os.getenv("smartdoc_rag_chatbot")

# Sentence-embedding model used for both document chunks and queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# In-memory index, rebuilt by load_pdfs():
#   chunks           -> list of {"content", "page", "doc"} dicts
#   chunk_embeddings -> embeddings for those chunks, in the same order
chunks, chunk_embeddings = [], []
# ---------------- PDF LOADING ----------------
def load_pdfs(pdf_files, chunk_size=500):
    """Extract text from the uploaded PDFs and rebuild the global chunk index.

    Each page's text is cut into consecutive, non-overlapping slices of
    ``chunk_size`` characters. Every slice remembers its 1-based page number
    and a "Document N" label (N is the 1-based position in ``pdf_files``).
    All slices are then embedded with the module-level ``embedder``.

    Args:
        pdf_files: The uploaded PDFs; each item is passed directly to
            ``PdfReader``. ``None`` or an empty list is rejected.
        chunk_size: Maximum number of characters per chunk (default 500).

    Returns:
        A human-readable status string for the UI.

    Raises:
        ValueError: If ``chunk_size`` is not a positive number.
    """
    global chunks, chunk_embeddings

    if not pdf_files:
        return "❌ Please upload at least one PDF."
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    # Build the new index in locals first so the globals are only replaced
    # once both the chunks and their embeddings exist.
    new_chunks = []
    for doc_number, pdf in enumerate(pdf_files, start=1):
        reader = PdfReader(pdf)
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            if not text:
                # Pages without extractable text contribute nothing.
                continue
            new_chunks.extend(
                {
                    "content": text[start:start + chunk_size],
                    "page": page_number,
                    "doc": f"Document {doc_number}",
                }
                for start in range(0, len(text), chunk_size)
            )

    if not new_chunks:
        # Nothing to embed: clear the index so earlier documents are not
        # silently queried, and tell the user instead of reporting success.
        chunks, chunk_embeddings = [], []
        return "⚠️ No extractable text found in the uploaded PDF(s)."

    new_embeddings = embedder.encode([c["content"] for c in new_chunks])
    chunks, chunk_embeddings = new_chunks, new_embeddings

    return f"✅ Loaded {len(pdf_files)} PDF(s) with {len(chunks)} chunks."
# ---------------- RETRIEVAL ----------------
def retrieve_context(query, k=3):
    """Return the ``k`` chunks most similar to ``query`` and the best match.

    The query is embedded with the module-level ``embedder`` and compared to
    ``chunk_embeddings`` by cosine similarity.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve (default 3). If fewer chunks are
            indexed, all of them are returned.

    Returns:
        A ``(context, source)`` tuple. ``context`` is the selected chunk
        texts joined by newlines, ordered from least to most similar.
        ``source`` is the chunk dict of the single most similar chunk.

    Raises:
        ValueError: If ``k`` is less than 1 or no chunks are indexed.
    """
    # Validate k explicitly: a slice of [-0:] would return every chunk and a
    # negative k would select an arbitrary tail.
    if k < 1:
        raise ValueError("k must be at least 1")
    if not chunks:
        raise ValueError("no document chunks are loaded")

    query_embedding = embedder.encode([query])
    similarities = cosine_similarity(query_embedding, chunk_embeddings)[0]

    # argsort is ascending, so the last k indices are the k best matches and
    # the final one is the best match overall.
    top_k = np.argsort(similarities)[-k:]
    selected = [chunks[i] for i in top_k]

    context = "\n".join(c["content"] for c in selected)
    source = selected[-1]
    return context, source
# ---------------- GROQ CALL ----------------
def ask_question(question):
    """Answer ``question`` from the loaded PDFs via the Groq chat API.

    The most relevant chunks are fetched with ``retrieve_context``, placed in
    a prompt that restricts the model to that context, and sent to
    ``GROQ_URL``. The reply is returned with the document label and page of
    the best-matching chunk appended.

    Args:
        question: The user's question text.

    Returns:
        The model's answer followed by a source line, or a short message
        describing why no answer could be produced (blank question, no
        documents loaded, missing API key, failed request, or a malformed
        response).
    """
    # Reject blank input before doing any retrieval or network work.
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    if not chunks:
        return "⚠️ Please load PDFs first."
    if not GROQ_API_KEY:
        # Without this check the request would be sent as "Bearer None".
        return "❌ Groq API key is not configured."

    context, source = retrieve_context(question)
    prompt = f"""
You are SmartDoc RAG Chatbot.
Answer the question using ONLY the context below.
Context:
{context}
Question:
{question}
"""
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.2,
    }

    try:
        # A timeout keeps a stalled connection from hanging the UI forever.
        response = requests.post(
            GROQ_URL, headers=headers, json=payload, timeout=60
        )
        # Surface HTTP errors here rather than as a KeyError on the payload.
        response.raise_for_status()
        answer = response.json()["choices"][0]["message"]["content"]
    except requests.RequestException as exc:
        return f"❌ Request to Groq failed: {exc}"
    except (KeyError, IndexError, TypeError, ValueError):
        return "❌ Unexpected response from Groq."

    return f"""{answer}
📄 Source: {source['doc']} — Page {source['page']}"""
# ---------------- UI ----------------
# Custom stylesheet passed to gr.Blocks via its css argument.
css = """
body {
background: linear-gradient(120deg, #e0f2ff, #f8fbff);
}
h1, h3 {
text-align: center;
}
.gr-textbox textarea {
font-size: 15px;
}
.gr-button-primary {
font-weight: bold;
}
"""

with gr.Blocks(
    css=css,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="slate",
        font=["Inter", "sans-serif"],
    ),
) as demo:
    # Page header.
    gr.Markdown("""
# 📄 SmartDoc RAG Chatbot
### Retrieval‑Augmented AI for Document Question Answering
Upload PDFs and ask questions based **only** on their content.
""")

    with gr.Row():
        # Left panel: upload the PDFs and build the index.
        with gr.Column(scale=1):
            pdf_files = gr.File(
                label="📂 Upload PDF Documents",
                file_count="multiple",
                file_types=[".pdf"],
            )
            load_btn = gr.Button("📥 Load Documents", variant="primary")
            status = gr.Textbox(label="Status", interactive=False)

        # Right panel: question box with send button, answer underneath.
        with gr.Column(scale=2):
            with gr.Row():
                question = gr.Textbox(
                    placeholder="Type your question here…",
                    lines=1,
                    scale=8,
                )
                send_btn = gr.Button("➤", scale=1)
            answer = gr.Textbox(label="Answer", lines=8)

    # Events: loading writes its status message into the status box.
    load_btn.click(load_pdfs, inputs=pdf_files, outputs=status)

    # The send button and pressing Enter behave identically: answer the
    # question, then clear the question box.
    for trigger in (send_btn.click, question.submit):
        trigger(
            ask_question,
            inputs=question,
            outputs=answer,
        ).then(lambda: "", None, question)

demo.launch()