"""RAG chatbot app.

Upload a PDF / DOCX / CSV, split it into chunks, index the chunks in a FAISS
vector store, retrieve the top matches for a question, and answer with the
Groq chat API (LLaMA 3.3 70B). Optionally score an answer against a
reference with BLEU / ROUGE / BERTScore.
"""

import os
from functools import lru_cache

import docx2txt
import evaluate
import gradio as gr
import pandas as pd
import pypdf
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter


# ======================================
# 🔹 Extract text from uploaded file
# ======================================
def extract_text(file):
    """Return the plain text of an uploaded file.

    Supports .pdf, .docx and .csv (extension matched case-insensitively).
    Returns "" when no file was given or the extension is unsupported.
    """
    if not file:
        return ""
    path = file.name
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        reader = pypdf.PdfReader(path)
        # Call extract_text() once per page; skip pages that yield no text.
        return "\n".join(
            text for page in reader.pages if (text := page.extract_text())
        )
    if ext == ".docx":
        return docx2txt.process(path)
    if ext == ".csv":
        return pd.read_csv(path).to_string(index=False)
    return ""


@lru_cache(maxsize=1)
def _get_embeddings():
    """Load the sentence-transformer embedding model once and reuse it.

    Loading the model is the slow part of indexing; caching it avoids a
    reload on every question.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


# ======================================
# 🔹 Build FAISS index
# ======================================
def build_faiss(text):
    """Split *text* into overlapping chunks and index them in FAISS."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
    return FAISS.from_documents(docs, _get_embeddings())


# ======================================
# 🔹 Generate answer using Groq API
# ======================================
def generate_answer(prompt):
    """Send *prompt* to the Groq chat API and return the model's reply.

    Returns a user-facing error string (not an exception) when the
    GROQ_API_KEY environment variable is missing.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "❌ GROQ_API_KEY not set. Add it in Space Settings → Variables and secrets."
    client = Groq(api_key=api_key)
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are a factual assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.5,
        max_tokens=500,
    )
    return response.choices[0].message.content.strip()


# ======================================
# 🔹 RAG pipeline
# ======================================
def answer_question(file, query):
    """Full RAG pipeline: extract → index → retrieve top-3 → generate.

    Returns a warning string for missing file/question or unreadable input.
    """
    if not file:
        return "⚠️ Please upload a document first."
    if not query.strip():
        return "⚠️ Please enter a question."
    text = extract_text(file)
    if not text:
        return "⚠️ Could not extract text from the file."
    db = build_faiss(text)
    retriever = db.as_retriever(search_kwargs={"k": 3})
    retrieved_docs = retriever.invoke(query)
    context = "\n".join(doc.page_content for doc in retrieved_docs)
    # Grounding prompt: force the model to answer only from retrieved context.
    prompt = f"""
You are a factual assistant. Use ONLY the provided context to answer.
If the context doesn’t contain enough info, say:
"The document does not provide enough information."

Context:
{context}

Question: {query}

Answer:
"""
    return generate_answer(prompt)


@lru_cache(maxsize=None)
def _load_metric(name):
    """Load an `evaluate` metric once; repeated evaluations reuse it."""
    return evaluate.load(name)


# ======================================
# 🔹 Optional evaluation
# ======================================
def evaluate_answer(answer, reference):
    """Score *answer* against *reference* with BLEU, ROUGE and BERTScore.

    Returns a Markdown report, or a short notice when no reference is given.
    """
    if not reference.strip():
        return "No reference answer provided."
    # BLEU
    bleu_score = _load_metric("bleu").compute(
        predictions=[answer], references=[[reference]]
    )["bleu"]
    # ROUGE
    rouge_result = _load_metric("rouge").compute(
        predictions=[answer], references=[reference]
    )
    rouge1_f = rouge_result["rouge1"]
    rouge2_f = rouge_result["rouge2"]
    rougel_f = rouge_result["rougeL"]
    # BERTScore returns one F1 per prediction; average them.
    bert = _load_metric("bertscore").compute(
        predictions=[answer], references=[reference], lang="en"
    )
    bert_f1 = sum(bert["f1"]) / len(bert["f1"])
    return f"""
**Evaluation Results:**
- **BLEU:** {bleu_score:.4f}
- **ROUGE-1 F1:** {rouge1_f:.4f}
- **ROUGE-2 F1:** {rouge2_f:.4f}
- **ROUGE-L F1:** {rougel_f:.4f}
- **BERTScore F1:** {bert_f1:.4f}
"""


# ======================================
# 🔹 Gradio Interface
# ======================================
with gr.Blocks(title="RAG Chatbot AI") as demo:
    gr.Markdown("# 📚 RAG Chatbot AI\nUpload a document and ask a question. Powered by Groq + LLaMA3-70B.")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload PDF / DOCX / CSV")
            question_input = gr.Textbox(label="Your Question")
            answer_btn = gr.Button("Get Answer")
        with gr.Column(scale=2):
            answer_output = gr.Textbox(label="Answer", lines=15)
            reference_input = gr.Textbox(label="Reference Answer (optional)")
            eval_btn = gr.Button("Evaluate Answer")
            eval_output = gr.Markdown()

    answer_btn.click(
        fn=answer_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )
    eval_btn.click(
        fn=evaluate_answer,
        inputs=[answer_output, reference_input],
        outputs=eval_output,
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)