Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import pypdf | |
| import docx2txt | |
| from typing import List | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_core.documents import Document | |
| import evaluate | |
| import os | |
| # Optional: Groq API integration | |
| from groq import Groq | |
# ======================================
# 🔹 Extract text from uploaded file
# ======================================
def extract_text(file):
    """Return the plain text of an uploaded PDF, DOCX, or CSV file.

    Args:
        file: A Gradio upload object exposing a ``.name`` path attribute,
            or a falsy value when nothing was uploaded.

    Returns:
        The extracted text, or "" for missing files and unsupported types.
    """
    if not file:
        return ""
    # Compare extensions case-insensitively so ".PDF" / ".Docx" also work.
    lowered = file.name.lower()
    if lowered.endswith(".pdf"):
        reader = pypdf.PdfReader(file.name)
        # extract_text() may return None/"" for image-only pages; call it
        # once per page (the original called it twice) and keep non-empty text.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(text for text in page_texts if text)
    if lowered.endswith(".docx"):
        return docx2txt.process(file.name)
    if lowered.endswith(".csv"):
        df = pd.read_csv(file.name)
        return df.to_string(index=False)
    return ""
# ======================================
# 🔹 Build FAISS index
# ======================================
def build_faiss(text):
    """Split *text* into overlapping chunks and index them in FAISS.

    The sentence-transformers embedding model is loaded once and cached on
    the function object — re-initialising it for every uploaded document
    (as the original did) is by far the slowest part of the pipeline.

    Args:
        text: Raw document text to index.

    Returns:
        A FAISS vector store over ~1000-char chunks with 100-char overlap.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = [Document(page_content=chunk) for chunk in splitter.split_text(text)]
    embeddings = getattr(build_faiss, "_embeddings", None)
    if embeddings is None:
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        build_faiss._embeddings = embeddings  # cache across calls
    return FAISS.from_documents(docs, embeddings)
# ======================================
# 🔹 Generate answer using Groq API
# ======================================
def generate_answer(prompt):
    """Send *prompt* to Groq's LLaMA-3.3-70B chat endpoint and return the reply.

    Returns a human-readable error string (rather than raising) when the API
    key is missing or the request fails, so the Gradio handler never crashes.

    Args:
        prompt: Fully-formed user prompt including any RAG context.

    Returns:
        The model's reply text, stripped, or a "❌ ..." error message.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        return "❌ GROQ_API_KEY not set. Add it in Space Settings → Variables and secrets."
    try:
        client = Groq(api_key=api_key)
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": "You are a factual assistant."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.5,  # moderate temperature: factual but not rigid
            max_tokens=500,
        )
        return response.choices[0].message.content.strip()
    except Exception as exc:  # network / auth / rate-limit failures
        return f"❌ Groq API error: {exc}"
# ======================================
# 🔹 RAG pipeline
# ======================================
def answer_question(file, query):
    """Full RAG pass: extract text, retrieve relevant chunks, ask the LLM."""
    # Guard clauses: validate inputs before doing any expensive work.
    if not file:
        return "⚠️ Please upload a document first."
    if not query.strip():
        return "⚠️ Please enter a question."

    document_text = extract_text(file)
    if not document_text:
        return "⚠️ Could not extract text from the file."

    # Index the document and pull the 3 most similar chunks as context.
    index = build_faiss(document_text)
    hits = index.as_retriever(search_kwargs={"k": 3}).invoke(query)
    context_block = "\n".join(doc.page_content for doc in hits)

    rag_prompt = f"""
You are a factual assistant. Use ONLY the provided context to answer.
If the context doesn’t contain enough info, say:
"The document does not provide enough information."
Context:
{context_block}
Question: {query}
Answer:
"""
    return generate_answer(rag_prompt)
# ======================================
# 🔹 Optional evaluation
# ======================================
def evaluate_answer(answer, reference):
    """Score *answer* against *reference* with BLEU, ROUGE, and BERTScore.

    Metric objects are memoised on the function object so repeated clicks of
    "Evaluate Answer" do not re-download / re-initialise them every time
    (the original re-ran ``evaluate.load`` for all three metrics per call).

    Args:
        answer: Model-produced answer text.
        reference: Ground-truth answer; blank means "skip evaluation".

    Returns:
        A Markdown report of the scores, or a short notice when *reference*
        is empty.
    """
    if not reference.strip():
        return "No reference answer provided."

    cache = getattr(evaluate_answer, "_metrics", None)
    if cache is None:
        cache = evaluate_answer._metrics = {}

    def load_metric(name):
        # Lazily load and memoise each HF `evaluate` metric by name.
        if name not in cache:
            cache[name] = evaluate.load(name)
        return cache[name]

    # BLEU expects a list of reference *lists* per prediction.
    bleu_score = load_metric("bleu").compute(
        predictions=[answer], references=[[reference]]
    )["bleu"]

    rouge_result = load_metric("rouge").compute(
        predictions=[answer], references=[reference]
    )

    bert = load_metric("bertscore").compute(
        predictions=[answer], references=[reference], lang="en"
    )
    # BERTScore returns one F1 per prediction; average for a single number.
    bert_f1 = sum(bert["f1"]) / len(bert["f1"])

    return f"""
**Evaluation Results:**
- **BLEU:** {bleu_score:.4f}
- **ROUGE-1 F1:** {rouge_result["rouge1"]:.4f}
- **ROUGE-2 F1:** {rouge_result["rouge2"]:.4f}
- **ROUGE-L F1:** {rouge_result["rougeL"]:.4f}
- **BERTScore F1:** {bert_f1:.4f}
"""
# ======================================
# 🔹 Gradio Interface
# ======================================
# Declarative UI: nesting inside the Blocks/Row/Column context managers
# determines the page layout, so statement order matters here.
with gr.Blocks(title="RAG Chatbot AI") as demo:
    gr.Markdown("# 📚 RAG Chatbot AI\nUpload a document and ask a question. Powered by Groq + LLaMA3-70B.")
    with gr.Row():
        # Left (narrow) column: inputs.
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload PDF / DOCX / CSV")
            question_input = gr.Textbox(label="Your Question")
            answer_btn = gr.Button("Get Answer")
        # Right (wide) column: model answer plus optional evaluation widgets.
        with gr.Column(scale=2):
            answer_output = gr.Textbox(label="Answer", lines=15)
            reference_input = gr.Textbox(label="Reference Answer (optional)")
            eval_btn = gr.Button("Evaluate Answer")
            eval_output = gr.Markdown()
    # Wire the buttons: RAG pipeline, then metric report over its output.
    answer_btn.click(fn=answer_question, inputs=[file_input, question_input], outputs=answer_output)
    eval_btn.click(fn=evaluate_answer, inputs=[answer_output, reference_input], outputs=eval_output)

# 0.0.0.0:7860 is the conventional bind address/port for a HF Space container.
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)