# NOTE: scraped HuggingFace Spaces page metadata (Space status, file size,
# commit hashes, and the file-view line-number gutter) was removed here —
# it was page chrome, not part of the source file.
import pandas as pd
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
import gradio as gr
# Load the Q/A dataset from disk (expects "question" and "answer" columns —
# TODO confirm against the actual CSV schema).
dataset = pd.read_csv("dataset.csv")

# Convert each row into a LangChain Document: the answer text is the
# searchable content; the originating question rides along as metadata.
documents = []
for _, row in dataset.iterrows():
    doc = Document(
        page_content=str(row["answer"]),
        metadata={"question": str(row["question"])},
    )
    documents.append(doc)

# Sentence-transformer embeddings used to index and query the vector store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Local seq2seq LLM wrapped as a HuggingFace text2text-generation pipeline.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = 0 if torch.cuda.is_available() else -1  # first GPU if available, else CPU
pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    device=device,
)
llm = HuggingFacePipeline(pipeline=pipe)

# Build the FAISS index over the documents and expose a top-3 retriever.
vector_store = FAISS.from_documents(documents, embeddings)
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Retrieval-augmented QA chain; source documents are returned so the UI
# can show where each answer came from.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)
# Chatbot function
def chatbot_interface(question: str) -> str:
    """Answer *question* with the retrieval-QA chain and list the source texts.

    Returns a plain string for the Gradio Textbox: either the formatted
    answer plus its retrieved sources, or a human-readable error message.
    Never raises — failures are reported as text so the UI stays up.
    """
    # Robustness: reject blank input before invoking the chain at all,
    # instead of sending an empty query to the LLM.
    if not question or not question.strip():
        return "Please enter a question."
    if not qa_chain:
        return "Chatbot backend not initialized properly."
    try:
        response = qa_chain.invoke({"query": question})
        answer = response.get("result", "No answer found.")
        sources = response.get("source_documents", [])
        source_texts = [doc.page_content for doc in sources]
        return f"Answer: {answer}\n\nSources:\n" + "\n".join(f"- {text}" for text in source_texts)
    except Exception as e:
        # UI boundary: surface the error as text rather than crashing Gradio.
        return f"Error: {e}"
# Gradio UI: a single-textbox front end over chatbot_interface.
interface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(label="Enter your question"),
    outputs=gr.Textbox(label="Response"),
    title="RAG Chatbot",
    description="Ask questions about AI, ChatBots, NLP, and more.",
    theme="default"
)
# Launch the interface
# Fix: removed the stray trailing " |" (scraping residue) that made this
# line a syntax error.
interface.launch()