Spaces:
Sleeping
Sleeping
File size: 1,434 Bytes
2e19ee4 bad33a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
from sentence_transformers import SentenceTransformer
import torch
# 1. Load the text database
# Read the whole knowledge file up front; each non-empty line becomes one
# retrievable chunk.
with open("dogs.txt", "r", encoding="utf-8") as file:
    dogs_text = file.read()

cleaned_text = dogs_text.strip()
chunks = cleaned_text.split("\n")
# Trim every line and drop the ones that are empty after trimming.
cleaned_chunks = list(filter(None, map(str.strip, chunks)))

# 2. Load model and embed text chunks
# The chunk embeddings are computed once at startup and reused for every query.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(
    cleaned_chunks,
    convert_to_tensor=True,
)
# 3. Define retrieval function
def get_top_chunks(query, top_k=3):
    """Return the text chunks most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared to the
    precomputed ``chunk_embeddings`` using cosine similarity (dot product of
    L2-normalized vectors).

    Args:
        query: Text to search the chunk database for.
        top_k: Maximum number of chunks to return. Clamped to the number of
            available chunks.

    Returns:
        A list of chunk strings from ``cleaned_chunks``, most similar first.
        The list is empty when there are no chunks or ``top_k`` is not
        positive.
    """
    # torch.topk raises if k is larger than the number of candidates, so
    # never request more chunks than the database actually holds.
    k = min(top_k, len(cleaned_chunks))
    if k <= 0:
        return []

    query_embedding = model.encode(query, convert_to_tensor=True)
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # With both sides unit-length, the matrix-vector product yields one
    # cosine similarity score per chunk.
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    top_indices = torch.topk(similarities, k=k).indices

    # Convert to plain Python ints before indexing the list of chunks.
    return [cleaned_chunks[i] for i in top_indices.tolist()]
# 4. Define chatbot response function
def responses(message, history):
    """Build the chatbot reply for ``message`` from the retrieved chunks.

    Args:
        message: The user's latest chat message; used as the retrieval query.
        history: Prior conversation passed in by the chat interface. It is
            accepted but not used by this function.

    Returns:
        A string that wraps the retrieved chunks, one per line, in a fixed
        reply template.
    """
    context = "\n".join(get_top_chunks(message))
    # Simple template generator: no language model, just the retrieved text.
    return f"Based on what I found:\n{context}\n\nHope this helps answer your question!"
# 5. Gradio chat interface
# Wire the response function into a chat UI and start serving it.
demo = gr.ChatInterface(responses, title="Dogs RAG Chatbot")
demo.launch()