import gradio as gr
from sentence_transformers import SentenceTransformer
import torch

# 1. Load the text database
with open("dogs.txt", "r", encoding="utf-8") as file:
    dogs_text = file.read()

cleaned_text = dogs_text.strip()
chunks = cleaned_text.split("\n")
cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]

# 2. Load model and embed text chunks
model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
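# Each non-empty line of dogs.txt becomes one retrievable chunk; with this
# model (all-MiniLM-L6-v2) every chunk is encoded as a 384-dimensional tensor.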

# 3. Define retrieval function
def get_top_chunks(query, top_k=3):
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Normalize both sides so the dot product below equals cosine similarity
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    # Never ask topk for more results than there are chunks
    k = min(top_k, len(cleaned_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    top_chunks = [cleaned_chunks[int(i)] for i in top_indices]
    return top_chunks
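
# A minimal sanity check for the retriever, assuming dogs.txt holds one dog
# fact per line (the query string here is only illustrative):
#   print(get_top_chunks("Which breeds are good with children?", top_k=2))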

# 4. Define chatbot response function
def respond(message, history):
    top_chunks = get_top_chunks(message)
    context = "\n".join(top_chunks)
    # Simple template-based answer built from the retrieved chunks
    answer = f"Based on what I found:\n{context}\n\nHope this helps answer your question!"
    return answer
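
# Note: the template above keeps the demo self-contained; in a fuller RAG
# pipeline the retrieved context would typically be interpolated into a
# prompt for a language model instead.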

# 5. Gradio chat interface
demo = gr.ChatInterface(respond, title="Dogs RAG Chatbot")
demo.launch()