# Retrieval-augmented chatbot: Gradio UI + Hugging Face Inference API.
# Install dependencies from the shell first (not inside this script):
#   pip install gradio huggingface_hub sentence_transformers torch

import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# Load the raw knowledge base from disk.
with open("knowledge.txt", "r", encoding="utf-8") as f:
    knowledge_base = f.read()
print("Knowledge base loaded.")

# Split the text into one chunk per non-empty line.
cleaned_text = knowledge_base.strip()
chunks = cleaned_text.split("\n")
cleaned_chunks = []
for chunk in chunks:
    stripped_chunk = chunk.strip()
    if stripped_chunk:
        cleaned_chunks.append(stripped_chunk)
print(cleaned_chunks)

# Embed every chunk once, up front, so each query only needs one new embedding.
model = SentenceTransformer("all-MiniLM-L6-v2")
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)
print(chunk_embeddings)


def get_top_chunks(query):
    """Return the chunks most similar to the query by cosine similarity."""
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Normalize both sides so the dot product equals cosine similarity.
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    print(similarities)

    # k must not exceed the number of available chunks.
    top_indices = torch.topk(similarities, k=min(3, len(cleaned_chunks))).indices
    print(top_indices)

    # Index into cleaned_chunks, not chunks: the embeddings were built from
    # cleaned_chunks, so only those indices line up with the similarity scores.
    top_chunks = []
    for i in top_indices:
        top_chunks.append(cleaned_chunks[i])
    return top_chunks


client = InferenceClient("google/gemma-3-27b-it")


def respond(message, history):
    # Retrieve the most relevant chunks and hand them to the model as context.
    context = "\n".join(get_top_chunks(message))
    system_prompt = (
        "You're a supportive and helpful feminist. "
        f"Use this context when answering:\n{context}"
    )
    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Stream tokens back so the UI updates while the model generates.
    response = ""
    for chunk_msg in client.chat_completion(messages, max_tokens=150, stream=True):
        token = chunk_msg.choices[0].delta.content
        if token:  # the final streamed delta can carry None content
            response += token
            yield response


chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch(debug=True)
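
# A minimal retrieval sanity check (a sketch, assuming knowledge.txt holds one
# fact per line; the query string below is only an illustration). Run it from a
# Python shell before launching the app to confirm the top-3 chunks look right:
#
#   >>> get_top_chunks("What topics does the knowledge base cover?")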