import gradio as gr
import random
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", provider='hf-inference')

# Loading and processing the knowledge base.
with open("bookbans.txt", "r", encoding="utf-8") as file:
    book_bans_text = file.read()

# Cleaning and chunking the text: one chunk per non-blank line.
cleaned_text = book_bans_text.strip()
chunks = cleaned_text.split("\n")
cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]

# Importing the model used for embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2')
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)

# The chunk embeddings never change after load, so normalize them once here
# instead of on every query. Skipped when there are no chunks, because the
# row-wise norm needs a 2-D tensor.
chunk_embeddings_normalized = (
    chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    if cleaned_chunks
    else chunk_embeddings
)


def get_top_chunk(message, top_k=1):
    """Return the knowledge-base chunks most similar to ``message``.

    Similarity is the dot product of the L2-normalized query embedding with
    the L2-normalized chunk embeddings (i.e. cosine similarity).

    Args:
        message: The user query text to embed.
        top_k: Maximum number of chunks to return. Defaults to 1 and is
            clamped to the number of available chunks.

    Returns:
        A list of chunk strings ordered from most to least similar. The list
        is empty when the knowledge base has no chunks or ``top_k`` < 1.
    """
    if not cleaned_chunks or top_k < 1:
        return []

    query_embedding = model.encode(message, convert_to_tensor=True)
    query_embedding_normalized = query_embedding / query_embedding.norm()
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)

    # torch.topk raises if k exceeds the number of candidates.
    k = min(top_k, len(cleaned_chunks))
    top_indices = torch.topk(similarities, k=k).indices.tolist()

    # The embeddings were computed from cleaned_chunks, so the indices must be
    # resolved against cleaned_chunks; the raw `chunks` list still contains
    # blank lines and is offset from it.
    return [cleaned_chunks[i] for i in top_indices]


def respond(message, history):
    """Stream a chat reply to ``message``, grounded in the best-matching chunk.

    Args:
        message: The latest user message.
        history: Prior conversation turns as role/content message dicts
            (the ChatInterface is created with ``type='messages'``); may be
            empty or None.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    system_message = "You are a knowledgable and friendly chatbot that gives good information."
    # Join the retrieved chunks into plain text so the prompt does not carry
    # a Python list repr (brackets and quotes).
    context = "\n".join(get_top_chunk(message))

    messages = [{"role": "system", "content": system_message}]
    if history:
        messages.extend(history)
    user_context = f"{message}\nInformation: {context}"
    messages.append({"role": "user", "content": user_context})

    response = ""
    # The loop variable is deliberately not called `message`, so it does not
    # shadow the function parameter.
    for stream_chunk in client.chat_completion(
        messages,
        max_tokens=300,
        temperature=1.3,
        top_p=0.4,
        stream=True,
    ):
        if not stream_chunk.choices:
            continue
        token = stream_chunk.choices[0].delta.content
        # A streamed delta may carry no text (content is None); concatenating
        # None to a str would raise TypeError.
        response += token or ""
        yield response


chatbot = gr.ChatInterface(
    respond,
    type='messages',
    title="Ask me about AI!",
    description="An AI assistant to keep you updated on recent book banning news!",
    examples=[
        "What are the most common genres of book bans?",
        "Where in the US are the most book banning actions?",
        "How many books were banned in 2024?",
    ],
    theme='shivi/calm_seafoam',
)
chatbot.launch()