"""RAG chatbot: retrieves relevant knowledge-base chunks and streams answers."""

# stdlib / third-party imports
import gradio as gr
import torch
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer

# Embedding model for retrieval + client for the hosted chat model.
model = SentenceTransformer('all-MiniLM-L6-v2')
client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")

# Load the knowledge base once at startup.
with open("tenet_knowledge_base.txt", "r", encoding="utf-8") as file:
    tenet_knowledge_base = file.read()

# Split the knowledge base into non-empty, whitespace-trimmed line chunks,
# then embed every chunk once so each query needs only a single encode call.
cleaned_chunks = [
    chunk.strip()
    for chunk in tenet_knowledge_base.strip().split("\n")
    if chunk.strip()
]
chunk_embeddings = model.encode(cleaned_chunks, convert_to_tensor=True)


def get_top_chunks(query, k=3):
    """Return the k knowledge-base chunks most similar to *query*.

    Ranks chunks by cosine similarity between the query embedding and the
    precomputed chunk embeddings. k is clamped to the number of available
    chunks so a small knowledge base cannot make torch.topk raise.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarities = torch.nn.functional.cosine_similarity(
        query_embedding.unsqueeze(0), chunk_embeddings
    )
    top_indices = torch.topk(similarities, k=min(k, len(cleaned_chunks))).indices
    return [cleaned_chunks[i] for i in top_indices]


def format_context(chunks):
    """Format retrieved chunks into a numbered context block for the prompt."""
    return "\n".join([f"Context {i+1}: {chunk}" for i, chunk in enumerate(chunks)])


def _history_to_messages(history):
    """Convert gradio chat history into OpenAI-style message dicts.

    Handles both formats gradio may pass depending on version:
    (user, assistant) pairs, or already-structured {"role", "content"} dicts.
    Empty/None turns are skipped.
    """
    messages = []
    for entry in history or []:
        if isinstance(entry, dict):
            messages.append({"role": entry["role"], "content": entry["content"]})
        else:
            user_turn, bot_turn = entry
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if bot_turn:
                messages.append({"role": "assistant", "content": bot_turn})
    return messages


def respond(message, history):
    """Stream an answer grounded in retrieved knowledge-base context.

    Retrieves the top chunks for *message*, builds a system prompt around
    them, replays the prior conversation (bug fix: the original ignored
    *history*, so the bot had no memory across turns), and yields the
    growing response as tokens stream in.
    """
    top_chunks = get_top_chunks(message)
    context = format_context(top_chunks)
    system_prompt = f"""You are a knowledgeable and kind assistant. Use this context to answer: {context} Make sure that you clarify that information may be inaccurate. If you do not know, please state that you are unsure. Give a response in two sentences or less. Also, embed as many links as possible and make sure to hyperlink whenever you can, but you do not have to provide links unless you have them. 
DO NOT MAKE UP FAKE LINKS."""

    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Stream the completion so the UI shows partial output immediately.
    response = ""
    for chunk in client.chat_completion(
        messages, max_tokens=200, temperature=0.15, top_p=0.7, stream=True
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


# Gradio chat UI; guard so importing this module does not launch a server.
if __name__ == "__main__":
    gr.ChatInterface(
        respond,
        chatbot=gr.Chatbot(height=400),
        textbox=gr.Textbox(placeholder="Ask your questions here..."),
        examples=[
            "How can I make a difference if I’m not old enough to vote?",
            "What’s the best way to get involved in a cause I care about?",
            "How do I know if the information I’m sharing or supporting is true?",
        ],
    ).launch()