import gradio as gr
import random
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
import torch
import glob
client = InferenceClient("deepseek-ai/DeepSeek-R1-Distill-Qwen-32B")
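# The client calls a hosted chat model through the Hugging Face Inference API,
# so no local GPU is needed; the Space only has to be able to reach the API.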
def respond(message, history):
    print("DEBUG: respond() called with:", message)
    top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(top_results)

    # Format the retrieved context for the LLM
    if top_results:
        formatted_info = "\n".join(f"- {chunk}" for chunk in top_results)
        system_prompt = (
            "You are a friendly chatbot that gives advice about nutrition for dogs.\n"
            f"Use the following information to guide your response:\n{formatted_info}\n"
            "Respond in complete sentences and apply common sense. If the user asks about "
            "something not in the list, give a cautious answer and suggest checking with a vet."
        )
    else:
        system_prompt = (
            "You are a friendly chatbot that gives advice about what dogs can eat.\n"
            "If the user asks about a food not in your database, respond cautiously "
            "and suggest checking with a vet."
        )

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = client.chat_completion(messages, max_tokens=500, temperature=0.2)
    return response.choices[0].message.content.strip()
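
# For reference, the messages list sent to chat_completion follows the standard
# role/content chat format; the values below are illustrative only:
#
#   [
#       {"role": "system", "content": "You are a friendly chatbot that gives advice..."},
#       {"role": "user", "content": "Can my dog eat grapes?"},
#   ]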
print("hello world")
#chatbot = gr.ChatInterface(respond, type="messages", title = "LLM Chatbox", theme = "gradio/soft")
# declaring chatbot so that user can interact and see their conversation history and send new messages
# ===== LOAD & PROCESS THE KNOWLEDGE BASE =====
#with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
# Read the entire contents of the file and store it in a variable
# toxic_food_text = file.read()
all_texts = []
for filepath in glob.glob("data/*.txt"):
    with open(filepath, "r", encoding="utf-8") as file:
        all_texts.append(file.read())
combined_text = "\n".join(all_texts)
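
# Assumed layout of the knowledge base (filenames hypothetical, taken from the
# loaders this Space previously used): plain-text files with one chunk of
# nutrition advice per line, e.g.
#
#   data/toxic_foods_for_dogs.txt
#   data/food_brand_options.txt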
#with open("food_brand_options.txt", "r", encoding:"utf-8") as f:
# brand_options = f.read()
#with open("foods_not_safe.txt", "r", encoding:"utf-8") as file:
# not_safe
def preprocess_text(text):
    # Strip extra whitespace from the beginning and end of the text
    cleaned_text = text.strip()
    # Split the cleaned text on every newline character (\n)
    chunks = cleaned_text.split("\n")
    # Keep each chunk stripped of surrounding whitespace, skipping empty lines
    cleaned_chunks = []
    for chunk in chunks:
        stripped_chunk = chunk.strip()
        if stripped_chunk:
            cleaned_chunks.append(stripped_chunk)
    print(cleaned_chunks)
    print(len(cleaned_chunks))
    return cleaned_chunks
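
# Quick illustration of what preprocess_text produces (hypothetical input):
#
#   preprocess_text("  Grapes: toxic to dogs\n\n  Carrots: safe in moderation \n")
#   -> ['Grapes: toxic to dogs', 'Carrots: safe in moderation']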
cleaned_chunks = preprocess_text(combined_text)
# Load the pre-trained embedding model that converts text to vectors
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_embeddings(text_chunks):
    # Convert each text chunk into a vector embedding and store them as one tensor
    chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True)
    print(chunk_embeddings)
    print(chunk_embeddings.shape)
    return chunk_embeddings
# Call the create_embeddings function and store the result in a new chunk_embeddings variable
chunk_embeddings = create_embeddings(cleaned_chunks)
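
# Sanity check: all-MiniLM-L6-v2 produces 384-dimensional vectors, so the printed
# shape should be torch.Size([len(cleaned_chunks), 384]).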
# Find the most relevant text chunks for a given query
def get_top_chunks(query, chunk_embeddings, text_chunks):
    # Convert the query text into a vector embedding
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Normalize the query embedding to unit length for cosine similarity
    query_embedding_normalized = query_embedding / query_embedding.norm()
    # Normalize the chunk embeddings row-wise (handle a single 1-D embedding too)
    if chunk_embeddings.ndim == 1:
        chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm()
    else:
        chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
    # Cosine similarity between the query and every chunk via matrix multiplication
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    print(similarities)
    # Indices of the (up to) 3 chunks with the highest similarity scores
    top_indices = torch.topk(similarities, k=min(3, len(text_chunks))).indices
    print(top_indices)
    # Collect the matching chunks from the text_chunks argument (not the global)
    top_chunks = []
    for i in top_indices:
        top_chunks.append(text_chunks[i])
    # Return the list of most relevant chunks
    return top_chunks
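
# Optional retrieval smoke test -- commented out so it does not run on every launch.
# The query is a hypothetical example; any dog-nutrition question works:
#
#   print(get_top_chunks("Can my dog eat grapes?", chunk_embeddings, cleaned_chunks))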
# Custom theme for the Blocks UI
custom_theme = gr.themes.Ocean(
    primary_hue="yellow",
    secondary_hue="yellow",
    neutral_hue="rose",
    spacing_size="lg",
    radius_size="lg",
    text_size="lg",
    font=[gr.themes.GoogleFont("Intel One Mono"), "serif"],
)
about_text = (
    "## About this bot\n"
    "Our bot gives advice on caring for your dog's nutrition. "
    "Use the chat box on the right to try it out!"
)
with gr.Blocks(theme=custom_theme) as chatbot:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(about_text)
        with gr.Column(scale=2):
            # Chat interface so the user can see conversation history and send new messages
            gr.ChatInterface(respond, type="messages", title="LLM Chatbox")
    with gr.Row():
        level = gr.Dropdown(
            choices=["Small", "Medium", "Large"],
            label="Dog Size",
            info="What is your dog's size?",
        )

chatbot.launch()