shlokamhaisekar committed
Commit b7d653b · verified · 1 Parent(s): 7c18318

Update app.py

Files changed (1)
  1. app.py +92 -63
app.py CHANGED
@@ -1,93 +1,122 @@
  from huggingface_hub import InferenceClient

  # Step 1 from semantic search (import libraries)
  from sentence_transformers import SentenceTransformer
  import torch
  import gradio as gr
  import random

  client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
  #deepseek-ai/DeepSeek-R1-Distill-Qwen-32B

  # Open the recipes.txt file in read mode with UTF-8 encoding - step 2 from semantic search
  with open("recipes.txt", "r", encoding="utf-8") as file:
-     # Read the entire contents of the file and store it in a variable
-     recipes_text = file.read()

  # Print the text below
  print(recipes_text)

  def preprocess_text(text):
-     # Strip extra whitespace from the beginning and the end of the text
-     cleaned_text = text.strip()
-     # Split the cleaned_text at every period (.)
-     chunks = cleaned_text.split(".")
-     # Create an empty list to store cleaned chunks
-     cleaned_chunks = []
-     # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
-     for chunk in chunks:
-         clean = chunk.strip()
-         if len(chunk)>0:
-             cleaned_chunks.append(clean)
-     # Print cleaned_chunks
-     print(cleaned_chunks)
-     # Print the length of cleaned_chunks
-     print(len(cleaned_chunks))
-     # Return the cleaned_chunks
-     return cleaned_chunks

  # Call the preprocess_text function and store the result in a cleaned_chunks variable
  cleaned_chunks = preprocess_text(recipes_text) # Complete this line

  # Load the pre-trained embedding model that converts text to vectors
  model = SentenceTransformer('all-MiniLM-L6-v2')

  def create_embeddings(text_chunks):
-     # Convert each text chunk into a vector embedding and store as a tensor
-     chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True) # Replace ... with the text_chunks list
-     # Print the chunk embeddings
-     print(chunk_embeddings)
-     # Print the shape of chunk_embeddings
-     print(chunk_embeddings.shape)
-     # Return the chunk_embeddings
-     return chunk_embeddings

  # Call the create_embeddings function and store the result in a new chunk_embeddings variable
  chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line

  #Step 5
  # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
  def get_top_chunks(query, chunk_embeddings, text_chunks):
-     # Convert the query text into a vector embedding
-     query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
-     # Normalize the query embedding to unit length for accurate similarity comparison
-     query_embedding_normalized = query_embedding / query_embedding.norm()
-     # Normalize all chunk embeddings to unit length for consistent comparison
-     chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
-     # Calculate cosine similarity between query and all chunks using matrix multiplication
-     similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized) # Complete this line
-     # Print the similarities
-     print(similarities)
-     # Find the index of the chunk with the highest similarity score
-     top_indices = torch.topk(similarities, k=1).indices
-     # Print the top indices
-     print(top_indices)
-     # Create an empty list to store the most relevant chunks
-     top_chunks = []
-     # Loop through the top indices and retrieve the corresponding text chunks
-     for i in top_indices:
-         top_chunks.append(i)
-     print(top_chunks)
-     # Return the list of most relevant chunks
-     return top_chunks
- best_recipes_chunk = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
- print(best_recipes_chunk)

  def respond(message, history):
-     #responses = ["Yes", "No"]
-     #return random.choice(responses)
-     messages = [
-         {"role":"system",
-          "content": "You are a chatbot that is a nutrition expert and helps people with their nutritional goals. You help them plan meals."
-         }
-     ]
      if history:
          messages.extend(history)
-     messages.append(
-         {"role":"user",
-          "content": "message"}
-     )
-     response = client.chat_completion(messages, max_tokens=100, temperature=1.3, top_p=.2)
      #temperature and top_p control randomness
      return response['choices'][0]['message']['content'].strip()

  chatbot = gr.ChatInterface(respond, type="messages")
  chatbot.launch()
 
  from huggingface_hub import InferenceClient
+
  # Step 1 from semantic search (import libraries)
  from sentence_transformers import SentenceTransformer
  import torch
  import gradio as gr
  import random
+
  client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
  #deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
+
  # Open the recipes.txt file in read mode with UTF-8 encoding - step 2 from semantic search
  with open("recipes.txt", "r", encoding="utf-8") as file:
+     # Read the entire contents of the file and store it in a variable
+     recipes_text = file.read()
+
  # Print the text below
  print(recipes_text)
+
+ #Step 3
  def preprocess_text(text):
+     # Strip extra whitespace from the beginning and the end of the text
+     cleaned_text = text.strip()
+
+     # Split the cleaned_text at every period (.)
+     chunks = cleaned_text.split(".")
+
+     # Create an empty list to store cleaned chunks
+     cleaned_chunks = []
+
+     # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
+     for chunk in chunks:
+         clean = chunk.strip()
+
+         # Keep the chunk only if something is left after stripping
+         if len(clean) > 0:
+             cleaned_chunks.append(clean)
+
+     # Print cleaned_chunks
+     print(cleaned_chunks)
+
+     # Print the length of cleaned_chunks
+     print(len(cleaned_chunks))
+
+     # Return the cleaned_chunks
+     return cleaned_chunks
+
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
  cleaned_chunks = preprocess_text(recipes_text) # Complete this line
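
A quick sanity check of the chunking step, run on its own (the sample string below is invented for illustration):

    # Minimal sketch: feed a made-up two-sentence string through preprocess_text.
    sample = "Pasta with tomato sauce. Lentil soup with carrots. "
    sample_chunks = preprocess_text(sample)
    # Expected: ['Pasta with tomato sauce', 'Lentil soup with carrots'], length 2.
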
+
+ #Step 4
  # Load the pre-trained embedding model that converts text to vectors
  model = SentenceTransformer('all-MiniLM-L6-v2')
+
  def create_embeddings(text_chunks):
+     # Convert each text chunk into a vector embedding and store as a tensor
+     chunk_embeddings = model.encode(text_chunks, convert_to_tensor=True) # Replace ... with the text_chunks list
+
+     # Print the chunk embeddings
+     print(chunk_embeddings)
+
+     # Print the shape of chunk_embeddings
+     print(chunk_embeddings.shape)
+
+     # Return the chunk_embeddings
+     return chunk_embeddings
+
  # Call the create_embeddings function and store the result in a new chunk_embeddings variable
  chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line
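
For reference, all-MiniLM-L6-v2 produces 384-dimensional embeddings, so the printed shape should be (number_of_chunks, 384). A standalone sketch using the model loaded above:

    # Two made-up chunks in, one 384-dim vector out per chunk.
    emb = model.encode(["first chunk", "second chunk"], convert_to_tensor=True)
    print(emb.shape)  # torch.Size([2, 384])
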
+
  #Step 5
  # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
  def get_top_chunks(query, chunk_embeddings, text_chunks):
+     # Convert the query text into a vector embedding
+     query_embedding = model.encode(query, convert_to_tensor=True) # Complete this line
+
+     # Normalize the query embedding to unit length for accurate similarity comparison
+     query_embedding_normalized = query_embedding / query_embedding.norm()
+
+     # Normalize all chunk embeddings to unit length for consistent comparison
+     chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)
+
+     # Calculate cosine similarity between query and all chunks using matrix multiplication
+     similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized) # Complete this line
+
+     # Print the similarities
+     print(similarities)
+
+     # Find the index of the chunk with the highest similarity score
+     top_indices = torch.topk(similarities, k=1).indices
+
+     # Print the top indices
+     print(top_indices)
+
+     # Create an empty list to store the most relevant chunks
+     top_chunks = []
+
+     # Loop through the top indices and retrieve the corresponding text chunks
+     for i in top_indices:
+         top_chunks.append(text_chunks[i])
+     print(top_chunks)
+
+     # Return the list of most relevant chunks
+     return top_chunks
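
The retrieval above is plain cosine similarity: once both sides are normalized to unit length, the dot product equals the cosine of the angle between the vectors. A self-contained sketch with toy tensors (values invented for illustration):

    import torch

    # Three fake 2-dim "chunk embeddings" and one "query embedding".
    chunks = torch.tensor([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    query = torch.tensor([1.0, 0.1])

    chunks_n = chunks / chunks.norm(dim=1, keepdim=True)  # unit-length rows
    query_n = query / query.norm()                        # unit-length query
    sims = torch.matmul(chunks_n, query_n)                # cosine similarities
    print(torch.topk(sims, k=1).indices)                  # tensor([0])
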
+
  def respond(message, history):
+     # Retrieve the chunks most relevant to the user's message
+     best_recipes_chunk = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
+     print(best_recipes_chunk)
+
+     messages = [{"role":"system","content": "You are a chatbot that is a nutrition expert and helps people with their nutritional goals. You help them plan meals based on " + " ".join(best_recipes_chunk) + "."}]
+
      if history:
          messages.extend(history)
+
+     messages.append({"role":"user","content": message})
+
+     response = client.chat_completion(messages, max_tokens=100, temperature=1.3, top_p=0.3)
+
      #temperature and top_p control randomness
      return response['choices'][0]['message']['content'].strip()
+
  chatbot = gr.ChatInterface(respond, type="messages")
  chatbot.launch()
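
A rough sketch of exercising respond outside the Gradio UI (the query string is invented; gr.ChatInterface normally supplies both arguments):

    # Hypothetical manual call with an empty history.
    reply = respond("What can I cook with lentils?", [])
    print(reply)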