shlokamhaisekar committed on
Commit
da19ed2
·
verified ·
1 Parent(s): ed4bb55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -26
app.py CHANGED
@@ -1,65 +1,93 @@
1
  from huggingface_hub import InferenceClient
2
-
3
  #STEP1FROMSEMANTICSEARCH (import libraries)
4
  from sentence_transformers import SentenceTransformer
5
  import torch
6
  import gradio as gr
7
  import random
8
-
9
- client = InferenceClient("Qwen/Qwen2.5-72B-Instruct")
10
  #deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
11
-
12
  # Open the recipes.txt file in read mode with UTF-8 encoding - step 2 from semantic search
13
  with open("recipes.txt", "r", encoding="utf-8") as file:
14
  # Read the entire contents of the file and store it in a variable
15
  recipes_text = file.read()
16
-
17
  # Print the text below
18
  print(recipes_text)
19
-
20
  def preprocess_text(text):
21
  # Strip extra whitespace from the beginning and the end of the text
22
  cleaned_text = text.strip()
23
-
24
  # Split the cleaned_text by every newline character (\n)
25
  chunks = cleaned_text.split(".")
26
-
27
  # Create an empty list to store cleaned chunks
28
  cleaned_chunks = []
29
-
30
  # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
31
  for chunk in chunks:
32
  clean = chunk.strip()
33
  if len(chunk)>0:
34
  cleaned_chunks.append(clean)
35
-
36
  # Print cleaned_chunks
37
  print(cleaned_chunks)
38
-
39
  # Print the length of cleaned_chunks
40
-
41
  print(len(cleaned_chunks))
42
-
43
  # Return the cleaned_chunks
44
  return cleaned_chunks
45
-
46
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
47
  cleaned_chunks = preprocess_text(recipes_text) # Complete this line
48
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def respond(message, history):
50
- messages = [{"role":"system", "content": "You are a chatbot that is very sweet and kind"}]
51
-
 
 
 
 
 
52
  if history:
53
  messages.extend(history)
54
-
55
- messages.append({"role":"user","content": message})
56
-
57
- response = client.chat_completion(messages, max_tokens = 100, temperature = 1.3, top_p = .3)
 
58
  #temperature and top_p control randomness
59
-
60
- print(response)
61
-
62
  return response['choices'][0]['message']['content'].strip()
63
-
64
  chatbot = gr.ChatInterface(respond, type="messages")
65
- chatbot.launch()
 
1
  from huggingface_hub import InferenceClient
 
2
  #STEP1FROMSEMANTICSEARCH (import libraries)
3
  from sentence_transformers import SentenceTransformer
4
  import torch
5
  import gradio as gr
6
  import random
7
+ client=InferenceClient("Qwen/Qwen2.5-72B-Instruct")
 
8
  #deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
 
9
  # Open the recipes.txt file in read mode with UTF-8 encoding - step 2 from semantic search
10
  with open("recipes.txt", "r", encoding="utf-8") as file:
11
  # Read the entire contents of the file and store it in a variable
12
  recipes_text = file.read()
 
13
  # Print the text below
14
  print(recipes_text)
 
15
  def preprocess_text(text):
16
  # Strip extra whitespace from the beginning and the end of the text
17
  cleaned_text = text.strip()
 
18
  # Split the cleaned_text by every newline character (\n)
19
  chunks = cleaned_text.split(".")
 
20
  # Create an empty list to store cleaned chunks
21
  cleaned_chunks = []
 
22
  # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
23
  for chunk in chunks:
24
  clean = chunk.strip()
25
  if len(chunk)>0:
26
  cleaned_chunks.append(clean)
 
27
  # Print cleaned_chunks
28
  print(cleaned_chunks)
 
29
  # Print the length of cleaned_chunks
 
30
  print(len(cleaned_chunks))
 
31
  # Return the cleaned_chunks
32
  return cleaned_chunks
 
33
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
34
  cleaned_chunks = preprocess_text(recipes_text) # Complete this line
35
+ # Load the pre-trained embedding model that converts text to vectors
36
+ model = SentenceTransformer('all-MiniLM-L6-v2')
37
def create_embeddings(text_chunks):
    """Encode the given text chunks with the module-level ``model``.

    The result of ``model.encode(..., convert_to_tensor=True)`` and its
    ``shape`` are printed for inspection before the result is returned.

    Args:
        text_chunks: The chunk strings to embed.

    Returns:
        Whatever ``model.encode`` produced for ``text_chunks``.
    """
    embeddings = model.encode(text_chunks, convert_to_tensor=True)

    # Debug output: first the embeddings themselves, then their shape.
    for debug_value in (embeddings, embeddings.shape):
        print(debug_value)

    return embeddings
46
+ # Call the create_embeddings function and store the result in a new chunk_embeddings variable
47
+ chunk_embeddings = create_embeddings(cleaned_chunks) # Complete this line
48
+ #Step 5
49
+ # Define a function to find the most relevant text chunks for a given query, chunk_embeddings, and text_chunks
50
def get_top_chunks(query, chunk_embeddings, text_chunks, top_k=1):
    """Return the text chunks most similar to ``query``.

    Similarity is the cosine similarity between the embedding of ``query``
    (computed with the module-level ``model``) and each row of
    ``chunk_embeddings``.

    Args:
        query: The query text.
        chunk_embeddings: 2-D tensor of embeddings; row ``i`` must belong to
            ``text_chunks[i]``.
        text_chunks: The chunk strings the embeddings were computed from.
        top_k: Maximum number of chunks to return. Defaults to 1; values
            larger than ``len(text_chunks)`` are clamped.

    Returns:
        A list of up to ``top_k`` chunk strings, most similar first. Empty
        when there are no chunks or ``top_k`` is not positive.
    """
    # Nothing to rank: torch.topk would raise if asked for more items than exist.
    if not text_chunks or top_k <= 0:
        return []

    # Convert the query text into a vector embedding
    query_embedding = model.encode(query, convert_to_tensor=True)

    # Normalize both sides to unit length so the dot product is cosine similarity
    query_embedding_normalized = query_embedding / query_embedding.norm()
    chunk_embeddings_normalized = chunk_embeddings / chunk_embeddings.norm(dim=1, keepdim=True)

    # One similarity score per chunk (matrix-vector product)
    similarities = torch.matmul(chunk_embeddings_normalized, query_embedding_normalized)
    print(similarities)

    # Indices of the highest-scoring chunks, best first
    k = min(top_k, len(text_chunks))
    top_indices = torch.topk(similarities, k=k).indices
    print(top_indices)

    # Map the indices back to the chunk text; returning the raw index tensors
    # would hand callers numbers instead of the passages themselves.
    top_chunks = [text_chunks[index] for index in top_indices.tolist()]
    print(top_chunks)

    return top_chunks
73
def respond(message, history):
    """Answer one chat turn, grounding the reply in the best-matching recipe text.

    The recipe chunk(s) most relevant to ``message`` are looked up with
    ``get_top_chunks`` and appended to the system prompt, then the whole
    conversation is sent to the inference client.

    Args:
        message: The user's latest message.
        history: Earlier conversation entries; may be empty or None.

    Returns:
        The content of the first returned choice, with surrounding
        whitespace removed.
    """
    # Retrieval must run per request: `message` only exists inside this
    # handler, so doing the lookup at module level fails at import time.
    best_recipes_chunk = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
    print(best_recipes_chunk)
    recipe_context = "\n".join(str(chunk) for chunk in best_recipes_chunk)

    system_prompt = "You are a chatbot that is a nutrition expert and helps people with their nutritional goals. You help them plan meals."
    if recipe_context:
        system_prompt += (
            "\n\nUse the following recipe information when it is relevant:\n"
            + recipe_context
        )

    messages = [{"role": "system", "content": system_prompt}]
    if history:
        messages.extend(history)

    # Send what the user actually typed, not the literal word "message".
    messages.append({"role": "user", "content": message})

    # temperature and top_p control randomness
    response = client.chat_completion(messages, max_tokens=100, temperature=1.3, top_p=.2)

    return response['choices'][0]['message']['content'].strip()
 
92
  chatbot = gr.ChatInterface(respond, type="messages")
93
+ chatbot.launch()