Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,6 @@ from huggingface_hub import InferenceClient
|
|
| 4 |
# SEMANTIC SEARCH STEP 1
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
import torch
|
| 7 |
-
#import lines go at the top!
|
| 8 |
|
| 9 |
# SEMANTIC SEARCH STEP 2 --> EDIT WITH YOUR OWN KNOWLEDGE BASE WHEN READY
|
| 10 |
with open("water_cycle.txt", "r", encoding="utf-8") as file:
|
|
@@ -27,13 +26,8 @@ def preprocess_text(text):
|
|
| 27 |
for chunk in chunks:
|
| 28 |
stripped_chunk = chunk.strip()
|
| 29 |
cleaned_chunks.append(stripped_chunk)
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
# Print cleaned_chunks
|
| 34 |
print(cleaned_chunks)
|
| 35 |
|
| 36 |
-
# Print the length of cleaned_chunks
|
| 37 |
print(len(cleaned_chunks))
|
| 38 |
|
| 39 |
# Return the cleaned_chunks
|
|
@@ -80,14 +74,10 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
| 80 |
# Print the similarities
|
| 81 |
print(similarities)
|
| 82 |
|
| 83 |
-
|
| 84 |
# Find the indices of the 3 chunks with highest similarity scores
|
| 85 |
top_indices = torch.topk(similarities, k=3).indices
|
| 86 |
-
|
| 87 |
-
# Print the top indices
|
| 88 |
print(top_indices)
|
| 89 |
|
| 90 |
-
|
| 91 |
# Create an empty list to store the most relevant chunks
|
| 92 |
top_chunks = []
|
| 93 |
|
|
@@ -99,7 +89,6 @@ def get_top_chunks(query, chunk_embeddings, text_chunks):
|
|
| 99 |
# Return the list of most relevant chunks
|
| 100 |
return top_chunks
|
| 101 |
|
| 102 |
-
|
| 103 |
# SEMANTIC SEARCH STEP 6
|
| 104 |
|
| 105 |
# Call the get_top_chunks function with the original query
|
|
@@ -107,13 +96,13 @@ top_results = get_top_chunks('Is water good?',chunk_embeddings, cleaned_chunks)
|
|
| 107 |
|
| 108 |
print(top_results)# Print the top results
|
| 109 |
|
| 110 |
-
|
| 111 |
client = InferenceClient("microsoft/phi-4")
|
| 112 |
# name of the LLM the chatbot accesses ^^ -- 'microsoft/phi-4' connects to Microsoft's Phi generative model
|
| 113 |
|
| 114 |
def respond(message,history):
|
| 115 |
|
| 116 |
-
info = get_top_chunks(message, chunk_embeddings,
|
| 117 |
messages = [{'role': 'system','content':f'You are a friendly chatbot using {info} to answer questions.'}]
|
| 118 |
# use string interpolation with the variable info
|
| 119 |
|
|
@@ -127,14 +116,6 @@ def respond(message,history):
|
|
| 127 |
|
| 128 |
return response['choices'][0]['message']['content'].strip()
|
| 129 |
|
| 130 |
-
#def yes_or_no(message,history):
|
| 131 |
-
# return random.choice(['Yes','No'])
|
| 132 |
-
|
| 133 |
-
#def echo(message, history):
|
| 134 |
-
#always need two inputs
|
| 135 |
-
# return message
|
| 136 |
-
|
| 137 |
-
#print("Hello, World")
|
| 138 |
|
| 139 |
chatbot = gr.ChatInterface(respond, type='messages')
|
| 140 |
# define the chatbot so the user can interact, see their conversation, and send new messages
|
|
|
|
| 4 |
# SEMANTIC SEARCH STEP 1
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
import torch
|
|
|
|
| 7 |
|
| 8 |
# SEMANTIC SEARCH STEP 2 --> EDIT WITH YOUR OWN KNOWLEDGE BASE WHEN READY
|
| 9 |
with open("water_cycle.txt", "r", encoding="utf-8") as file:
|
|
|
|
| 26 |
for chunk in chunks:
|
| 27 |
stripped_chunk = chunk.strip()
|
| 28 |
cleaned_chunks.append(stripped_chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
print(cleaned_chunks)
|
| 30 |
|
|
|
|
| 31 |
print(len(cleaned_chunks))
|
| 32 |
|
| 33 |
# Return the cleaned_chunks
|
|
|
|
| 74 |
# Print the similarities
|
| 75 |
print(similarities)
|
| 76 |
|
|
|
|
| 77 |
# Find the indices of the 3 chunks with highest similarity scores
|
| 78 |
top_indices = torch.topk(similarities, k=3).indices
|
|
|
|
|
|
|
| 79 |
print(top_indices)
|
| 80 |
|
|
|
|
| 81 |
# Create an empty list to store the most relevant chunks
|
| 82 |
top_chunks = []
|
| 83 |
|
|
|
|
| 89 |
# Return the list of most relevant chunks
|
| 90 |
return top_chunks
|
| 91 |
|
|
|
|
| 92 |
# SEMANTIC SEARCH STEP 6
|
| 93 |
|
| 94 |
# Call the get_top_chunks function with the original query
|
|
|
|
| 96 |
|
| 97 |
print(top_results)# Print the top results
|
| 98 |
|
| 99 |
+
# the original code from the gen AI lesson
|
| 100 |
client = InferenceClient("microsoft/phi-4")
|
| 101 |
# name of the LLM the chatbot accesses ^^ -- 'microsoft/phi-4' connects to Microsoft's Phi generative model
|
| 102 |
|
| 103 |
def respond(message,history):
|
| 104 |
|
| 105 |
+
info = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
| 106 |
messages = [{'role': 'system','content':f'You are a friendly chatbot using {info} to answer questions.'}]
|
| 107 |
# use string interpolation with the variable info
|
| 108 |
|
|
|
|
| 116 |
|
| 117 |
return response['choices'][0]['message']['content'].strip()
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
chatbot = gr.ChatInterface(respond, type='messages')
|
| 121 |
# define the chatbot so the user can interact, see their conversation, and send new messages
|