Spaces:
Sleeping
Sleeping
Chatbot can access the datasets (imported glob)
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import random
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
import torch
|
|
|
|
| 6 |
|
| 7 |
def respond(message, history):
|
| 8 |
top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
|
@@ -38,12 +39,16 @@ print("hello world")
|
|
| 38 |
# declaring chatbot so that user can interact and see their conversation history and send new messages
|
| 39 |
|
| 40 |
# ===== LOAD & PROCESS YOUR NEW CONTENT =====
|
| 41 |
-
with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
|
| 42 |
# Read the entire contents of the file and store it in a variable
|
| 43 |
-    toxic_food_text = file.read()
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# ===== APPLY THE COMPLETE WORKFLOW =====
|
| 49 |
def preprocess_text(text):
|
|
@@ -69,7 +74,7 @@ def preprocess_text(text):
|
|
| 69 |
# Return the cleaned_chunks
|
| 70 |
return cleaned_chunks
|
| 71 |
|
| 72 |
-
cleaned_chunks = preprocess_text(toxic_food_text)
|
| 73 |
|
| 74 |
# Load the pre-trained embedding model that converts text to vectors
|
| 75 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
import torch
|
| 6 |
+
import glob
|
| 7 |
|
| 8 |
def respond(message, history):
|
| 9 |
top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
|
|
|
| 39 |
# declaring chatbot so that user can interact and see their conversation history and send new messages
|
| 40 |
|
| 41 |
# ===== LOAD & PROCESS YOUR NEW CONTENT =====
|
| 42 |
+
#with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
|
| 43 |
# Read the entire contents of the file and store it in a variable
|
| 44 |
+
# toxic_food_text = file.read()
|
| 45 |
|
| 46 |
+
all_texts = []
|
| 47 |
+
for filepath in glob.glob("data/*.txt"):
|
| 48 |
+
with open(filepath, "r", encoding="utf-8") as file:
|
| 49 |
+
all_texts.append(file.read())
|
| 50 |
+
|
| 51 |
+
combined_text = "\n".join(all_texts)
|
| 52 |
|
| 53 |
# ===== APPLY THE COMPLETE WORKFLOW =====
|
| 54 |
def preprocess_text(text):
|
|
|
|
| 74 |
# Return the cleaned_chunks
|
| 75 |
return cleaned_chunks
|
| 76 |
|
| 77 |
+
cleaned_chunks = preprocess_text(combined_text)
|
| 78 |
|
| 79 |
# Load the pre-trained embedding model that converts text to vectors
|
| 80 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|