Spaces:
Sleeping
Sleeping
Chatbot can access the datasets (imported glob)
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import random
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
import torch
|
|
|
|
| 6 |
|
| 7 |
def respond(message, history):
|
| 8 |
top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
|
@@ -38,12 +39,16 @@ print("hello world")
|
|
| 38 |
# declaring chatbot so that user can interact and see their conversation history and send new messages
|
| 39 |
|
| 40 |
# ===== LOAD & PROCESS YOUR NEW CONTENT =====
|
| 41 |
-
with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
|
| 42 |
# Read the entire contents of the file and store it in a variable
|
| 43 |
-    toxic_food_text = file.read()
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
# ===== APPLY THE COMPLETE WORKFLOW =====
|
| 49 |
def preprocess_text(text):
|
|
@@ -69,7 +74,7 @@ def preprocess_text(text):
|
|
| 69 |
# Return the cleaned_chunks
|
| 70 |
return cleaned_chunks
|
| 71 |
|
| 72 |
-
cleaned_chunks = preprocess_text(toxic_food_text)
|
| 73 |
|
| 74 |
# Load the pre-trained embedding model that converts text to vectors
|
| 75 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
import torch
|
| 6 |
+
import glob
|
| 7 |
|
| 8 |
def respond(message, history):
|
| 9 |
top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
|
|
|
|
| 39 |
# declaring chatbot so that user can interact and see their conversation history and send new messages
|
| 40 |
|
| 41 |
# ===== LOAD & PROCESS YOUR NEW CONTENT =====
|
| 42 |
+
#with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
|
| 43 |
# Read the entire contents of the file and store it in a variable
|
| 44 |
+
# toxic_food_text = file.read()
|
| 45 |
|
| 46 |
+
all_texts = []
|
| 47 |
+
for filepath in glob.glob("data/*.txt"):
|
| 48 |
+
with open(filepath, "r", encoding="utf-8") as file:
|
| 49 |
+
all_texts.append(file.read())
|
| 50 |
+
|
| 51 |
+
combined_text = "\n".join(all_texts)
|
| 52 |
|
| 53 |
# ===== APPLY THE COMPLETE WORKFLOW =====
|
| 54 |
def preprocess_text(text):
|
|
|
|
| 74 |
# Return the cleaned_chunks
|
| 75 |
return cleaned_chunks
|
| 76 |
|
| 77 |
+
cleaned_chunks = preprocess_text(combined_text)
|
| 78 |
|
| 79 |
# Load the pre-trained embedding model that converts text to vectors
|
| 80 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|