ritikaaA committed on
Commit
031abc6
·
verified ·
1 Parent(s): 99ab6cc

Chatbot can now access all dataset text files (imported glob to read every .txt file in data/)

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -3,6 +3,7 @@ import random
3
  from huggingface_hub import InferenceClient
4
  from sentence_transformers import SentenceTransformer
5
  import torch
 
6
 
7
  def respond(message, history):
8
  top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
@@ -38,12 +39,16 @@ print("hello world")
38
  # declaring chatbot so that user can interact and see their conversation history and send new messages
39
 
40
  # ===== LOAD & PROCESS YOUR NEW CONTENT =====
41
- with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
42
  # Read the entire contents of the file and store it in a variable
43
- toxic_food_text = file.read()
44
 
45
- # Print the text below
46
- print(toxic_food_text)
 
 
 
 
47
 
48
  # ===== APPLY THE COMPLETE WORKFLOW =====
49
  def preprocess_text(text):
@@ -69,7 +74,7 @@ def preprocess_text(text):
69
  # Return the cleaned_chunks
70
  return cleaned_chunks
71
 
72
- cleaned_chunks = preprocess_text(toxic_food_text)
73
 
74
  # Load the pre-trained embedding model that converts text to vectors
75
  model = SentenceTransformer('all-MiniLM-L6-v2')
 
3
  from huggingface_hub import InferenceClient
4
  from sentence_transformers import SentenceTransformer
5
  import torch
6
+ import glob
7
 
8
  def respond(message, history):
9
  top_results = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
 
39
  # declaring chatbot so that user can interact and see their conversation history and send new messages
40
 
41
  # ===== LOAD & PROCESS YOUR NEW CONTENT =====
42
+ #with open("toxic_foods_for_dogs.txt", "r", encoding="utf-8") as file:
43
  # Read the entire contents of the file and store it in a variable
44
+ # toxic_food_text = file.read()
45
 
46
+ all_texts = []
47
+ for filepath in glob.glob("data/*.txt"):
48
+ with open(filepath, "r", encoding="utf-8") as file:
49
+ all_texts.append(file.read())
50
+
51
+ combined_text = "\n".join(all_texts)
52
 
53
  # ===== APPLY THE COMPLETE WORKFLOW =====
54
  def preprocess_text(text):
 
74
  # Return the cleaned_chunks
75
  return cleaned_chunks
76
 
77
+ cleaned_chunks = preprocess_text(combined_text)
78
 
79
  # Load the pre-trained embedding model that converts text to vectors
80
  model = SentenceTransformer('all-MiniLM-L6-v2')