ritikaaA committed on
Commit
f300e64
·
verified ·
1 Parent(s): 2957e04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -53,34 +53,42 @@ for filepath in glob.glob("data/*.txt"):
53
 
54
  combined_text = "\n".join(all_texts)
55
 
 
 
 
 
 
56
 
57
- def preprocess_text(text):
58
- cleaned_text = text.strip()
59
- chunks = cleaned_text.split("\n")
60
- cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
61
- print(cleaned_chunks)
62
- print(len(cleaned_chunks))
63
- return cleaned_chunks
64
-
65
  #def preprocess_text(text):
 
 
 
 
 
 
 
 
 
66
  # Strip extra whitespace from the beginning and the end of the text
67
- #cleaned_text = text.strip()
68
  # Split the cleaned_text by every newline character (\n)
69
- #chunks = cleaned_text.split("\n")
70
 
71
  # Create an empty list to store cleaned chunks
72
- #cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
73
 
74
- # Write your for-in loop below to clean each chunk and add it to the cleaned_chunks list
 
 
75
 
76
  # Print cleaned_chunks
77
- #print(cleaned_chunks)
78
 
79
  # Print the length of cleaned_chunks
80
- # print(len(cleaned_chunks))
81
 
82
  # Return the cleaned_chunks
83
- #return cleaned_chunks
84
 
85
  cleaned_chunks = preprocess_text(combined_text)
86
 
 
53
 
54
  combined_text = "\n".join(all_texts)
55
 
56
+ #with open("food_brand_options.txt", "r", encoding="utf-8") as f:
57
+ # brand_options = f.read()
58
+ #with open("foods_not_safe.txt", "r", encoding="utf-8") as file:
59
+ # not_safe
60
+
61
 
 
 
 
 
 
 
 
 
62
  #def preprocess_text(text):
63
+ # cleaned_text = text.strip()
64
+ # chunks = cleaned_text.split("\n")
65
+ # cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
66
+ # print(cleaned_chunks)
67
+ # print(len(cleaned_chunks))
68
+ # return cleaned_chunks
69
+
70
+
71
def preprocess_text(text):
    """Split raw text into a list of non-empty, whitespace-trimmed chunks.

    The text is stripped, split on newline characters, and every resulting
    line is stripped again. Lines that are empty after stripping are
    dropped, so blank lines in the input never turn into empty chunks.

    The resulting list and its length are printed before being returned.

    Args:
        text: The raw text to split; each line becomes one candidate chunk.

    Returns:
        A list of stripped, non-empty strings in their original order.
        Empty or whitespace-only input yields an empty list.
    """
    # Strip extra whitespace from the beginning and the end of the text
    cleaned_text = text.strip()

    # Split the cleaned_text by every newline character (\n)
    chunks = cleaned_text.split("\n")

    # Strip each chunk, then keep only the ones that still have content.
    # Without the filter, blank lines would be kept as "" chunks (and an
    # empty input would produce [""]).
    stripped_chunks = (chunk.strip() for chunk in chunks)
    cleaned_chunks = [chunk for chunk in stripped_chunks if chunk]

    # Print cleaned_chunks
    print(cleaned_chunks)

    # Print the length of cleaned_chunks
    print(len(cleaned_chunks))

    # Return the cleaned_chunks
    return cleaned_chunks
92
 
93
  cleaned_chunks = preprocess_text(combined_text)
94