Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -53,34 +53,42 @@ for filepath in glob.glob("data/*.txt"):
|
|
| 53 |
|
| 54 |
combined_text = "\n".join(all_texts)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
-
def preprocess_text(text):
|
| 58 |
-
cleaned_text = text.strip()
|
| 59 |
-
chunks = cleaned_text.split("\n")
|
| 60 |
-
cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
|
| 61 |
-
print(cleaned_chunks)
|
| 62 |
-
print(len(cleaned_chunks))
|
| 63 |
-
return cleaned_chunks
|
| 64 |
-
|
| 65 |
#def preprocess_text(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# Strip extra whitespace from the beginning and the end of the text
|
| 67 |
-
|
| 68 |
# Split the cleaned_text by every newline character (\n)
|
| 69 |
-
|
| 70 |
|
| 71 |
# Create an empty list to store cleaned chunks
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# Print cleaned_chunks
|
| 77 |
-
|
| 78 |
|
| 79 |
# Print the length of cleaned_chunks
|
| 80 |
-
|
| 81 |
|
| 82 |
# Return the cleaned_chunks
|
| 83 |
-
|
| 84 |
|
| 85 |
cleaned_chunks = preprocess_text(combined_text)
|
| 86 |
|
|
|
|
| 53 |
|
| 54 |
combined_text = "\n".join(all_texts)
|
| 55 |
|
| 56 |
+
#with open("food_brand_options.txt", "r", encoding="utf-8") as f:
|
| 57 |
+
# brand_options = f.read()
|
| 58 |
+
#with open("foods_not_safe.txt", "r", encoding="utf-8") as file:
|
| 59 |
+
#    not_safe = file.read()
|
| 60 |
+
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
#def preprocess_text(text):
|
| 63 |
+
# cleaned_text = text.strip()
|
| 64 |
+
# chunks = cleaned_text.split("\n")
|
| 65 |
+
# cleaned_chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
|
| 66 |
+
# print(cleaned_chunks)
|
| 67 |
+
# print(len(cleaned_chunks))
|
| 68 |
+
# return cleaned_chunks
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def preprocess_text(text):
|
| 72 |
# Strip extra whitespace from the beginning and the end of the text
|
| 73 |
+
cleaned_text = text.strip()
|
| 74 |
# Split the cleaned_text by every newline character (\n)
|
| 75 |
+
chunks = cleaned_text.split("\n")
|
| 76 |
|
| 77 |
# Create an empty list to store cleaned chunks
|
| 78 |
+
cleaned_chunks = []
|
| 79 |
|
| 80 |
+
for chunk in chunks:
|
| 81 |
+
stripped_chunk = chunk.strip()
|
| 82 |
+
cleaned_chunks.append(stripped_chunk)
|
| 83 |
|
| 84 |
# Print cleaned_chunks
|
| 85 |
+
print(cleaned_chunks)
|
| 86 |
|
| 87 |
# Print the length of cleaned_chunks
|
| 88 |
+
print(len(cleaned_chunks))
|
| 89 |
|
| 90 |
# Return the cleaned_chunks
|
| 91 |
+
return cleaned_chunks
|
| 92 |
|
| 93 |
cleaned_chunks = preprocess_text(combined_text)
|
| 94 |
|