Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,9 +22,6 @@ from sklearn.model_selection import train_test_split
|
|
| 22 |
emails = pd.read_csv('emails.csv')
|
| 23 |
print(emails.head())
|
| 24 |
|
| 25 |
-
# What a message looks like
|
| 26 |
-
print(emails['message'][0])
|
| 27 |
-
|
| 28 |
# Getting the content of the emails and saving to a list
|
| 29 |
content_text = []
|
| 30 |
for item in emails.message:
|
|
@@ -33,15 +30,9 @@ for item in emails.message:
|
|
| 33 |
cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
|
| 34 |
content_text.append(cleaned_message)
|
| 35 |
|
| 36 |
-
# Checking content of emails (first 5 items)
|
| 37 |
-
print(content_text[:5])
|
| 38 |
-
|
| 39 |
# Taking a sample of the dataset
|
| 40 |
train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
|
| 41 |
|
| 42 |
-
print(train[:5])
|
| 43 |
-
print(len(train))
|
| 44 |
-
|
| 45 |
# Setting up ids for ChromaDB collections
|
| 46 |
ids = []
|
| 47 |
for i in range(len(train)):
|
|
|
|
| 22 |
emails = pd.read_csv('emails.csv')
|
| 23 |
print(emails.head())
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
# Getting the content of the emails and saving to a list
|
| 26 |
content_text = []
|
| 27 |
for item in emails.message:
|
|
|
|
| 30 |
cleaned_message = message.replace("\n","").replace("\r","").replace("> >>> > >","")
|
| 31 |
content_text.append(cleaned_message)
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
# Taking a sample of the dataset
|
| 34 |
train, test = train_test_split(content_text, train_size = 0.01) # Dataset is too large to complete embedding step
|
| 35 |
|
|
|
|
|
|
|
|
|
|
| 36 |
# Setting up ids for ChromaDB collections
|
| 37 |
ids = []
|
| 38 |
for i in range(len(train)):
|