Spaces:
Runtime error
Runtime error
updated code
Browse files
- .ipynb_checkpoints/utils-checkpoint.py +7 -5
- utils.py +7 -5
.ipynb_checkpoints/utils-checkpoint.py
CHANGED
|
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
|
|
| 57 |
df = pd.read_csv(csv)
|
| 58 |
df.columns = ['sequence']
|
| 59 |
|
| 60 |
-
df['
|
| 61 |
-
df['
|
|
|
|
| 62 |
|
| 63 |
# Remove OOV words
|
| 64 |
-
df['sequence_clean'] = df['
|
| 65 |
|
| 66 |
# Remove rows with blank string
|
| 67 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
|
|
| 79 |
|
| 80 |
# Join back to original sequence
|
| 81 |
final_results = df.join(pred_results)
|
| 82 |
-
final_results.drop(columns=['sequence_clean'], inplace=True)
|
| 83 |
final_results['others'] = final_results[labels].max(axis=1)
|
| 84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
| 85 |
|
| 86 |
# Get sentiment labels
|
| 87 |
-
final_results['sentiment'] = final_results['
|
| 88 |
candidate_labels=['positive', 'negative'],
|
| 89 |
hypothesis_template='The sentiment of this is {}'))
|
| 90 |
)
|
| 91 |
|
|
|
|
|
|
|
| 92 |
# Append invalid rows
|
| 93 |
if len(invalid) == 0:
|
| 94 |
return final_results.to_csv(index=False).encode('utf-8')
|
|
|
|
| 57 |
df = pd.read_csv(csv)
|
| 58 |
df.columns = ['sequence']
|
| 59 |
|
| 60 |
+
df['sequence_clean'] = df['sequence'].str.lower() #lower case
|
| 61 |
+
df['sequence_clean'] = df['sequence_clean'].str.strip()
|
| 62 |
+
df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
|
| 63 |
|
| 64 |
# Remove OOV words
|
| 65 |
+
df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
|
| 66 |
|
| 67 |
# Remove rows with blank string
|
| 68 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
|
|
| 80 |
|
| 81 |
# Join back to original sequence
|
| 82 |
final_results = df.join(pred_results)
|
|
|
|
| 83 |
final_results['others'] = final_results[labels].max(axis=1)
|
| 84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
| 85 |
|
| 86 |
# Get sentiment labels
|
| 87 |
+
final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
|
| 88 |
candidate_labels=['positive', 'negative'],
|
| 89 |
hypothesis_template='The sentiment of this is {}'))
|
| 90 |
)
|
| 91 |
|
| 92 |
+
final_results.drop(columns=['sequence_clean'], inplace=True)
|
| 93 |
+
|
| 94 |
# Append invalid rows
|
| 95 |
if len(invalid) == 0:
|
| 96 |
return final_results.to_csv(index=False).encode('utf-8')
|
utils.py
CHANGED
|
@@ -57,11 +57,12 @@ def get_multiple_predictions(csv):
|
|
| 57 |
df = pd.read_csv(csv)
|
| 58 |
df.columns = ['sequence']
|
| 59 |
|
| 60 |
-
df['
|
| 61 |
-
df['
|
|
|
|
| 62 |
|
| 63 |
# Remove OOV words
|
| 64 |
-
df['sequence_clean'] = df['
|
| 65 |
|
| 66 |
# Remove rows with blank string
|
| 67 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
@@ -79,16 +80,17 @@ def get_multiple_predictions(csv):
|
|
| 79 |
|
| 80 |
# Join back to original sequence
|
| 81 |
final_results = df.join(pred_results)
|
| 82 |
-
final_results.drop(columns=['sequence_clean'], inplace=True)
|
| 83 |
final_results['others'] = final_results[labels].max(axis=1)
|
| 84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
| 85 |
|
| 86 |
# Get sentiment labels
|
| 87 |
-
final_results['sentiment'] = final_results['
|
| 88 |
candidate_labels=['positive', 'negative'],
|
| 89 |
hypothesis_template='The sentiment of this is {}'))
|
| 90 |
)
|
| 91 |
|
|
|
|
|
|
|
| 92 |
# Append invalid rows
|
| 93 |
if len(invalid) == 0:
|
| 94 |
return final_results.to_csv(index=False).encode('utf-8')
|
|
|
|
| 57 |
df = pd.read_csv(csv)
|
| 58 |
df.columns = ['sequence']
|
| 59 |
|
| 60 |
+
df['sequence_clean'] = df['sequence'].str.lower() #lower case
|
| 61 |
+
df['sequence_clean'] = df['sequence_clean'].str.strip()
|
| 62 |
+
df['sequence_clean'] = df['sequence_clean'].str.replace('[^0-9a-zA-Z\s]','') #remove special char, punctuation
|
| 63 |
|
| 64 |
# Remove OOV words
|
| 65 |
+
df['sequence_clean'] = df['sequence_clean'].apply(lambda x: ' '.join([i for i in x.split() if i in w2v_vocab]))
|
| 66 |
|
| 67 |
# Remove rows with blank string
|
| 68 |
invalid = df[(pd.isna(df['sequence_clean'])) | (df['sequence_clean'] == '')]
|
|
|
|
| 80 |
|
| 81 |
# Join back to original sequence
|
| 82 |
final_results = df.join(pred_results)
|
|
|
|
| 83 |
final_results['others'] = final_results[labels].max(axis=1)
|
| 84 |
final_results['others'] = final_results['others'].apply(lambda x: 1 if x == 0 else 0)
|
| 85 |
|
| 86 |
# Get sentiment labels
|
| 87 |
+
final_results['sentiment'] = final_results['sequence_clean'].apply(lambda x: get_sentiment_label_facebook(classifier(x,
|
| 88 |
candidate_labels=['positive', 'negative'],
|
| 89 |
hypothesis_template='The sentiment of this is {}'))
|
| 90 |
)
|
| 91 |
|
| 92 |
+
final_results.drop(columns=['sequence_clean'], inplace=True)
|
| 93 |
+
|
| 94 |
# Append invalid rows
|
| 95 |
if len(invalid) == 0:
|
| 96 |
return final_results.to_csv(index=False).encode('utf-8')
|