Update utils/ghg_classifier.py
Browse files- utils/ghg_classifier.py +10 -6
utils/ghg_classifier.py
CHANGED
|
@@ -10,10 +10,9 @@ from transformers import pipeline
|
|
| 10 |
|
| 11 |
# Labels dictionary: maps the classifier's raw labels to the values stored in 'GHG Label'
|
| 12 |
_lab_dict = {
|
| 13 |
-
'
|
| 14 |
-
'
|
| 15 |
-
'
|
| 16 |
-
'NA':'NA',
|
| 17 |
}
|
| 18 |
|
| 19 |
|
|
@@ -74,9 +73,12 @@ def ghg_classification(haystack_doc:pd.DataFrame,
|
|
| 74 |
"""
|
| 75 |
logging.info("Working on GHG Extraction")
|
| 76 |
haystack_doc['GHG Label'] = 'NA'
|
| 77 |
-
haystack_doc['GHG Score'] =
|
|
|
|
| 78 |
temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
|
|
|
|
| 79 |
df = haystack_doc[haystack_doc['Target Label'] == 'NEGATIVE']
|
|
|
|
| 80 |
|
| 81 |
if not classifier_model:
|
| 82 |
classifier_model = st.session_state['ghg_classifier']
|
|
@@ -84,9 +86,11 @@ def ghg_classification(haystack_doc:pd.DataFrame,
|
|
| 84 |
results = classifier_model(list(temp.text))
|
| 85 |
labels_= [(l[0]['label'],l[0]['score']) for l in results]
|
| 86 |
temp['GHG Label'],temp['GHG Score'] = zip(*labels_)
|
|
|
|
|
|
|
| 87 |
df = pd.concat([df,temp])
|
| 88 |
-
df['GHG Label'] = df['GHG Label'].apply(lambda i: _lab_dict[i])
|
| 89 |
df = df.reset_index(drop =True)
|
|
|
|
| 90 |
df.index += 1
|
| 91 |
|
| 92 |
return df
|
|
|
|
| 10 |
|
| 11 |
# Labels dictionary: maps the classifier's raw labels to the values stored in 'GHG Label'
|
| 12 |
_lab_dict = {
|
| 13 |
+
'GHG':'GHG',
|
| 14 |
+
'NOT_GHG':'NON GHG TRANSPORT TARGET',
|
| 15 |
+
'NEGATIVE':'OTHERS',
|
|
|
|
| 16 |
}
|
| 17 |
|
| 18 |
|
|
|
|
| 73 |
"""
|
| 74 |
logging.info("Working on GHG Extraction")
|
| 75 |
haystack_doc['GHG Label'] = 'NA'
|
| 76 |
+
haystack_doc['GHG Score'] = 0.0
|
| 77 |
+
# Apply the GHG classifier only to paragraphs whose 'Target Label' is 'TARGET'.
|
| 78 |
temp = haystack_doc[haystack_doc['Target Label'] == 'TARGET']
|
| 79 |
+
temp = temp.reset_index(drop=True)
|
| 80 |
df = haystack_doc[haystack_doc['Target Label'] == 'NEGATIVE']
|
| 81 |
+
df = df.reset_index(drop=True)
|
| 82 |
|
| 83 |
if not classifier_model:
|
| 84 |
classifier_model = st.session_state['ghg_classifier']
|
|
|
|
| 86 |
results = classifier_model(list(temp.text))
|
| 87 |
labels_= [(l[0]['label'],l[0]['score']) for l in results]
|
| 88 |
temp['GHG Label'],temp['GHG Score'] = zip(*labels_)
|
| 89 |
+
temp['GHG Label'] = temp['GHG Label'].apply(lambda x: _lab_dict[x])
|
| 90 |
+
# Merge the classified TARGET rows back with the NEGATIVE (non-target) rows.
|
| 91 |
df = pd.concat([df,temp])
|
|
|
|
| 92 |
df = df.reset_index(drop =True)
|
| 93 |
+
df['GHG Score'] = df['GHG Score'].round(2)
|
| 94 |
df.index += 1
|
| 95 |
|
| 96 |
return df
|