Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -113,6 +113,7 @@ def _inference_classifier(text):
|
|
| 113 |
def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
|
| 114 |
input_batch_content = []
|
| 115 |
if file_in is not None:
|
|
|
|
| 116 |
dft = pd.read_csv(
|
| 117 |
file_in,
|
| 118 |
compression=dict(method='zip')
|
|
@@ -120,13 +121,15 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
|
|
| 120 |
assert file_col_name in dft.columns, "Indicated col_name not found in file"
|
| 121 |
input_batch_r = dft[file_col_name].values.tolist()
|
| 122 |
else:
|
|
|
|
| 123 |
assert len(input_batch) > 0, "input_batch array is empty"
|
| 124 |
input_batch_r = input_batch
|
| 125 |
|
| 126 |
-
print("
|
| 127 |
-
print("+",input_batch_r)
|
| 128 |
|
| 129 |
if isurl:
|
|
|
|
|
|
|
| 130 |
for row_in in input_batch_r:
|
| 131 |
if isinstance(row_in , list):
|
| 132 |
url = row_in[0]
|
|
@@ -140,14 +143,16 @@ def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companie
|
|
| 140 |
extracted = Extractor().extract(requests.get(url).text)
|
| 141 |
input_batch_content.append(extracted['content'])
|
| 142 |
else:
|
|
|
|
| 143 |
if isinstance(input_batch_r[0], list):
|
|
|
|
| 144 |
for row_in in input_batch_r:
|
| 145 |
input_batch_content.append(row_in[0])
|
| 146 |
else:
|
|
|
|
| 147 |
input_batch_content = input_batch_r
|
| 148 |
|
| 149 |
-
print("
|
| 150 |
-
print("+",input_batch_content)
|
| 151 |
|
| 152 |
prob_outs = _inference_classifier(input_batch_content)
|
| 153 |
#sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
|
|
@@ -187,7 +192,7 @@ demo = gr.Interface(fn=inference,
|
|
| 187 |
gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
|
| 188 |
gr.Checkbox(label='if url parse cached in archive.org'),
|
| 189 |
gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
|
| 190 |
-
outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True, header=OUT_HEADERS)],
|
| 191 |
#gr.Label(label='Company'),
|
| 192 |
#gr.Label(label='ESG'),
|
| 193 |
#gr.Label(label='Sentiment'),
|
|
|
|
| 113 |
def inference(file_in,file_col_name,input_batch,isurl,use_archive,limit_companies=10):
|
| 114 |
input_batch_content = []
|
| 115 |
if file_in is not None:
|
| 116 |
+
print("[i] Input is file:",file_in)
|
| 117 |
dft = pd.read_csv(
|
| 118 |
file_in,
|
| 119 |
compression=dict(method='zip')
|
|
|
|
| 121 |
assert file_col_name in dft.columns, "Indicated col_name not found in file"
|
| 122 |
input_batch_r = dft[file_col_name].values.tolist()
|
| 123 |
else:
|
| 124 |
+
print("[i] Input is list")
|
| 125 |
assert len(input_batch) > 0, "input_batch array is empty"
|
| 126 |
input_batch_r = input_batch
|
| 127 |
|
| 128 |
+
print("[i] Input size:",len(input_batch_r))
|
|
|
|
| 129 |
|
| 130 |
if isurl:
|
| 131 |
+
print("[i] Data is URL")
|
| 132 |
+
if use_archive: print("[i] Use cached URL from archive.org")
|
| 133 |
for row_in in input_batch_r:
|
| 134 |
if isinstance(row_in , list):
|
| 135 |
url = row_in[0]
|
|
|
|
| 143 |
extracted = Extractor().extract(requests.get(url).text)
|
| 144 |
input_batch_content.append(extracted['content'])
|
| 145 |
else:
|
| 146 |
+
print("[i] Data is news contents")
|
| 147 |
if isinstance(input_batch_r[0], list):
|
| 148 |
+
print("[i] Data is list of lists format")
|
| 149 |
for row_in in input_batch_r:
|
| 150 |
input_batch_content.append(row_in[0])
|
| 151 |
else:
|
| 152 |
+
print("[i] Data is single list format")
|
| 153 |
input_batch_content = input_batch_r
|
| 154 |
|
| 155 |
+
print("[i] Batch size:",len(input_batch_content))
|
|
|
|
| 156 |
|
| 157 |
prob_outs = _inference_classifier(input_batch_content)
|
| 158 |
#sentiment = _inference_sentiment_model_via_api_query({"inputs": extracted['content']})
|
|
|
|
| 192 |
gr.Dropdown(label='data type', choices=['text','url'], type='index', value='url'),
|
| 193 |
gr.Checkbox(label='if url parse cached in archive.org'),
|
| 194 |
gr.Slider(minimum=1, maximum=10, step=1, label='Limit NER output', value=5)],
|
| 195 |
+
outputs=[gr.Dataframe(label='output raw', col_count=1, datatype='number', type='array', wrap=True)],#, header=OUT_HEADERS)],
|
| 196 |
#gr.Label(label='Company'),
|
| 197 |
#gr.Label(label='ESG'),
|
| 198 |
#gr.Label(label='Sentiment'),
|