Spaces:
Sleeping
Sleeping
Update app103.py
Browse files
app103.py
CHANGED
|
@@ -1240,23 +1240,62 @@ if "task_choice" in st.session_state:
|
|
| 1240 |
'task_type': 'Named Entity Recognition (NER)',
|
| 1241 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1242 |
})
|
|
|
|
|
|
|
| 1243 |
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1249 |
|
| 1250 |
-
|
| 1251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1252 |
|
| 1253 |
####
|
| 1254 |
|
| 1255 |
######
|
| 1256 |
|
| 1257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1258 |
else:
|
| 1259 |
-
labeled_examples = []
|
| 1260 |
for line in response.split('\n'):
|
| 1261 |
if line.strip():
|
| 1262 |
parts = line.rsplit('Label:', 1)
|
|
@@ -1272,13 +1311,15 @@ if "task_choice" in st.session_state:
|
|
| 1272 |
'task_type': 'Data Labeling',
|
| 1273 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1274 |
})
|
| 1275 |
-
|
| 1276 |
-
|
| 1277 |
-
|
| 1278 |
-
|
|
|
|
|
|
|
| 1279 |
|
| 1280 |
-
|
| 1281 |
-
|
| 1282 |
|
| 1283 |
# ########3
|
| 1284 |
# if labeled_examples:
|
|
@@ -1292,18 +1333,54 @@ if "task_choice" in st.session_state:
|
|
| 1292 |
# file_name='labeled_examples.csv',
|
| 1293 |
# mime='text/csv'
|
| 1294 |
# )
|
| 1295 |
-
|
| 1296 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1297 |
# CSV
|
| 1298 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
| 1299 |
-
|
| 1300 |
# JSON
|
| 1301 |
st.session_state.labeled_examples_json = json.dumps({
|
| 1302 |
"metadata": {
|
| 1303 |
"domain": domain,
|
| 1304 |
"labels": labels,
|
| 1305 |
"used_few_shot": use_few_shot,
|
| 1306 |
-
"task_type":
|
| 1307 |
"timestamp": datetime.now().isoformat()
|
| 1308 |
},
|
| 1309 |
"examples": labeled_examples
|
|
|
|
| 1240 |
'task_type': 'Named Entity Recognition (NER)',
|
| 1241 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1242 |
})
|
| 1243 |
+
###newnewnewnew
|
| 1244 |
+
labeled_examples = []
|
| 1245 |
|
| 1246 |
+
if classification_type == "Named Entity Recognition (NER)":
|
| 1247 |
+
ner_entities = []
|
| 1248 |
+
for line in response.strip().split('\n'):
|
| 1249 |
+
if line.strip():
|
| 1250 |
+
if '-' in line:
|
| 1251 |
+
entity_text, entity_type = line.rsplit('-', 1)
|
| 1252 |
+
ner_entities.append({
|
| 1253 |
+
'entity': entity_text.strip(),
|
| 1254 |
+
'label': entity_type.strip()
|
| 1255 |
+
})
|
| 1256 |
+
labeled_examples = [{
|
| 1257 |
+
'ner_output': response.strip(),
|
| 1258 |
+
'entities': ner_entities,
|
| 1259 |
+
'system_prompt': st.session_state.system_prompt,
|
| 1260 |
+
'system_role': st.session_state.system_role,
|
| 1261 |
+
'task_type': 'Named Entity Recognition (NER)',
|
| 1262 |
+
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1263 |
+
}]
|
| 1264 |
+
|
| 1265 |
|
| 1266 |
+
|
| 1267 |
+
# #new 24/4/2025
|
| 1268 |
+
# # Save and provide download options
|
| 1269 |
+
# if labeled_examples:
|
| 1270 |
+
# # Update session state
|
| 1271 |
+
# st.session_state.labeled_examples = labeled_examples
|
| 1272 |
+
|
| 1273 |
+
# # Convert to CSV and JSON
|
| 1274 |
+
# df = pd.DataFrame(labeled_examples)
|
| 1275 |
|
| 1276 |
####
|
| 1277 |
|
| 1278 |
######
|
| 1279 |
|
| 1280 |
|
| 1281 |
+
# else:
|
| 1282 |
+
# labeled_examples = []
|
| 1283 |
+
# for line in response.split('\n'):
|
| 1284 |
+
# if line.strip():
|
| 1285 |
+
# parts = line.rsplit('Label:', 1)
|
| 1286 |
+
# if len(parts) == 2:
|
| 1287 |
+
# text = parts[0].strip()
|
| 1288 |
+
# label = parts[1].strip()
|
| 1289 |
+
# if text and label:
|
| 1290 |
+
# labeled_examples.append({
|
| 1291 |
+
# 'text': text,
|
| 1292 |
+
# 'label': label,
|
| 1293 |
+
# 'system_prompt': st.session_state.system_prompt,
|
| 1294 |
+
# 'system_role': st.session_state.system_role,
|
| 1295 |
+
# 'task_type': 'Data Labeling',
|
| 1296 |
+
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1297 |
+
# })
|
| 1298 |
else:
|
|
|
|
| 1299 |
for line in response.split('\n'):
|
| 1300 |
if line.strip():
|
| 1301 |
parts = line.rsplit('Label:', 1)
|
|
|
|
| 1311 |
'task_type': 'Data Labeling',
|
| 1312 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1313 |
})
|
| 1314 |
+
|
| 1315 |
+
|
| 1316 |
+
# # Save and provide download options
|
| 1317 |
+
# if labeled_examples:
|
| 1318 |
+
# # Update session state
|
| 1319 |
+
# st.session_state.labeled_examples = labeled_examples
|
| 1320 |
|
| 1321 |
+
# # Convert to CSV and JSON
|
| 1322 |
+
# df = pd.DataFrame(labeled_examples)
|
| 1323 |
|
| 1324 |
# ########3
|
| 1325 |
# if labeled_examples:
|
|
|
|
| 1333 |
# file_name='labeled_examples.csv',
|
| 1334 |
# mime='text/csv'
|
| 1335 |
# )
|
| 1336 |
+
# "domain": domain,
|
| 1337 |
+
# "labels": labels,
|
| 1338 |
+
# "used_few_shot": use_few_shot,
|
| 1339 |
+
# "task_###########
|
| 1340 |
+
# #new 22/4/2025
|
| 1341 |
+
# # CSV
|
| 1342 |
+
# st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
| 1343 |
+
|
| 1344 |
+
# # JSON
|
| 1345 |
+
# st.session_state.labeled_examples_json = json.dumps({
|
| 1346 |
+
# "metadata": {
|
| 1347 |
+
# type": "Named Entity Recognition (NER)",
|
| 1348 |
+
# "timestamp": datetime.now().isoformat()
|
| 1349 |
+
# },
|
| 1350 |
+
# "examples": labeled_examples
|
| 1351 |
+
# }, indent=2).encode('utf-8')
|
| 1352 |
+
#########newnewnew
|
| 1353 |
+
# Save and provide download options
|
| 1354 |
+
if labeled_examples:
|
| 1355 |
+
st.session_state.labeled_examples = labeled_examples
|
| 1356 |
+
|
| 1357 |
+
if classification_type == "Named Entity Recognition (NER)":
|
| 1358 |
+
# Flatten NER entities for CSV
|
| 1359 |
+
flat_data = []
|
| 1360 |
+
for example in labeled_examples:
|
| 1361 |
+
for ent in example.get('entities', []):
|
| 1362 |
+
flat_data.append({
|
| 1363 |
+
'entity': ent['entity'],
|
| 1364 |
+
'label': ent['label'],
|
| 1365 |
+
'system_prompt': example['system_prompt'],
|
| 1366 |
+
'system_role': example['system_role'],
|
| 1367 |
+
'task_type': example['task_type'],
|
| 1368 |
+
'Use few-shot example?': example['Use few-shot example?']
|
| 1369 |
+
})
|
| 1370 |
+
df = pd.DataFrame(flat_data)
|
| 1371 |
+
else:
|
| 1372 |
+
df = pd.DataFrame(labeled_examples)
|
| 1373 |
+
|
| 1374 |
# CSV
|
| 1375 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
| 1376 |
+
|
| 1377 |
# JSON
|
| 1378 |
st.session_state.labeled_examples_json = json.dumps({
|
| 1379 |
"metadata": {
|
| 1380 |
"domain": domain,
|
| 1381 |
"labels": labels,
|
| 1382 |
"used_few_shot": use_few_shot,
|
| 1383 |
+
"task_type": classification_type,
|
| 1384 |
"timestamp": datetime.now().isoformat()
|
| 1385 |
},
|
| 1386 |
"examples": labeled_examples
|