Spaces:
Sleeping
Sleeping
Update app103.py
Browse files
app103.py
CHANGED
|
@@ -1192,9 +1192,40 @@ if "task_choice" in st.session_state:
|
|
| 1192 |
# Save labeled examples to CSV
|
| 1193 |
#new 14/4/2025
|
| 1194 |
labeled_examples = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1195 |
# if classification_type == "Named Entity Recognition (NER)":
|
| 1196 |
-
# labeled_examples = [
|
| 1197 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1198 |
# if line.strip():
|
| 1199 |
# parts = line.rsplit('Entities:', 1)
|
| 1200 |
# if len(parts) == 2:
|
|
@@ -1209,58 +1240,27 @@ if "task_choice" in st.session_state:
|
|
| 1209 |
# 'task_type': 'Named Entity Recognition (NER)',
|
| 1210 |
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1211 |
# })
|
|
|
|
|
|
|
| 1212 |
|
| 1213 |
-
# #new 22/4/2025
|
| 1214 |
# if classification_type == "Named Entity Recognition (NER)":
|
| 1215 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
# 'ner_output': response.strip(),
|
|
|
|
| 1217 |
# 'system_prompt': st.session_state.system_prompt,
|
| 1218 |
# 'system_role': st.session_state.system_role,
|
| 1219 |
# 'task_type': 'Named Entity Recognition (NER)',
|
| 1220 |
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1221 |
-
# }]
|
| 1222 |
-
|
| 1223 |
-
#new 24/4/2025
|
| 1224 |
-
labeled_examples = []
|
| 1225 |
-
|
| 1226 |
-
if classification_type == "Named Entity Recognition (NER)":
|
| 1227 |
-
# Split response into lines and try to extract the text and entities
|
| 1228 |
-
for line in response.strip().split('\n'):
|
| 1229 |
-
if line.strip():
|
| 1230 |
-
parts = line.rsplit('Entities:', 1)
|
| 1231 |
-
if len(parts) == 2:
|
| 1232 |
-
text = parts[0].strip()
|
| 1233 |
-
entities = parts[1].strip()
|
| 1234 |
-
if text and entities:
|
| 1235 |
-
labeled_examples.append({
|
| 1236 |
-
'text': text,
|
| 1237 |
-
'entities': entities,
|
| 1238 |
-
'system_prompt': st.session_state.system_prompt,
|
| 1239 |
-
'system_role': st.session_state.system_role,
|
| 1240 |
-
'task_type': 'Named Entity Recognition (NER)',
|
| 1241 |
-
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1242 |
-
})
|
| 1243 |
-
###newnewnewnew
|
| 1244 |
-
labeled_examples = []
|
| 1245 |
-
|
| 1246 |
-
if classification_type == "Named Entity Recognition (NER)":
|
| 1247 |
-
ner_entities = []
|
| 1248 |
-
for line in response.strip().split('\n'):
|
| 1249 |
-
if line.strip():
|
| 1250 |
-
if '-' in line:
|
| 1251 |
-
entity_text, entity_type = line.rsplit('-', 1)
|
| 1252 |
-
ner_entities.append({
|
| 1253 |
-
'entity': entity_text.strip(),
|
| 1254 |
-
'label': entity_type.strip()
|
| 1255 |
-
})
|
| 1256 |
-
labeled_examples = [{
|
| 1257 |
-
'ner_output': response.strip(),
|
| 1258 |
-
'entities': ner_entities,
|
| 1259 |
-
'system_prompt': st.session_state.system_prompt,
|
| 1260 |
-
'system_role': st.session_state.system_role,
|
| 1261 |
-
'task_type': 'Named Entity Recognition (NER)',
|
| 1262 |
-
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1263 |
-
}]
|
| 1264 |
|
| 1265 |
|
| 1266 |
|
|
@@ -1278,24 +1278,8 @@ if "task_choice" in st.session_state:
|
|
| 1278 |
######
|
| 1279 |
|
| 1280 |
|
| 1281 |
-
# else:
|
| 1282 |
-
# labeled_examples = []
|
| 1283 |
-
# for line in response.split('\n'):
|
| 1284 |
-
# if line.strip():
|
| 1285 |
-
# parts = line.rsplit('Label:', 1)
|
| 1286 |
-
# if len(parts) == 2:
|
| 1287 |
-
# text = parts[0].strip()
|
| 1288 |
-
# label = parts[1].strip()
|
| 1289 |
-
# if text and label:
|
| 1290 |
-
# labeled_examples.append({
|
| 1291 |
-
# 'text': text,
|
| 1292 |
-
# 'label': label,
|
| 1293 |
-
# 'system_prompt': st.session_state.system_prompt,
|
| 1294 |
-
# 'system_role': st.session_state.system_role,
|
| 1295 |
-
# 'task_type': 'Data Labeling',
|
| 1296 |
-
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1297 |
-
# })
|
| 1298 |
else:
|
|
|
|
| 1299 |
for line in response.split('\n'):
|
| 1300 |
if line.strip():
|
| 1301 |
parts = line.rsplit('Label:', 1)
|
|
@@ -1311,29 +1295,46 @@ if "task_choice" in st.session_state:
|
|
| 1311 |
'task_type': 'Data Labeling',
|
| 1312 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1313 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1314 |
|
| 1315 |
|
| 1316 |
-
#
|
| 1317 |
-
|
| 1318 |
-
|
| 1319 |
-
|
| 1320 |
|
| 1321 |
-
|
| 1322 |
-
|
| 1323 |
|
| 1324 |
-
|
| 1325 |
-
|
| 1326 |
-
|
| 1327 |
|
| 1328 |
-
|
| 1329 |
|
| 1330 |
-
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
|
| 1334 |
-
|
| 1335 |
-
|
| 1336 |
-
|
|
|
|
| 1337 |
# "labels": labels,
|
| 1338 |
# "used_few_shot": use_few_shot,
|
| 1339 |
# "task_###########
|
|
@@ -1354,22 +1355,22 @@ if "task_choice" in st.session_state:
|
|
| 1354 |
if labeled_examples:
|
| 1355 |
st.session_state.labeled_examples = labeled_examples
|
| 1356 |
|
| 1357 |
-
if classification_type == "Named Entity Recognition (NER)":
|
| 1358 |
-
|
| 1359 |
-
|
| 1360 |
-
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
| 1369 |
-
|
| 1370 |
-
|
| 1371 |
-
else:
|
| 1372 |
-
|
| 1373 |
|
| 1374 |
# CSV
|
| 1375 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|
|
|
|
| 1192 |
# Save labeled examples to CSV
|
| 1193 |
#new 14/4/2025
|
| 1194 |
labeled_examples = []
|
| 1195 |
+
if classification_type == "Named Entity Recognition (NER)":
|
| 1196 |
+
labeled_examples = []
|
| 1197 |
+
for line in response.split('\n'):
|
| 1198 |
+
if line.strip():
|
| 1199 |
+
parts = line.rsplit('Entities:', 1)
|
| 1200 |
+
if len(parts) == 2:
|
| 1201 |
+
text = parts[0].strip()
|
| 1202 |
+
entities = parts[1].strip()
|
| 1203 |
+
if text and entities:
|
| 1204 |
+
labeled_examples.append({
|
| 1205 |
+
'text': text,
|
| 1206 |
+
'entities': entities,
|
| 1207 |
+
'system_prompt': st.session_state.system_prompt,
|
| 1208 |
+
'system_role': st.session_state.system_role,
|
| 1209 |
+
'task_type': 'Named Entity Recognition (NER)',
|
| 1210 |
+
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1211 |
+
})
|
| 1212 |
+
|
| 1213 |
+
# #new 22/4/2025
|
| 1214 |
# if classification_type == "Named Entity Recognition (NER)":
|
| 1215 |
+
# labeled_examples = [{
|
| 1216 |
+
# 'ner_output': response.strip(),
|
| 1217 |
+
# 'system_prompt': st.session_state.system_prompt,
|
| 1218 |
+
# 'system_role': st.session_state.system_role,
|
| 1219 |
+
# 'task_type': 'Named Entity Recognition (NER)',
|
| 1220 |
+
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1221 |
+
# }]
|
| 1222 |
+
|
| 1223 |
+
# #new 24/4/2025
|
| 1224 |
+
# labeled_examples = []
|
| 1225 |
+
|
| 1226 |
+
# if classification_type == "Named Entity Recognition (NER)":
|
| 1227 |
+
# # Split response into lines and try to extract the text and entities
|
| 1228 |
+
# for line in response.strip().split('\n'):
|
| 1229 |
# if line.strip():
|
| 1230 |
# parts = line.rsplit('Entities:', 1)
|
| 1231 |
# if len(parts) == 2:
|
|
|
|
| 1240 |
# 'task_type': 'Named Entity Recognition (NER)',
|
| 1241 |
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1242 |
# })
|
| 1243 |
+
# ###newnewnewnew
|
| 1244 |
+
# labeled_examples = []
|
| 1245 |
|
|
|
|
| 1246 |
# if classification_type == "Named Entity Recognition (NER)":
|
| 1247 |
+
# ner_entities = []
|
| 1248 |
+
# for line in response.strip().split('\n'):
|
| 1249 |
+
# if line.strip():
|
| 1250 |
+
# if '-' in line:
|
| 1251 |
+
# entity_text, entity_type = line.rsplit('-', 1)
|
| 1252 |
+
# ner_entities.append({
|
| 1253 |
+
# 'entity': entity_text.strip(),
|
| 1254 |
+
# 'label': entity_type.strip()
|
| 1255 |
+
# })
|
| 1256 |
+
# labeled_examples = [{
|
| 1257 |
# 'ner_output': response.strip(),
|
| 1258 |
+
# 'entities': ner_entities,
|
| 1259 |
# 'system_prompt': st.session_state.system_prompt,
|
| 1260 |
# 'system_role': st.session_state.system_role,
|
| 1261 |
# 'task_type': 'Named Entity Recognition (NER)',
|
| 1262 |
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1263 |
+
# }]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1264 |
|
| 1265 |
|
| 1266 |
|
|
|
|
| 1278 |
######
|
| 1279 |
|
| 1280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1281 |
else:
|
| 1282 |
+
labeled_examples = []
|
| 1283 |
for line in response.split('\n'):
|
| 1284 |
if line.strip():
|
| 1285 |
parts = line.rsplit('Label:', 1)
|
|
|
|
| 1295 |
'task_type': 'Data Labeling',
|
| 1296 |
'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1297 |
})
|
| 1298 |
+
# else:
|
| 1299 |
+
# for line in response.split('\n'):
|
| 1300 |
+
# if line.strip():
|
| 1301 |
+
# parts = line.rsplit('Label:', 1)
|
| 1302 |
+
# if len(parts) == 2:
|
| 1303 |
+
# text = parts[0].strip()
|
| 1304 |
+
# label = parts[1].strip()
|
| 1305 |
+
# if text and label:
|
| 1306 |
+
# labeled_examples.append({
|
| 1307 |
+
# 'text': text,
|
| 1308 |
+
# 'label': label,
|
| 1309 |
+
# 'system_prompt': st.session_state.system_prompt,
|
| 1310 |
+
# 'system_role': st.session_state.system_role,
|
| 1311 |
+
# 'task_type': 'Data Labeling',
|
| 1312 |
+
# 'Use few-shot example?': 'Yes' if use_few_shot else 'No',
|
| 1313 |
+
# })
|
| 1314 |
|
| 1315 |
|
| 1316 |
+
# Save and provide download options
|
| 1317 |
+
if labeled_examples:
|
| 1318 |
+
# Update session state
|
| 1319 |
+
st.session_state.labeled_examples = labeled_examples
|
| 1320 |
|
| 1321 |
+
# Convert to CSV and JSON
|
| 1322 |
+
df = pd.DataFrame(labeled_examples)
|
| 1323 |
|
| 1324 |
+
########3
|
| 1325 |
+
if labeled_examples:
|
| 1326 |
+
df = pd.DataFrame(labeled_examples)
|
| 1327 |
|
| 1328 |
+
csv = df.to_csv(index=False).encode('utf-8')
|
| 1329 |
|
| 1330 |
+
st.download_button(
|
| 1331 |
+
label="📥 Download Labeled Examples CSV",
|
| 1332 |
+
data=csv,
|
| 1333 |
+
file_name='labeled_examples.csv',
|
| 1334 |
+
mime='text/csv'
|
| 1335 |
+
)
|
| 1336 |
+
|
| 1337 |
+
#"domain": domain,
|
| 1338 |
# "labels": labels,
|
| 1339 |
# "used_few_shot": use_few_shot,
|
| 1340 |
# "task_###########
|
|
|
|
| 1355 |
if labeled_examples:
|
| 1356 |
st.session_state.labeled_examples = labeled_examples
|
| 1357 |
|
| 1358 |
+
# if classification_type == "Named Entity Recognition (NER)":
|
| 1359 |
+
# # Flatten NER entities for CSV
|
| 1360 |
+
# flat_data = []
|
| 1361 |
+
# for example in labeled_examples:
|
| 1362 |
+
# for ent in example.get('entities', []):
|
| 1363 |
+
# flat_data.append({
|
| 1364 |
+
# 'entity': ent['entity'],
|
| 1365 |
+
# 'label': ent['label'],
|
| 1366 |
+
# 'system_prompt': example['system_prompt'],
|
| 1367 |
+
# 'system_role': example['system_role'],
|
| 1368 |
+
# 'task_type': example['task_type'],
|
| 1369 |
+
# 'Use few-shot example?': example['Use few-shot example?']
|
| 1370 |
+
# })
|
| 1371 |
+
# df = pd.DataFrame(flat_data)
|
| 1372 |
+
# else:
|
| 1373 |
+
# df = pd.DataFrame(labeled_examples)
|
| 1374 |
|
| 1375 |
# CSV
|
| 1376 |
st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
|