Spaces:
Sleeping
Sleeping
File size: 1,925 Bytes
1d8c2e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
"""
Export utilities
"""
import json
def prepare_export(items_with_positive, feedback_scores, feedback_comments=None):
    """
    Build the export records for every item that received a feedback score.

    Args:
        items_with_positive: Iterable of (original_idx, item) tuples, where
            item is a dict-like object.
        feedback_scores: Dict {idx: score}. Items whose index is not a key
            of this dict are left out of the export.
        feedback_comments: Dict {idx: comment} (optional). A comment is only
            exported when it is a string containing non-whitespace text.

    Returns:
        List of dicts ready for export, in the order of items_with_positive.
    """
    export_data = []
    for original_idx, item in items_with_positive:
        # Only items that were actually scored are exported.
        if original_idx not in feedback_scores:
            continue

        export_item = {
            # 'anchor' takes precedence; 'code' is the fallback key.
            'code': item.get('anchor', item.get('code', '')),
            'positive_feedback': item.get('positive', ''),
            'score': feedback_scores[original_idx],
        }

        # Copy identifiers used for merging when the item carries them.
        # No content-hash fallback is generated here: an item with neither
        # key is exported without an identifier.
        if 'code_id' in item:
            export_item['code_id'] = item['code_id']
        if 'author_id' in item:
            export_item['author_id'] = item['author_id']

        # Optional field.
        if 'language' in item:
            export_item['language'] = item['language']

        # Attach the comment if one exists. A None or non-string value is
        # treated as "no comment" instead of crashing on .strip().
        if feedback_comments and original_idx in feedback_comments:
            comment = feedback_comments[original_idx]
            if isinstance(comment, str) and comment.strip():
                # The comment is exported as entered (not stripped).
                export_item['comment'] = comment

        # Original index for reference (local to the source file).
        export_item['original_index'] = original_idx
        export_data.append(export_item)
    return export_data
def export_to_jsonl(export_data):
    """
    Serialize export records as JSON Lines text.

    Args:
        export_data: List of dicts.

    Returns:
        A single string with one JSON document per record, separated by
        newlines (no trailing newline). Non-ASCII characters are written
        as-is rather than escaped.
    """
    serialized_lines = []
    for record in export_data:
        serialized_lines.append(json.dumps(record, ensure_ascii=False))
    return "\n".join(serialized_lines)
|