import os
import json
from pathlib import Path
from typing import Dict, List


def _field_after(content: str, marker: str, default: str) -> str:
    """Return the stripped text between *marker* and the end of its line.

    Returns *default* when *marker* does not occur in *content*.
    """
    start = content.find(marker)
    if start == -1:
        return default
    start += len(marker)
    end = content.find('\n', start)
    # No newline means the field sits on the last line of the file; slicing
    # with -1 would silently drop the final character.
    if end == -1:
        end = len(content)
    return content[start:end].strip()


def _parse_parameters(content: str) -> Dict[str, str]:
    """Extract the ``* key: value`` bullets of the PARAMETERS section."""
    marker = 'PARAMETERS:'
    start = content.find(marker)
    if start == -1:
        return {}
    start += len(marker)

    # The section ends at the first '----' separator, else at DESCRIPTION:,
    # else at the end of the file.
    end = content.find('----', start)
    if end == -1:
        end = content.find('DESCRIPTION:', start)
    if end == -1:
        end = len(content)

    parameters: Dict[str, str] = {}
    for raw_line in content[start:end].split('\n'):
        line = raw_line.strip()
        if not line.startswith('*'):
            continue
        # Drop only the bullet itself so both "* key" and "*key" keep the
        # whole key; surrounding whitespace is stripped below.
        key, separator, value = line[1:].partition(':')
        if separator:
            parameters[key.strip()] = value.strip()
    return parameters


def parse_info_txt(info_path: str) -> Dict:
    """Parse an auction ``info.txt`` file into a metadata dictionary.

    Args:
        info_path: Path to a UTF-8 encoded ``info.txt`` file.

    Returns:
        A dict with the keys ``title`` (``'Unknown'`` when there is no
        ``TITLE:`` marker), ``link`` (``''`` when there is no ``LINK:``
        marker), ``parameters`` (dict built from ``* key: value`` lines of
        the ``PARAMETERS:`` section) and ``description`` (everything after
        ``DESCRIPTION:``, or ``''``).

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(info_path, 'r', encoding='utf-8') as f:
        content = f.read()

    metadata = {
        'title': _field_after(content, 'TITLE:', 'Unknown'),
        'link': _field_after(content, 'LINK:', ''),
        'parameters': _parse_parameters(content),
    }

    desc_marker = 'DESCRIPTION:'
    desc_start = content.find(desc_marker)
    if desc_start == -1:
        metadata['description'] = ''
    else:
        metadata['description'] = content[desc_start + len(desc_marker):].strip()

    return metadata


def organize_dataset(root_dir: str, output_json: str = 'dataset/dataset.json'):
    """Scan ``root_dir/<platform>/<auction>/`` folders and write a dataset JSON.

    An auction folder is included only when it contains a readable
    ``info.txt`` and at least one ``*.jpg`` or ``*.png`` file; skipped
    folders are reported on stdout.

    Args:
        root_dir: Directory whose sub-directories are platforms, each holding
            one sub-directory per auction.
        output_json: Path of the JSON file to write. Its parent directory is
            created when missing; a bare filename writes to the current
            working directory.

    Returns:
        The list of entry dicts that was written to ``output_json``.
    """
    root = Path(root_dir)
    dataset = []

    for platform_dir in sorted(root.iterdir()):
        if not platform_dir.is_dir():
            continue

        platform_name = platform_dir.name
        print(f"\n📁 Platform: {platform_name}")

        for auction_dir in sorted(platform_dir.iterdir()):
            if not auction_dir.is_dir():
                continue

            auction_id = auction_dir.name
            info_txt = auction_dir / 'info.txt'

            if not info_txt.exists():
                print(f" ⚠️ {auction_id} - brak info.txt")
                continue

            try:
                metadata = parse_info_txt(str(info_txt))
            except (OSError, UnicodeError) as e:
                # Best effort: an unreadable file skips this auction only.
                print(f" ❌ {auction_id} - błąd: {e}")
                continue

            # Collect images: all .jpg names first, then all .png names.
            images = sorted(img.name for img in auction_dir.glob('*.jpg'))
            images += sorted(img.name for img in auction_dir.glob('*.png'))

            if not images:
                print(f" ⚠️ {auction_id} - brak zdjęć")
                continue

            entry = {
                'id': f"{platform_name}_{auction_id}",
                'platform': platform_name,
                'folder_path': str(auction_dir.relative_to(root)),
                'image_count': len(images),
                'images': images,
                'label': 0,  # Default: authentic
                'label_confidence': 0.0,  # To be filled in manually
                **metadata,
            }

            dataset.append(entry)
            print(f" ✓ {auction_id} ({len(images)} zdjęć)")

    # Save. dirname is '' for a bare filename, and os.makedirs('') raises,
    # so only create the directory when there is one to create.
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Dataset wczytany: {len(dataset)} aukcji")
    print(f"💾 Zapisano: {output_json}")

    return dataset


if __name__ == '__main__':
    dataset = organize_dataset('dataset/raw_data')

    if dataset:
        print("\n" + "="*60)
        print("PRZYKŁAD PIERWSZEJ AUKCJI:")
        print("="*60)
        print(json.dumps(dataset[0], indent=2, ensure_ascii=False)[:800])