File size: 4,269 Bytes
718c4ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import json
from pathlib import Path
from typing import Dict, List

def _field_line(content: str, marker: str, default: str) -> str:
    """Return the stripped text between *marker* and the end of its line.

    Returns *default* when *marker* does not occur in *content*.
    """
    start = content.find(marker)
    if start == -1:
        return default
    start += len(marker)
    end = content.find('\n', start)
    if end == -1:
        # The marker is on the last line and the file has no trailing
        # newline; slicing with -1 would drop the final character.
        end = len(content)
    return content[start:end].strip()


def parse_info_txt(info_path: str) -> Dict:
    """
    Parse an auction ``info.txt`` file into a metadata dictionary.

    The file is read as UTF-8 and searched for four markers:

    * ``TITLE:``       - rest of that line; ``'Unknown'`` if the marker is absent.
    * ``LINK:``        - rest of that line; ``''`` if the marker is absent.
    * ``PARAMETERS:``  - the section runs until the first ``----`` after the
      marker, otherwise until ``DESCRIPTION:``, otherwise to the end of the
      file. Inside it, every line starting with ``*`` and containing ``:``
      is split on the first ``:`` into a key/value pair.
    * ``DESCRIPTION:`` - everything after the marker; ``''`` if absent.

    Args:
        info_path: Path to the ``info.txt`` file.

    Returns:
        A dict with the keys ``title``, ``link``, ``parameters`` (a dict of
        str -> str) and ``description``, in that order.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(info_path, 'r', encoding='utf-8') as f:
        content = f.read()

    metadata = {
        'title': _field_line(content, 'TITLE:', 'Unknown'),
        'link': _field_line(content, 'LINK:', ''),
        'parameters': {},
    }

    # PARAMETERS
    params_start = content.find('PARAMETERS:')
    if params_start != -1:
        params_start += len('PARAMETERS:')
        params_end = content.find('----', params_start)
        if params_end == -1:
            params_end = content.find('DESCRIPTION:', params_start)
        if params_end == -1:
            # No terminator at all: the section extends to the end of the file.
            params_end = len(content)

        for line in content[params_start:params_end].split('\n'):
            stripped = line.strip()
            if not stripped.startswith('*'):
                continue
            # Drop the bullet itself rather than a fixed two characters, so
            # "*key: value" (no space after the bullet) keeps its full key.
            key, sep, value = stripped.lstrip('*').partition(':')
            if sep:
                metadata['parameters'][key.strip()] = value.strip()

    # DESCRIPTION
    desc_start = content.find('DESCRIPTION:')
    if desc_start != -1:
        metadata['description'] = content[desc_start + len('DESCRIPTION:'):].strip()
    else:
        metadata['description'] = ''

    return metadata

def organize_dataset(root_dir: str, output_json: str = 'dataset/dataset.json'):
    """
    Scan a ``<root>/<platform>/<auction>/`` tree and write a JSON index.

    Every sub-directory of ``root_dir`` is treated as a platform, and every
    sub-directory of a platform as one auction. An auction is included only
    if it has an ``info.txt`` that parses without error and at least one
    ``.jpg`` or ``.png`` file. Skipped auctions are reported on stdout.

    Each entry holds ``id``, ``platform``, ``folder_path`` (relative to
    ``root_dir``), ``image_count``, ``images`` (sorted ``.jpg`` names, then
    sorted ``.png`` names), ``label``, ``label_confidence`` and the metadata
    keys returned by ``parse_info_txt``.

    Args:
        root_dir: Directory containing one sub-directory per platform.
        output_json: Destination file; missing parent directories are
            created. A bare file name (no directory part) is accepted.

    Returns:
        The list of entries that was written to ``output_json``.

    Raises:
        OSError: If ``root_dir`` cannot be listed or the output cannot be
            written.
    """
    root = Path(root_dir)
    dataset = []

    for platform_dir in sorted(root.iterdir()):
        if not platform_dir.is_dir():
            continue

        platform_name = platform_dir.name
        print(f"\n📁 Platform: {platform_name}")

        for auction_dir in sorted(platform_dir.iterdir()):
            if not auction_dir.is_dir():
                continue

            auction_id = auction_dir.name
            info_txt = auction_dir / 'info.txt'

            if not info_txt.exists():
                print(f"  ⚠️  {auction_id} - brak info.txt")
                continue

            # Best effort: one unreadable info.txt must not abort the scan.
            try:
                metadata = parse_info_txt(str(info_txt))
            except Exception as e:
                print(f"  ❌ {auction_id} - błąd: {e}")
                continue

            # Collect images. Extensions are compared case-insensitively so
            # that files such as IMG_1.JPG are found on case-sensitive
            # filesystems too; JPGs are still listed before PNGs.
            names = [child.name for child in auction_dir.iterdir()]
            images = sorted(n for n in names if n.lower().endswith('.jpg'))
            images += sorted(n for n in names if n.lower().endswith('.png'))

            if not images:
                print(f"  ⚠️  {auction_id} - brak zdjęć")
                continue

            entry = {
                'id': f"{platform_name}_{auction_id}",
                'platform': platform_name,
                'folder_path': str(auction_dir.relative_to(root)),
                'image_count': len(images),
                'images': images,
                'label': 0,  # Default: authentic
                'label_confidence': 0.0,  # To be filled in manually
                **metadata
            }

            dataset.append(entry)
            print(f"  ✓ {auction_id} ({len(images)} zdjęć)")

    # Save. Path.parent of a bare file name is '.', which mkdir accepts,
    # whereas os.makedirs('') raises FileNotFoundError.
    output_path = Path(output_json)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Dataset wczytany: {len(dataset)} aukcji")
    print(f"💾 Zapisano: {output_json}")

    return dataset

if __name__ == '__main__':
    # Build the index from the default raw-data directory.
    dataset = organize_dataset('dataset/raw_data')

    # Preview the first entry (truncated to 800 characters), if there is one.
    if dataset:
        banner = "=" * 60
        preview = json.dumps(dataset[0], indent=2, ensure_ascii=False)
        print("\n" + banner)
        print("PRZYKŁAD PIERWSZEJ AUKCJI:")
        print(banner)
        print(preview[:800])