hatamo committed on
Commit
718c4ae
·
1 Parent(s): 107bede

Initial deployment of Antique Authenticity API

Browse files
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10
2
+
3
+ WORKDIR /app
4
+
5
+ # Copy requirements first for better caching
6
+ COPY requirements.txt .
7
+
8
+ # Install dependencies
9
+ RUN pip install --no-cache-dir -r requirements.txt
10
+
11
+ # Copy the entire project
12
+ COPY . .
13
+
14
+ # Expose port 7860 (Hugging Face Spaces default)
15
+ EXPOSE 7860
16
+
17
+ # Set environment variables
18
+ ENV PYTHONUNBUFFERED=1
19
+
20
+ # Run the app
21
+ CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py - Main entry point for Hugging Face Spaces
import importlib.util
import os
import sys

# The API module imports its siblings as top-level modules
# (e.g. `from model import ...`), so the code directory must be importable.
CODE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'code')
sys.path.insert(0, CODE_DIR)


def _load_api_app():
    """Load code/app.py by file path and return its FastAPI ``app`` object.

    A plain ``from code.app import app`` is fragile: ``code`` is also the name
    of a standard-library module, and a directory without ``__init__.py`` is
    only a namespace-package candidate, which loses to a regular module found
    later on ``sys.path``. Loading by explicit path avoids the name clash.

    Raises:
        ImportError: if code/app.py cannot be located.
    """
    api_path = os.path.join(CODE_DIR, 'app.py')
    spec = importlib.util.spec_from_file_location('auction_api', api_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load API module from {api_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be looked up while it loads.
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module.app


# Import and expose the FastAPI app
app = _load_api_app()

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
code/__pycache__/allegro_api.cpython-312.pyc ADDED
Binary file (2.72 kB). View file
 
code/__pycache__/dataset_loader.cpython-312.pyc ADDED
Binary file (4.18 kB). View file
 
code/__pycache__/model.cpython-312.pyc ADDED
Binary file (4.6 kB). View file
 
code/__pycache__/web_scraper_allegro.cpython-312.pyc ADDED
Binary file (4.35 kB). View file
 
code/__pycache__/web_scraper_olx.cpython-312.pyc ADDED
Binary file (9.6 kB). View file
 
code/app.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ from fastapi import FastAPI, UploadFile, Form, File
3
+ from fastapi.responses import JSONResponse
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ import torch
6
+ from PIL import Image
7
+ import io
8
+ from model import AuctionAuthenticityModel
9
+ from torchvision import transforms
10
+ import os
11
+ import numpy as np
12
+
13
+
14
app = FastAPI(
    title="Antique Auction Authenticity API",
    description="AI model do oceny autentyczności aukcji antyków",
    version="1.0.0"
)

# The API is open to any origin. Wildcard origins must not be combined with
# credentialed requests, so credentials (cookies / Authorization headers) are
# disabled; none of the endpoints in this module read them.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
27
+
28
# Inference runs on CPU only.
DEVICE = torch.device('cpu')

# Resolve the weights relative to this file rather than the current working
# directory, so the model is found no matter where the server is started from
# (e.g. `python app.py` from the repository root vs. from inside `code/`).
MODEL_PATH = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        '..', 'weights', 'auction_model.pt',
    )
)

# Populated by the startup hook; None until the model has been loaded.
model = None
32
# Image preprocessing shared by all endpoints: resize to 224x224, convert to a
# tensor, then normalize each channel with fixed mean/std values.
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)
_preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    _normalize,
]
transform = transforms.Compose(_preprocessing_steps)
40
+
41
@app.on_event("startup")
async def load_model():
    """Startup hook: build the model, load saved weights if present, switch to eval mode.

    Stores the result in the module-level ``model``. When no weights file
    exists at MODEL_PATH a warning is printed and the freshly constructed
    model is used as-is.
    """
    global model
    print("🚀 Ładowanie modelu...")
    model = AuctionAuthenticityModel(num_classes=3, device=DEVICE).to(DEVICE)

    weights_available = os.path.exists(MODEL_PATH)
    if not weights_available:
        print("⚠️ Brak wag - pretrained")
    else:
        state = torch.load(MODEL_PATH, map_location=DEVICE)
        model.load_state_dict(state)
        print(f"✓ Model załadowany z {MODEL_PATH}")

    model.eval()
    print("✓ Model gotowy")
53
+
54
@app.post("/predict")
async def predict(
    image: UploadFile = File(...),
    title: str = Form(...),
    description: str = Form(...)
):
    """Score a single auction photo together with its title and description.

    Returns a JSON body with the three class probabilities, the verdict
    (ORIGINAL / SCAM / REPLICA, or UNCERTAIN when the top probability is below
    0.6 or it leads the runner-up by less than 0.15), the confidence and the
    margin. Any exception is reported as a 400 response with the error text.
    """
    try:
        raw_bytes = await image.read()
        picture = Image.open(io.BytesIO(raw_bytes)).convert('RGB')
        batch = transform(picture).unsqueeze(0).to(DEVICE)
        text = f"{title} {description}"

        with torch.no_grad():
            class_probs = torch.softmax(model(batch, [text]), dim=1)[0]

        # Class indices: 0 = original, 1 = scam, 2 = replica
        orig_prob, scam_prob, repl_prob = (float(class_probs[k]) for k in range(3))
        probs_dict = {"ORIGINAL": orig_prob, "SCAM": scam_prob, "REPLICA": repl_prob}

        best_label = max(probs_dict, key=probs_dict.get)
        best_prob = probs_dict[best_label]

        # Uncertain: top probability < 0.6 OR lead over the runner-up < 0.15
        ranked = sorted(probs_dict.values(), reverse=True)
        margin = ranked[0] - ranked[1]
        is_uncertain = best_prob < 0.6 or margin < 0.15
        verdict = "UNCERTAIN" if is_uncertain else best_label

        payload = {
            "status": "success",
            "original_probability": round(orig_prob, 3),
            "scam_probability": round(scam_prob, 3),
            "replica_probability": round(repl_prob, 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3),
            "message": f"Aukcja ma {best_prob*100:.1f}% pewności: {verdict}"
        }
        return JSONResponse(payload)
    except Exception as e:
        failure = {"status": "error", "error": str(e)}
        return JSONResponse(failure, status_code=400)
106
+
107
@app.post("/predict_ensemble")
async def predict_ensemble(
    images: list[UploadFile] = File(...),  # multiple files
    title: str = Form(...),
    description: str = Form(...)
):
    """Score an auction from several photos by averaging per-image probabilities.

    Every uploaded image is scored with the same title/description text; the
    class probabilities are averaged and the verdict is derived from the
    average (UNCERTAIN when the top probability is below 0.6 or its lead over
    the runner-up is below 0.15). The per-image probabilities are included in
    the response for debugging.

    Failures (e.g. an upload that is not a readable image) are reported the
    same way as in ``/predict``: a 400 JSON response with the error text,
    instead of an unhandled server error.
    """
    try:
        # The text is the same for every image, so build it once.
        text = f"{title} {description}"
        predictions = []

        for img_file in images:
            img_data = await img_file.read()
            img = Image.open(io.BytesIO(img_data)).convert('RGB')
            img_tensor = transform(img).unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                logits = model(img_tensor, [text])
                probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
            predictions.append(probs)

        # Mean over all images
        avg_probs = np.mean(predictions, axis=0)

        orig_prob = float(avg_probs[0])
        scam_prob = float(avg_probs[1])
        repl_prob = float(avg_probs[2])

        probs_dict = {"ORIGINAL": orig_prob, "SCAM": scam_prob, "REPLICA": repl_prob}
        best_label = max(probs_dict, key=probs_dict.get)
        best_prob = probs_dict[best_label]

        sorted_probs = sorted(probs_dict.values(), reverse=True)
        margin = sorted_probs[0] - sorted_probs[1]

        if best_prob < 0.6 or margin < 0.15:
            verdict = "UNCERTAIN"
        else:
            verdict = best_label

        return JSONResponse({
            "status": "success",
            "image_count": len(images),
            "original_probability": round(orig_prob, 3),
            "scam_probability": round(scam_prob, 3),
            "replica_probability": round(repl_prob, 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3),
            "per_image_probs": [p.tolist() for p in predictions]  # for debugging
        })
    except Exception as e:
        return JSONResponse(
            {"status": "error", "error": str(e)},
            status_code=400
        )
156
+
157
def _detect_platform(url: str):
    """Return 'allegro', 'olx' or 'ebay' based on the URL's hostname, else None.

    The decision uses the parsed hostname, not a substring of the whole URL,
    so an unrelated host cannot pass by mentioning a supported domain in its
    path or query string.
    """
    import re
    from urllib.parse import urlsplit

    candidate = url.strip()
    if "://" not in candidate:
        # Scheme-less input ("allegro.pl/oferta/..."): make the host parseable.
        candidate = "//" + candidate
    try:
        host = (urlsplit(candidate).hostname or "").lower()
    except ValueError:
        return None

    if host == "allegro.pl" or host.endswith(".allegro.pl"):
        return "allegro"
    if host == "olx.pl" or host.endswith(".olx.pl"):
        return "olx"
    # eBay country sites: ebay.com, ebay.de, ebay.co.uk, ebay.com.au, ...
    if re.search(r"(?:^|\.)ebay\.(?:com|[a-z]{2}|com?\.[a-z]{2})$", host):
        return "ebay"
    return None


@app.post("/validate_url")
async def validate_url(
    url: str = Form(...),
    max_images: int = Form(3)
):
    """Scrape an auction page and score it from up to ``max_images`` photos.

    The platform scraper is selected from the URL's hostname. Each of the
    first ``max_images`` (clamped to 1..10) images is downloaded and scored
    with the listing's title + description; the class probabilities are
    averaged and turned into a verdict (UNCERTAIN when the top probability is
    below 0.6 or its lead over the runner-up is below 0.15).

    Returns 400 for an unsupported platform or a listing without images, and
    500 with the error text for any other failure. The traceback is printed
    on the server rather than returned to the client.
    """
    try:
        from io import BytesIO
        import requests

        max_images = max(1, min(max_images, 10))

        # 1. Scraper
        platform = _detect_platform(url)
        if platform == "allegro":
            from web_scraper_allegro import scrape_allegro_offer
            auction = scrape_allegro_offer(url)
        elif platform == "olx":
            from web_scraper_olx import scrape_olx_offer
            auction = scrape_olx_offer(url)
        elif platform == "ebay":
            from web_scraper_ebay import scrape_ebay_offer
            auction = scrape_ebay_offer(url)
        else:
            return JSONResponse({"error": "Unsupported platform"}, status_code=400)

        if not auction.get("image_urls"):
            return JSONResponse({"error": "No images"}, status_code=400)

        # 2. How many images to use
        total_available = len(auction["image_urls"])
        images_to_use = min(max_images, total_available)

        # 3. Run the model directly (no HTTP round-trip to /predict)
        img_probs = []
        # Tolerate listings where the scraper found no title or description.
        title = auction.get("title") or ""
        description = auction.get("description") or ""
        text = title + " " + description

        for i, img_url in enumerate(auction["image_urls"][:images_to_use]):
            print(f"📸 {i+1}/{images_to_use}")

            img_resp = requests.get(img_url, timeout=15)
            img_resp.raise_for_status()

            img = Image.open(BytesIO(img_resp.content)).convert('RGB')
            img_tensor = transform(img).unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                logits = model(img_tensor, [text])
                probs = torch.softmax(logits, dim=1)[0]

            img_probs.append({
                "original_probability": float(probs[0]),
                "scam_probability": float(probs[1]),
                "replica_probability": float(probs[2])
            })

        # 4. Average over the images (plain floats keep the response JSON-friendly)
        avg_orig = float(np.mean([p["original_probability"] for p in img_probs]))
        avg_scam = float(np.mean([p["scam_probability"] for p in img_probs]))
        avg_repl = float(np.mean([p["replica_probability"] for p in img_probs]))

        probs_dict = {"ORIGINAL": avg_orig, "SCAM": avg_scam, "REPLICA": avg_repl}
        best_label = max(probs_dict, key=probs_dict.get)
        best_prob = probs_dict[best_label]

        sorted_probs = sorted(probs_dict.values(), reverse=True)
        margin = sorted_probs[0] - sorted_probs[1]

        if best_prob < 0.6 or margin < 0.15:
            verdict = "UNCERTAIN"
        else:
            verdict = best_label

        return {
            "status": "success",
            "url": url,
            "title": title[:100] + "...",
            "platform": auction["platform"],
            "total_images_available": total_available,
            "requested_max_images": max_images,
            "image_count_used": images_to_use,
            "original_probability": round(avg_orig, 3),
            "scam_probability": round(avg_scam, 3),
            "replica_probability": round(avg_repl, 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3)
        }

    except Exception as e:
        import traceback
        # Keep the stack trace in the server log; returning it to the caller
        # would expose internal paths and code structure.
        traceback.print_exc()
        return JSONResponse({
            "status": "error",
            "error": str(e)
        }, status_code=500)
252
+
253
+
254
@app.get("/health")
def health():
    """Liveness probe: report that the API process is running."""
    payload = dict(status="ok", message="API running")
    return payload
257
+
258
@app.get("/")
def root():
    """Describe the service: name, version and the available endpoints."""
    return {
        "name": "Antique Auction Authenticity API",
        "version": "1.0.0",
        "endpoints": {
            "POST /predict": "Oceń aukcję",
            # Listed so the index matches the routes registered in this module.
            "POST /predict_ensemble": "Oceń aukcję na podstawie wielu zdjęć",
            "POST /validate_url": "Oceń aukcję z linku (Allegro, OLX, eBay)",
            "GET /health": "Health check"
        }
    }
268
+
269
+ if __name__ == '__main__':
270
+ import uvicorn
271
+ uvicorn.run(app, host='0.0.0.0', port=7860)
code/dataset_loader.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from PIL import Image
4
+ import torch
5
+ from torch.utils.data import Dataset, DataLoader
6
+ from torchvision import transforms
7
+
8
class AuctionDatasetFromJSON(Dataset):
    """Dataset of auctions described by a JSON list, one sample per auction.

    Each sample pairs the auction's first image with its title + description
    text and a handful of metadata fields.
    """

    def __init__(self, json_path: str, root_dir: str, transform=None, max_samples=None):
        """Load the auction records.

        Args:
            json_path: path to the JSON file, e.g. dataset/dataset.json
            root_dir: directory holding the auction folders, e.g. dataset/raw_data
            transform: optional callable applied to each loaded PIL image
            max_samples: if truthy, keep only the first ``max_samples`` records
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)

        if max_samples:
            self.data = self.data[:max_samples]

        self.root_dir = Path(root_dir)
        self.transform = transform

    def __len__(self):
        """Return the number of auction records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample dict for the auction at position ``idx``.

        The image is the auction's first listed picture. If the auction lists
        no images, or the file cannot be read, a black 224x224 placeholder is
        used so one bad record does not abort a whole epoch.
        """
        auction = self.data[idx]
        image_names = auction.get('images') or []

        img = None
        if image_names:
            # Path to the image
            img_path = self.root_dir / auction['folder_path'] / image_names[0]
            try:
                img = Image.open(img_path).convert('RGB')
            except Exception as e:
                # Deliberately broad: any unreadable image falls back to the placeholder.
                print(f"Błąd wczytywania {img_path}: {e}")
        else:
            print(f"Błąd wczytywania {auction.get('folder_path')}: brak zdjęć")

        if img is None:
            # Fallback: black image
            img = Image.new('RGB', (224, 224), color='black')

        if self.transform:
            img = self.transform(img)

        # Text: title + description
        title = auction.get('title', '')
        text = f"{title} {auction.get('description', '')}"

        return {
            'image': img,
            'text': text,
            'platform': auction['platform'],
            'title': title,
            'id': auction['id'],
            # NOTE(review): records without a label default to class 0 — confirm
            # that unlabeled auctions are filtered out before training.
            'label': torch.tensor(auction.get('label', 0), dtype=torch.long),
            'folder_path': auction['folder_path']
        }
54
+
55
# Transforms
def get_transforms():
    """Build the image preprocessing pipeline.

    Returns a fresh ``transforms.Compose`` that resizes to 224x224, converts
    the image to a tensor and normalizes each channel with fixed mean/std
    values.
    """
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
64
+
65
+ if __name__ == '__main__':
66
+ print("Testowanie DataLoadera...")
67
+
68
+ dataset = AuctionDatasetFromJSON(
69
+ json_path='../dataset/dataset.json',
70
+ root_dir='../dataset/raw_data',
71
+ transform=get_transforms(),
72
+ max_samples=5
73
+ )
74
+
75
+ print(f"✓ Dataset załadowany: {len(dataset)} próbek")
76
+
77
+ loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)
78
+
79
+ for batch in loader:
80
+ print(f"\nBatch:")
81
+ print(f" - Image shape: {batch['image'].shape}")
82
+ print(f" - Texts: {len(batch['text'])}")
83
+ print(f" - Platforms: {batch['platform']}")
84
+ print(f" - Labels: {batch['label']}")
85
+ print(f" - Example text: {batch['text'][0][:100]}...")
86
+ break
code/evaluate_live.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # evaluate_live.py
2
+ import requests
3
+ from io import BytesIO
4
+
5
+ # Import Twoich scraperów
6
+ from web_scraper_allegro import scrape_allegro_offer
7
+ from web_scraper_olx import scrape_olx_offer
8
+ from web_scraper_ebay import scrape_ebay_offer
9
+
10
+ API_URL = "http://localhost:7860/predict"
11
+
12
def call_model(auction):
    """Send the auction's first image plus its text to the prediction API.

    Returns the API's decoded JSON response, or ``{"error": "No images found"}``
    when the auction has no image URLs. HTTP failures while downloading the
    image or calling the API raise via ``raise_for_status``.
    """
    image_urls = auction.get("image_urls")
    if not image_urls:
        return {"error": "No images found"}

    first_url = image_urls[0]
    print(f"📸 Pobieram zdjęcie: {first_url}")

    download = requests.get(first_url, timeout=20)
    download.raise_for_status()

    # Multipart upload: the image as a file part, the text as form fields.
    upload = {"image": ("image.jpg", BytesIO(download.content), "image/jpeg")}
    form_fields = {
        "title": auction.get("title", ""),
        "description": auction.get("description", ""),
    }

    response = requests.post(API_URL, files=upload, data=form_fields, timeout=120)
    response.raise_for_status()
    return response.json()
33
+
34
def scrape_offer(url: str):
    """Pick the scraper that matches the URL's domain and run it.

    The platform is decided from the parsed hostname rather than a substring
    of the whole URL, so an unrelated host cannot be routed to a scraper just
    because a supported domain appears in its path or query string.

    Raises:
        ValueError: if the URL does not belong to a supported platform.
    """
    import re
    from urllib.parse import urlsplit

    candidate = url.strip()
    if "://" not in candidate:
        # Scheme-less input ("allegro.pl/oferta/..."): make the host parseable.
        candidate = "//" + candidate
    try:
        host = (urlsplit(candidate).hostname or "").lower()
    except ValueError:
        host = ""

    if host == "allegro.pl" or host.endswith(".allegro.pl"):
        return scrape_allegro_offer(url)
    if host == "olx.pl" or host.endswith(".olx.pl"):
        return scrape_olx_offer(url)
    # eBay country sites: ebay.com, ebay.de, ebay.co.uk, ebay.com.au, ...
    if re.search(r"(?:^|\.)ebay\.(?:com|[a-z]{2}|com?\.[a-z]{2})$", host):
        return scrape_ebay_offer(url)
    raise ValueError("Nieobsługiwana platforma")
44
+
45
def evaluate_url(url: str):
    """Full pipeline for one listing: scrape it, score it, return a summary.

    The returned dict carries the URL, the scraped platform and title, and the
    model's response under ``model_result``.
    """
    print(f"🔍 Analizuję: {url}")

    listing = scrape_offer(url)
    title_preview = listing['title'][:50]
    print(f"📋 Zebrane: {title_preview}...")

    scored = call_model(listing)

    summary = {"url": url}
    summary["platform"] = listing["platform"]
    summary["title"] = listing["title"]
    summary["model_result"] = scored
    return summary
60
+
61
+ if __name__ == "__main__":
62
+ while True:
63
+ url = input("\nPodaj link do aukcji (lub 'q' do wyjścia): ")
64
+ if url.lower() == 'q':
65
+ break
66
+
67
+ try:
68
+ result = evaluate_url(url)
69
+ print("\n" + "="*80)
70
+ print(f"VERDICT: {result['model_result'].get('verdict')}")
71
+ print(f"CONFIDENCE: {result['model_result'].get('confidence')}")
72
+ print("="*80)
73
+ except Exception as e:
74
+ print(f"❌ Błąd: {e}")
code/labeling_app/labeling_app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify, send_file
2
+ import json
3
+ import os
4
+ from pathlib import Path
5
+
6
+ app = Flask(__name__)
7
+
8
+ # WAŻNE: ustaw ścieżkę POPRAWNIE (zależy gdzie masz folder)
9
+ DATASET_PATH = Path(__file__).parent.parent.parent / 'dataset' / 'dataset.json'
10
+ RAW_DATA_PATH = Path(__file__).parent.parent.parent / 'dataset' / 'raw_data'
11
+
12
+ print(f"Dataset path: {DATASET_PATH}")
13
+ print(f"Raw data path: {RAW_DATA_PATH}")
14
+
15
def load_dataset():
    """Read and return the parsed contents of the dataset JSON file."""
    raw_text = DATASET_PATH.read_text(encoding='utf-8')
    return json.loads(raw_text)
18
+
19
def save_dataset(data):
    """Write ``data`` to the dataset JSON file, replacing it atomically.

    The JSON is first written to a temporary file next to the dataset and
    then renamed over it, so a failure during serialization cannot leave a
    truncated dataset behind.
    """
    tmp_path = DATASET_PATH.with_name(DATASET_PATH.name + '.tmp')
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    # os.replace overwrites the destination in a single step.
    os.replace(tmp_path, DATASET_PATH)
22
+
23
@app.route('/')
def index():
    """Render the labeling page, passing the total number of auctions."""
    auction_count = len(load_dataset())
    return render_template('labeling.html', total_auctions=auction_count)
27
+
28
@app.route('/image/<path:image_path>')
def serve_image(image_path):
    """Serve an image file from the raw-data directory.

    The requested path is resolved and must stay inside RAW_DATA_PATH, so
    ``..`` segments or absolute paths cannot be used to read arbitrary files.
    Anything else (outside the directory, missing, or not a regular file)
    yields a 404.
    """
    data_root = RAW_DATA_PATH.resolve()
    full_path = (data_root / image_path).resolve()
    print(f"Szukam: {full_path}")
    # Only regular files located somewhere below the data root are served.
    if data_root in full_path.parents and full_path.is_file():
        return send_file(full_path)
    return "Not found", 404
36
+
37
@app.route('/api/next_unlabeled')
def next_unlabeled():
    """Return the first auction whose label_confidence is still 0 (or absent).

    The response carries the auction's position, metadata, a description
    preview and the URLs of all its images. When every auction has been
    labeled, a JSON object with an ``error`` message is returned instead.
    """
    dataset = load_dataset()

    pending = next(
        (
            (position, entry)
            for position, entry in enumerate(dataset)
            if entry.get('label_confidence', 0) == 0
        ),
        None,
    )
    if pending is None:
        return jsonify({'error': 'Wszystkie aukcje etykietowane!'})

    position, entry = pending

    # Build URLs for ALL of the auction's images
    image_urls = [
        f"/image/{entry['folder_path']}/{img_name}"
        for img_name in entry['images']
    ]

    payload = {
        'index': position,
        'id': entry['id'],
        'title': entry['title'],
        'description': entry['description'][:300] + '...',
        'platform': entry['platform'],
        'link': entry['link'],
        'parameters': entry.get('parameters', {}),
        'images': image_urls,
        'total': len(dataset),
        'current': position + 1
    }
    return jsonify(payload)
63
+
64
@app.route('/api/save_label', methods=['POST'])
def save_label():
    """Store the label and confidence for one auction and persist the dataset.

    Expects a JSON object with ``auction_index``, ``label`` and ``confidence``.
    Malformed payloads and indexes outside the dataset are rejected with a 400
    response instead of raising (or, for negative indexes, silently updating
    an auction counted from the end of the list).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'status': 'error', 'error': 'Expected a JSON object'}), 400

    missing = [key for key in ('auction_index', 'label', 'confidence') if key not in data]
    if missing:
        return jsonify({
            'status': 'error',
            'error': 'Missing fields: ' + ', '.join(missing)
        }), 400

    dataset = load_dataset()

    auction_index = data['auction_index']
    # bool is a subclass of int, so it is excluded explicitly.
    index_ok = (
        isinstance(auction_index, int)
        and not isinstance(auction_index, bool)
        and 0 <= auction_index < len(dataset)
    )
    if not index_ok:
        return jsonify({'status': 'error', 'error': 'Invalid auction_index'}), 400

    dataset[auction_index]['label'] = data['label']
    dataset[auction_index]['label_confidence'] = data['confidence']

    save_dataset(dataset)
    return jsonify({'status': 'ok'})
75
+
76
@app.route('/api/stats')
def get_stats():
    """Return labeling progress: totals, per-label counts and percent done.

    An auction counts as labeled when its label_confidence is greater than 0;
    the per-label counts compare each auction's ``label`` with 0, 1 and 2.
    """
    dataset = load_dataset()

    total = len(dataset)
    labeled = sum(1 for a in dataset if a.get('label_confidence', 0) > 0)

    def count_with_label(code):
        # Number of auctions whose stored label equals `code`.
        return sum(1 for a in dataset if a.get('label') == code)

    by_label = {
        'ORIGINAL': count_with_label(0),
        'SCAM': count_with_label(1),
        'REPLICA': count_with_label(2)
    }

    if total > 0:
        progress = round(labeled / total * 100, 1)
    else:
        progress = 0

    stats = {
        'total': total,
        'labeled': labeled,
        'unlabeled': total - labeled,
        'by_label': by_label,
        'progress': progress
    }
    return jsonify(stats)
97
+
98
+ if __name__ == '__main__':
99
+ app.run(debug=True, port=5000)
code/labeling_app/templates/labeling.html ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="pl">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Labeling Aukcji Antyków</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
16
+ background: #f5f5f5;
17
+ padding: 20px;
18
+ }
19
+
20
+ .container {
21
+ max-width: 1200px;
22
+ margin: 0 auto;
23
+ }
24
+
25
+ header {
26
+ background: white;
27
+ padding: 20px;
28
+ border-radius: 8px;
29
+ margin-bottom: 20px;
30
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
31
+ display: flex;
32
+ justify-content: space-between;
33
+ align-items: center;
34
+ }
35
+
36
+ h1 {
37
+ font-size: 28px;
38
+ color: #333;
39
+ }
40
+
41
+ .progress-bar {
42
+ width: 300px;
43
+ height: 8px;
44
+ background: #e0e0e0;
45
+ border-radius: 4px;
46
+ overflow: hidden;
47
+ }
48
+
49
+ .progress-fill {
50
+ height: 100%;
51
+ background: #4CAF50;
52
+ transition: width 0.3s;
53
+ }
54
+
55
+ .progress-text {
56
+ font-size: 14px;
57
+ color: #666;
58
+ margin-top: 8px;
59
+ }
60
+
61
+ .main-content {
62
+ display: grid;
63
+ grid-template-columns: 2fr 1fr;
64
+ gap: 20px;
65
+ }
66
+
67
+ .auction-card {
68
+ background: white;
69
+ border-radius: 8px;
70
+ padding: 20px;
71
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
72
+ }
73
+
74
+ .auction-header {
75
+ margin-bottom: 20px;
76
+ border-bottom: 1px solid #e0e0e0;
77
+ padding-bottom: 15px;
78
+ }
79
+
80
+ .auction-title {
81
+ font-size: 20px;
82
+ font-weight: 600;
83
+ color: #333;
84
+ margin-bottom: 8px;
85
+ }
86
+
87
+ .auction-meta {
88
+ display: flex;
89
+ gap: 15px;
90
+ font-size: 12px;
91
+ color: #888;
92
+ }
93
+
94
+ .badge {
95
+ background: #f0f0f0;
96
+ padding: 4px 8px;
97
+ border-radius: 4px;
98
+ }
99
+
100
+ .images-carousel {
101
+ margin-bottom: 20px;
102
+ border-radius: 8px;
103
+ overflow: hidden;
104
+ background: #f9f9f9;
105
+ max-height: 400px;
106
+ display: flex;
107
+ align-items: center;
108
+ justify-content: center;
109
+ }
110
+
111
+ .images-carousel img {
112
+ max-width: 100%;
113
+ max-height: 400px;
114
+ object-fit: contain;
115
+ }
116
+
117
+ .description {
118
+ background: #f9f9f9;
119
+ padding: 15px;
120
+ border-radius: 6px;
121
+ margin-bottom: 20px;
122
+ line-height: 1.6;
123
+ color: #555;
124
+ font-size: 14px;
125
+ }
126
+
127
+ .parameters {
128
+ background: #f9f9f9;
129
+ padding: 15px;
130
+ border-radius: 6px;
131
+ margin-bottom: 20px;
132
+ }
133
+
134
+ .parameters h3 {
135
+ font-size: 14px;
136
+ color: #333;
137
+ margin-bottom: 10px;
138
+ }
139
+
140
+ .param-item {
141
+ display: flex;
142
+ justify-content: space-between;
143
+ padding: 8px 0;
144
+ border-bottom: 1px solid #e0e0e0;
145
+ font-size: 13px;
146
+ }
147
+
148
+ .param-item:last-child {
149
+ border-bottom: none;
150
+ }
151
+
152
+ .param-key {
153
+ color: #666;
154
+ font-weight: 500;
155
+ }
156
+
157
+ .param-val {
158
+ color: #333;
159
+ }
160
+
161
+ .sidebar {
162
+ display: flex;
163
+ flex-direction: column;
164
+ gap: 20px;
165
+ }
166
+
167
+ .rating-section {
168
+ background: white;
169
+ padding: 20px;
170
+ border-radius: 8px;
171
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
172
+ }
173
+
174
+ .rating-section h2 {
175
+ font-size: 16px;
176
+ margin-bottom: 15px;
177
+ color: #333;
178
+ }
179
+
180
+ .button-group {
181
+ display: flex;
182
+ flex-direction: column;
183
+ gap: 10px;
184
+ margin-bottom: 20px;
185
+ }
186
+
187
+ .btn {
188
+ padding: 12px 16px;
189
+ border: none;
190
+ border-radius: 6px;
191
+ font-size: 14px;
192
+ font-weight: 600;
193
+ cursor: pointer;
194
+ transition: all 0.2s;
195
+ }
196
+
197
+ .btn-original {
198
+ background: #4CAF50;
199
+ color: white;
200
+ }
201
+
202
+ .btn-original:hover {
203
+ background: #45a049;
204
+ transform: translateY(-2px);
205
+ }
206
+
207
+ .btn-original.active {
208
+ box-shadow: 0 4px 12px rgba(76, 175, 80, 0.4);
209
+ }
210
+
211
+ .btn-scam {
212
+ background: #f44336;
213
+ color: white;
214
+ }
215
+
216
+ .btn-scam:hover {
217
+ background: #da190b;
218
+ transform: translateY(-2px);
219
+ }
220
+
221
+ .btn-scam.active {
222
+ box-shadow: 0 4px 12px rgba(244, 67, 54, 0.4);
223
+ }
224
+
225
+ .btn-replica {
226
+ background: #FF9800;
227
+ color: white;
228
+ }
229
+
230
+ .btn-replica:hover {
231
+ background: #e68900;
232
+ transform: translateY(-2px);
233
+ }
234
+
235
+ .btn-replica.active {
236
+ box-shadow: 0 4px 12px rgba(255, 152, 0, 0.4);
237
+ }
238
+
239
+ .confidence-section {
240
+ background: #f9f9f9;
241
+ padding: 15px;
242
+ border-radius: 6px;
243
+ margin-bottom: 15px;
244
+ }
245
+
246
+ .confidence-label {
247
+ font-size: 13px;
248
+ color: #666;
249
+ margin-bottom: 10px;
250
+ display: flex;
251
+ justify-content: space-between;
252
+ }
253
+
254
+ .confidence-value {
255
+ font-weight: 600;
256
+ color: #333;
257
+ }
258
+
259
+ .confidence-slider {
260
+ width: 100%;
261
+ }
262
+
263
+ .btn-save {
264
+ background: #2196F3;
265
+ color: white;
266
+ width: 100%;
267
+ padding: 14px;
268
+ font-size: 15px;
269
+ font-weight: 600;
270
+ }
271
+
272
+ .btn-save:hover {
273
+ background: #0b7dda;
274
+ }
275
+
276
+ .btn-save:disabled {
277
+ background: #ccc;
278
+ cursor: not-allowed;
279
+ }
280
+
281
+ .stats-card {
282
+ background: white;
283
+ padding: 15px;
284
+ border-radius: 8px;
285
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
286
+ }
287
+
288
+ .stat-item {
289
+ display: flex;
290
+ justify-content: space-between;
291
+ padding: 10px 0;
292
+ border-bottom: 1px solid #e0e0e0;
293
+ font-size: 13px;
294
+ }
295
+
296
+ .stat-item:last-child {
297
+ border-bottom: none;
298
+ }
299
+
300
+ .stat-label {
301
+ color: #666;
302
+ }
303
+
304
+ .stat-value {
305
+ font-weight: 600;
306
+ color: #333;
307
+ }
308
+
309
+ .message {
310
+ padding: 15px;
311
+ border-radius: 6px;
312
+ margin-bottom: 20px;
313
+ text-align: center;
314
+ font-weight: 500;
315
+ }
316
+
317
+ .message.success {
318
+ background: #d4edda;
319
+ color: #155724;
320
+ }
321
+
322
+ .message.error {
323
+ background: #f8d7da;
324
+ color: #721c24;
325
+ }
326
+ .images-container {
327
+ margin-bottom: 20px;
328
+ }
329
+
330
+ .images-carousel {
331
+ margin-bottom: 10px;
332
+ border-radius: 8px;
333
+ overflow: hidden;
334
+ background: #f9f9f9;
335
+ max-height: 400px;
336
+ display: flex;
337
+ align-items: center;
338
+ justify-content: center;
339
+ }
340
+
341
+ .images-carousel img {
342
+ max-width: 100%;
343
+ max-height: 400px;
344
+ object-fit: contain;
345
+ }
346
+
347
+ .images-controls {
348
+ display: flex;
349
+ justify-content: space-between;
350
+ align-items: center;
351
+ margin-bottom: 10px;
352
+ }
353
+
354
+ .nav-btn {
355
+ background: #2196F3;
356
+ color: white;
357
+ border: none;
358
+ padding: 8px 12px;
359
+ border-radius: 4px;
360
+ cursor: pointer;
361
+ font-size: 13px;
362
+ }
363
+
364
+ .nav-btn:hover {
365
+ background: #0b7dda;
366
+ }
367
+
368
+ .nav-btn:disabled {
369
+ background: #ccc;
370
+ cursor: not-allowed;
371
+ }
372
+
373
+ .image-counter {
374
+ font-size: 12px;
375
+ color: #666;
376
+ font-weight: 600;
377
+ }
378
+
379
+ .thumbnails {
380
+ display: flex;
381
+ gap: 8px;
382
+ overflow-x: auto;
383
+ padding: 10px 0;
384
+ }
385
+
386
+ .thumbnail {
387
+ width: 60px;
388
+ height: 60px;
389
+ border: 2px solid #e0e0e0;
390
+ border-radius: 4px;
391
+ cursor: pointer;
392
+ overflow: hidden;
393
+ flex-shrink: 0;
394
+ }
395
+
396
+ .thumbnail img {
397
+ width: 100%;
398
+ height: 100%;
399
+ object-fit: cover;
400
+ }
401
+
402
+ .thumbnail.active {
403
+ border-color: #2196F3;
404
+ }
405
+
406
+
407
+ </style>
408
+ </head>
409
+ <body>
410
+ <div class="container">
411
+ <header>
412
+ <h1>🏺 Labeling Aukcji Antyków</h1>
413
+ <div>
414
+ <div class="progress-bar">
415
+ <div class="progress-fill" id="progressFill"></div>
416
+ </div>
417
+ <div class="progress-text">
418
+ <span id="progressText">0 / 0</span>
419
+ </div>
420
+ </div>
421
+ </header>
422
+
423
+ <div class="main-content">
424
+ <div>
425
+ <div id="message"></div>
426
+
427
+ <div class="auction-card">
428
+ <div class="auction-header">
429
+ <div class="auction-title" id="title">Ładowanie...</div>
430
+ <div class="auction-meta">
431
+ <span class="badge" id="platform">-</span>
432
+ <span class="badge" id="position">-</span>
433
+ </div>
434
+ </div>
435
+
436
+ <div class="images-container">
437
+ <div class="images-carousel" id="imagesCarousel">
438
+ <img id="currentImage" src="" alt="Zdjęcie aukcji">
439
+ </div>
440
+ <div class="images-controls">
441
+ <button id="prevBtn" class="nav-btn">← Poprzednie</button>
442
+ <span id="imageCounter" class="image-counter">1 / 1</span>
443
+ <button id="nextBtn" class="nav-btn">Następne →</button>
444
+ </div>
445
+ <div class="thumbnails" id="thumbnails"></div>
446
+ </div>
447
+
448
+ <div class="description" id="description">-</div>
449
+
450
+ <div class="parameters" id="parametersDiv">
451
+ <h3>Parametry:</h3>
452
+ <div id="paramsList"></div>
453
+ </div>
454
+
455
+ <a id="auctionLink" target="_blank" style="color: #2196F3; text-decoration: none;">
456
+ → Otwórz aukcję
457
+ </a>
458
+ </div>
459
+ </div>
460
+
461
+ <div class="sidebar">
462
+ <div class="rating-section">
463
+ <h2>Ocena autentyczności</h2>
464
+
465
+ <div class="button-group">
466
+ <button class="btn btn-original" data-label="0">✓ ORYGINAŁ</button>
467
+ <button class="btn btn-scam" data-label="1">✗ SCAM</button>
468
+ <button class="btn btn-replica" data-label="2">⚙ REPLIKA</button>
469
+ </div>
470
+
471
+ <div class="confidence-section">
472
+ <div class="confidence-label">
473
+ <span>Pewność oceny:</span>
474
+ <span class="confidence-value" id="confidenceValue">0%</span>
475
+ </div>
476
+ <input type="range" min="0" max="5" value="0" class="confidence-slider" id="confidenceSlider">
477
+ <div style="display: flex; justify-content: space-between; font-size: 11px; color: #999; margin-top: 5px;">
478
+ <span>Niska</span>
479
+ <span>Bardzo wysoka</span>
480
+ </div>
481
+ </div>
482
+
483
+ <button class="btn btn-save" id="saveBtn" disabled>💾 Zapisz i Dalej</button>
484
+ </div>
485
+
486
+ <div class="stats-card">
487
+ <h3 style="margin-bottom: 15px; color: #333;">Statystyki</h3>
488
+ <div class="stat-item">
489
+ <span class="stat-label">Razem:</span>
490
+ <span class="stat-value" id="statTotal">0</span>
491
+ </div>
492
+ <div class="stat-item">
493
+ <span class="stat-label">Etykietowane:</span>
494
+ <span class="stat-value" id="statLabeled">0</span>
495
+ </div>
496
+ <div class="stat-item">
497
+ <span class="stat-label">Do zrobienia:</span>
498
+ <span class="stat-value" id="statUnlabeled">0</span>
499
+ </div>
500
+ <div class="stat-item">
501
+ <span class="stat-label">🏛 Oryginały:</span>
502
+ <span class="stat-value" id="statOriginal">0</span>
503
+ </div>
504
+ <div class="stat-item">
505
+ <span class="stat-label">🚫 Scamy:</span>
506
+ <span class="stat-value" id="statScam">0</span>
507
+ </div>
508
+ <div class="stat-item">
509
+ <span class="stat-label">⚙ Repliki:</span>
510
+ <span class="stat-value" id="statReplica">0</span>
511
+ </div>
512
+ </div>
513
+ </div>
514
+ </div>
515
+ </div>
516
+
517
+ <script>
518
+ let currentAuction = null;
519
+ let selectedLabel = null;
520
+ let selectedConfidence = 0;
521
+ let currentImageIndex = 0;
522
+
523
/**
 * Fetch the next unlabeled auction and render it.
 *
 * Auction data is scraped from external sites, so it is treated as untrusted:
 * every value is inserted through textContent / element properties instead of
 * being interpolated into innerHTML, which would let markup in a title,
 * parameter or image URL run as script.
 */
async function loadNextAuction() {
    const res = await fetch('/api/next_unlabeled');
    const data = await res.json();

    if (data.error) {
        showMessage(data.error, 'success');
        document.querySelector('.auction-card').style.display = 'none';
        return;
    }

    currentAuction = data;
    currentImageIndex = 0;
    selectedLabel = null;
    selectedConfidence = 0;

    // Show the auction data
    document.getElementById('title').textContent = data.title;
    document.getElementById('platform').textContent = data.platform.toUpperCase();
    document.getElementById('position').textContent = `${data.current} / ${data.total}`;
    document.getElementById('description').textContent = data.description;
    document.getElementById('auctionLink').href = data.link;

    // Parameters
    const paramsList = document.getElementById('paramsList');
    paramsList.replaceChildren();
    Object.entries(data.parameters).forEach(([key, val]) => {
        const item = document.createElement('div');
        item.className = 'param-item';

        const keySpan = document.createElement('span');
        keySpan.className = 'param-key';
        keySpan.textContent = `${key}:`;

        const valSpan = document.createElement('span');
        valSpan.className = 'param-val';
        valSpan.textContent = String(val);

        item.append(keySpan, valSpan);
        paramsList.appendChild(item);
    });

    // Thumbnails
    const thumbnails = document.getElementById('thumbnails');
    thumbnails.replaceChildren();
    data.images.forEach((imgUrl, idx) => {
        const thumb = document.createElement('div');
        thumb.className = 'thumbnail' + (idx === 0 ? ' active' : '');

        const img = document.createElement('img');
        img.src = imgUrl;
        img.alt = `Thumbnail ${idx + 1}`;
        thumb.appendChild(img);

        thumb.addEventListener('click', () => showImage(idx));
        thumbnails.appendChild(thumb);
    });

    showImage(0);
    updateSaveButton();
    updateStats();
}
570
+
571
// Display image number `index` of the current auction and keep the counter,
// thumbnail highlight and prev/next buttons in sync. Out-of-range indexes
// are ignored.
function showImage(index) {
    const images = currentAuction.images;
    const outOfRange = index < 0 || index >= images.length;
    if (outOfRange) {
        return;
    }

    currentImageIndex = index;

    const viewer = document.getElementById('currentImage');
    viewer.src = images[index];

    const counter = document.getElementById('imageCounter');
    counter.textContent = `${index + 1} / ${images.length}`;

    // Highlight the matching thumbnail
    const thumbs = document.querySelectorAll('.thumbnail');
    thumbs.forEach((thumb, position) => {
        thumb.classList.toggle('active', position === index);
    });

    // Enable/disable the navigation buttons at either end of the gallery
    const atFirst = index === 0;
    const atLast = index === images.length - 1;
    document.getElementById('prevBtn').disabled = atFirst;
    document.getElementById('nextBtn').disabled = atLast;
}
587
+
588
// Gallery navigation: step one image backwards / forwards.
const prevButton = document.getElementById('prevBtn');
prevButton.addEventListener('click', () => showImage(currentImageIndex - 1));

const nextButton = document.getElementById('nextBtn');
nextButton.addEventListener('click', () => showImage(currentImageIndex + 1));

// Label buttons: remember the chosen label and highlight only that button.
const LABEL_BUTTON_SELECTOR = '.btn-original, .btn-scam, .btn-replica';
for (const button of document.querySelectorAll(LABEL_BUTTON_SELECTOR)) {
    button.addEventListener('click', () => {
        selectedLabel = parseInt(button.dataset.label);
        for (const other of document.querySelectorAll(LABEL_BUTTON_SELECTOR)) {
            other.classList.remove('active');
        }
        button.classList.add('active');
        updateSaveButton();
    });
}

// Confidence slider: positions 0-5 are displayed as 0%-100% in 20% steps.
const confidenceSlider = document.getElementById('confidenceSlider');
confidenceSlider.addEventListener('input', () => {
    const labels = ['0%', '20%', '40%', '60%', '80%', '100%'];
    selectedConfidence = parseInt(confidenceSlider.value);
    document.getElementById('confidenceValue').textContent = labels[selectedConfidence];
    updateSaveButton();
});
611
+
612
// The save button is enabled only once a label is chosen and the
// confidence is above zero.
function updateSaveButton() {
    const saveButton = document.getElementById('saveBtn');
    const hasLabel = selectedLabel !== null;
    const hasConfidence = selectedConfidence > 0;
    saveButton.disabled = !(hasLabel && hasConfidence);
}
616
+
617
// Save the current label, then move on to the next auction.
// The button is disabled while the request is in flight so a double click
// cannot submit the same label twice, and failures are reported to the user
// instead of being silently ignored.
document.getElementById('saveBtn').addEventListener('click', async function() {
    const saveBtn = this;
    saveBtn.disabled = true;

    try {
        const res = await fetch('/api/save_label', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                auction_index: currentAuction.index,
                label: selectedLabel,
                confidence: selectedConfidence
            })
        });

        if (res.ok) {
            // loadNextAuction() re-evaluates the button state itself.
            await loadNextAuction();
            return;
        }

        // NOTE(review): assumes an 'error' message style exists in the CSS - confirm.
        showMessage(`Nie udało się zapisać etykiety (HTTP ${res.status})`, 'error');
    } catch (err) {
        showMessage('Błąd sieci podczas zapisu etykiety', 'error');
    }

    // Failure path: restore the button according to the current selection.
    updateSaveButton();
});
632
+
633
// Refresh the progress bar and the statistics card from /api/stats.
async function updateStats() {
    const response = await fetch('/api/stats');
    const stats = await response.json();

    const setText = (id, value) => {
        document.getElementById(id).textContent = value;
    };

    setText('progressText', `${stats.labeled} / ${stats.total}`);
    document.getElementById('progressFill').style.width = stats.progress + '%';

    setText('statTotal', stats.total);
    setText('statLabeled', stats.labeled);
    setText('statUnlabeled', stats.unlabeled);
    setText('statOriginal', stats.by_label.ORIGINAL);
    setText('statScam', stats.by_label.SCAM);
    setText('statReplica', stats.by_label.REPLICA);
}
647
+
648
// Show `text` in the #message element with the CSS class `message <type>`
// and hide it again after 3 seconds.
function showMessage(text, type) {
    const msgDiv = document.getElementById('message');
    msgDiv.textContent = text;
    msgDiv.className = `message ${type}`;
    msgDiv.style.display = 'block';

    // Cancel a pending hide from an earlier message; otherwise that older
    // timer would hide this newer message before its 3 seconds are up.
    clearTimeout(showMessage.hideTimer);
    showMessage.hideTimer = setTimeout(() => {
        msgDiv.style.display = 'none';
    }, 3000);
}
655
+
656
+ loadNextAuction();
657
+
658
+ </script>
659
+ </body>
660
+ </html>
code/model.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model.py
2
+ import torch
3
+ import torch.nn as nn
4
+ from transformers import DistilBertTokenizer, DistilBertModel
5
+ from torchvision.models import efficientnet_b0
6
+
7
class AuctionAuthenticityModel(nn.Module):
    """Image + text classifier for auction listings.

    An EfficientNet-B0 backbone (classifier head replaced by identity) encodes
    the image, a multilingual DistilBERT encodes the text, and the two feature
    vectors are concatenated and passed through a small MLP that outputs
    ``num_classes`` logits.
    """

    def __init__(self, num_classes=3, device='cpu'):  # 3 classes!
        """Build the encoders and the fusion head.

        Args:
            num_classes: Number of output logits.
            device: Stored as ``self.device`` for callers that read it.
                ``forward`` itself follows the device of its image input.
        """
        super().__init__()
        self.device = device

        # Vision
        self.vision_model = efficientnet_b0(pretrained=True)
        self.vision_model.classifier = nn.Identity()
        vision_out_dim = 1280

        # Text
        self.text_model = DistilBertModel.from_pretrained(
            'distilbert-base-multilingual-cased'
        )
        text_out_dim = 768

        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-multilingual-cased'
        )

        # Fusion (no BatchNorm!)
        hidden_dim = 256
        self.fusion = nn.Sequential(
            nn.Linear(vision_out_dim + text_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, images, texts):
        """Return class logits for a batch of images and their texts.

        Args:
            images: Image tensor fed to the vision backbone.
            texts: A string or a sequence of strings, one per image.

        Returns:
            The output of the fusion head for the concatenated features.
        """
        vision_features = self.vision_model(images)

        # Normalise the text batch: a bare string is a batch of one, and
        # tuples (as some collate functions produce) become lists.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        # Tokens follow the device of the incoming images rather than the
        # device remembered at construction time, which goes stale as soon as
        # the module is moved with .to(...)/.cuda().
        tokens = self.tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors='pt'
        ).to(images.device)
        text_outputs = self.text_model(**tokens)
        # Hidden state of the first token is used as the text embedding.
        text_features = text_outputs.last_hidden_state[:, 0, :]

        combined = torch.cat([vision_features, text_features], dim=1)
        logits = self.fusion(combined)
        return logits

    def count_parameters(self):
        """Return the number of trainable (requires_grad) parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
53
+
54
+
55
if __name__ == '__main__':
    # Smoke test: build the model, run one forward pass, report its size.
    import io

    print("Testowanie modelu...")

    device = torch.device('cpu')
    model = AuctionAuthenticityModel(device=device).to(device)

    print(f"✓ Model stworzony")
    print(f" - Parametrów: {model.count_parameters():,}")

    # Dummy test
    dummy_img = torch.randn(2, 3, 224, 224).to(device)
    dummy_texts = ["Silver spoon antique", "Polish silverware 19th century"]

    with torch.no_grad():
        output = model(dummy_img, dummy_texts)

    print(f"✓ Forward pass: {output.shape}")
    print(f" - Output: {output}")

    # Estimate model size by serialising into memory: nothing is written to
    # the working directory, so no temp file can be left behind on error.
    print(f"\n📊 Rozmiar modelu:")
    buffer = io.BytesIO()
    torch.save(model.state_dict(), buffer)
    size_mb = buffer.getbuffer().nbytes / (1024*1024)
    print(f" - {size_mb:.1f} MB")
code/parse_auction_data.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+ from typing import Dict, List
5
+
6
def parse_info_txt(info_path: str) -> Dict:
    """Parse an auction ``info.txt`` file into a metadata dictionary.

    The file is searched for the markers ``TITLE:``, ``LINK:``,
    ``PARAMETERS:`` and ``DESCRIPTION:``.

    Args:
        info_path: Path to the UTF-8 encoded info file.

    Returns:
        A dict with the keys ``title`` (``'Unknown'`` when missing), ``link``
        (``''`` when missing), ``parameters`` (dict built from ``* key: value``
        lines) and ``description`` (``''`` when missing).
    """
    with open(info_path, 'r', encoding='utf-8') as f:
        content = f.read()

    def _line_after(marker: str, default: str) -> str:
        """Return the rest of the line following *marker*, or *default*."""
        start = content.find(marker)
        if start == -1:
            return default
        start += len(marker)
        end = content.find('\n', start)
        if end == -1:
            # Marker is on the last line without a trailing newline: read to
            # the end instead of slicing with -1 (which drops a character).
            end = len(content)
        return content[start:end].strip()

    metadata = {
        'title': _line_after('TITLE:', 'Unknown'),
        'link': _line_after('LINK:', ''),
        'parameters': {},
    }

    # PARAMETERS: bullet lines up to the '----' separator, else up to
    # DESCRIPTION:, else up to the end of the file.
    params_start = content.find('PARAMETERS:')
    if params_start != -1:
        params_start += len('PARAMETERS:')
        params_end = content.find('----', params_start)
        if params_end == -1:
            params_end = content.find('DESCRIPTION:', params_start)
        if params_end == -1:
            params_end = len(content)

        for line in content[params_start:params_end].split('\n'):
            line = line.strip()
            if not line.startswith('*'):
                continue
            # Drop the bullet marker(s); surrounding spaces are stripped below.
            key, sep, value = line.lstrip('*').partition(':')
            if sep:
                metadata['parameters'][key.strip()] = value.strip()

    # DESCRIPTION: everything after the marker.
    desc_start = content.find('DESCRIPTION:')
    if desc_start != -1:
        metadata['description'] = content[desc_start + len('DESCRIPTION:'):].strip()
    else:
        metadata['description'] = ''

    return metadata
56
+
57
def organize_dataset(root_dir: str, output_json: str = 'dataset/dataset.json'):
    """Scan ``root_dir/<platform>/<auction>/`` folders and write a dataset JSON.

    An auction folder is included only when it contains an ``info.txt`` that
    parses and at least one ``*.jpg`` or ``*.png`` image.

    Args:
        root_dir: Directory whose sub-directories are platforms.
        output_json: Path of the JSON file to write. Its parent directory is
            created when the path has one.

    Returns:
        The list of dataset entries that was written to ``output_json``.
    """
    root = Path(root_dir)
    dataset = []

    for platform_dir in sorted(root.iterdir()):
        if not platform_dir.is_dir():
            continue

        platform_name = platform_dir.name
        print(f"\n📁 Platform: {platform_name}")

        for auction_dir in sorted(platform_dir.iterdir()):
            if not auction_dir.is_dir():
                continue

            auction_id = auction_dir.name
            info_txt = auction_dir / 'info.txt'

            if not info_txt.exists():
                print(f" ⚠️ {auction_id} - brak info.txt")
                continue

            try:
                metadata = parse_info_txt(str(info_txt))
            except Exception as e:
                # Best effort: report the broken auction and keep scanning.
                print(f" ❌ {auction_id} - błąd: {e}")
                continue

            # Collect images: sorted JPGs first, then sorted PNGs.
            images = sorted(img.name for img in auction_dir.glob('*.jpg'))
            images += sorted(img.name for img in auction_dir.glob('*.png'))

            if not images:
                print(f" ⚠️ {auction_id} - brak zdjęć")
                continue

            entry = {
                'id': f"{platform_name}_{auction_id}",
                'platform': platform_name,
                'folder_path': str(auction_dir.relative_to(root)),
                'image_count': len(images),
                'images': images,
                # NOTE(review): not-yet-labelled entries start with label 0,
                # the same value as the "authentic" class - confirm consumers
                # check label_confidence before trusting it.
                'label': 0,  # Default: authentic
                'label_confidence': 0.0,  # To be filled in manually
                **metadata
            }

            dataset.append(entry)
            print(f" ✓ {auction_id} ({len(images)} zdjęć)")

    # Save. os.makedirs('') raises, so only create the parent directory when
    # output_json actually has one (a bare file name goes to the cwd).
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Dataset wczytany: {len(dataset)} aukcji")
    print(f"💾 Zapisano: {output_json}")

    return dataset
123
+
124
if __name__ == '__main__':
    # Build the dataset index and preview the first entry (first 800 chars).
    dataset = organize_dataset('dataset/raw_data')

    if dataset:
        separator = "="*60
        preview = json.dumps(dataset[0], indent=2, ensure_ascii=False)
        print("\n" + separator)
        print("PRZYKŁAD PIERWSZEJ AUKCJI:")
        print(separator)
        print(preview[:800])
code/train.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch.optim import AdamW
4
+ from torch.utils.data import DataLoader, random_split
5
+ from tqdm import tqdm
6
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
7
+ from model import AuctionAuthenticityModel
8
+ from dataset_loader import AuctionDatasetFromJSON, get_transforms
9
+ import json
10
+
11
def train_epoch(model, loader, optimizer, device, epoch):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Each batch is read as a mapping with the keys ``'image'``, ``'text'`` and
    ``'label'``; gradients are clipped to a norm of 1.0 before every step.
    """
    model.train()
    running_loss = 0
    batches = tqdm(loader, desc=f"Epoch {epoch} [TRAIN]")

    for batch in batches:
        images = batch['image'].to(device)
        texts = batch['text']
        targets = batch['label'].to(device)

        optimizer.zero_grad()
        loss = F.cross_entropy(model(images, texts), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        batches.set_postfix(loss=f'{batch_loss:.4f}')

    return running_loss / len(loader)
34
+
35
def validate(model, loader, device, epoch):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        A dict with the mean batch ``loss`` and the ``accuracy``,
        ``precision``, ``recall`` and ``f1`` of the argmax predictions.
        Precision, recall and F1 are macro-averaged over the classes.
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0

    with torch.no_grad():
        progress_bar = tqdm(loader, desc=f"Epoch {epoch} [VAL]")
        for batch in progress_bar:
            images = batch['image'].to(device)
            texts = batch['text']
            labels = batch['label'].to(device)

            logits = model(images, texts)
            loss = F.cross_entropy(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    # The model is multi-class, so the scikit-learn default average='binary'
    # is wrong here: it raises on multi-class targets and, when only class 0
    # is present, scores the absent positive class as 0.0. Macro averaging
    # treats every class equally.
    averaging = {'average': 'macro', 'zero_division': 0}
    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds, **averaging)
    rec = recall_score(all_labels, all_preds, **averaging)
    f1 = f1_score(all_labels, all_preds, **averaging)

    return {
        'loss': total_loss / len(loader),
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1
    }
69
+
70
def main():
    """Train the auction model and save its weights and training history.

    Paths are relative to the current working directory (``../dataset`` and
    ``../weights``), i.e. the script is expected to run from its own folder.

    Raises:
        ValueError: If the dataset is too small for an 80/20 split.
    """
    import os  # local import: only needed to prepare the output directory

    # Configuration
    BATCH_SIZE = 4
    EPOCHS = 5
    LEARNING_RATE = 2e-5
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    WEIGHTS_DIR = '../weights'

    print(f"🖥️ Device: {DEVICE}")
    print(f"📦 Batch size: {BATCH_SIZE}")
    print(f"📚 Epochs: {EPOCHS}")

    # Create the output directory before training, so a missing folder
    # cannot make the final save fail after all epochs have run.
    os.makedirs(WEIGHTS_DIR, exist_ok=True)

    # Load the dataset
    print("\n📥 Ładowanie datasetu...")
    dataset = AuctionDatasetFromJSON(
        json_path='../dataset/dataset.json',
        root_dir='../dataset/raw_data',
        transform=get_transforms()
    )

    print(f"✓ {len(dataset)} aukcji załadowanych")

    # Split: 80% train, 20% val
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    if train_size == 0 or val_size == 0:
        # An empty split would otherwise surface later as a ZeroDivisionError
        # when the mean loss is computed.
        raise ValueError(
            f"Dataset too small for an 80/20 split: {len(dataset)} sample(s)"
        )
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    print(f" - Train: {len(train_dataset)}")
    print(f" - Val: {len(val_dataset)}")

    # Model
    print("\n🧠 Inicjalizacja modelu...")
    model = AuctionAuthenticityModel(device=DEVICE).to(DEVICE)
    print(f"✓ Model gotowy ({model.count_parameters():,} parametrów)")

    # Optimizer
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

    # Training loop
    print("\n🚀 Rozpoczynam trening...\n")

    history = {
        'train_loss': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_f1': []
    }

    for epoch in range(EPOCHS):
        # Train
        train_loss = train_epoch(model, train_loader, optimizer, DEVICE, epoch+1)

        # Validate
        val_metrics = validate(model, val_loader, DEVICE, epoch+1)

        # Log
        history['train_loss'].append(train_loss)
        history['val_loss'].append(val_metrics['loss'])
        history['val_accuracy'].append(val_metrics['accuracy'])
        history['val_f1'].append(val_metrics['f1'])

        print(f"\n{'='*60}")
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f" Train Loss: {train_loss:.4f}")
        print(f" Val Loss: {val_metrics['loss']:.4f}")
        print(f" Val Acc: {val_metrics['accuracy']:.4f}")
        print(f" Val Prec: {val_metrics['precision']:.4f}")
        print(f" Val Rec: {val_metrics['recall']:.4f}")
        print(f" Val F1: {val_metrics['f1']:.4f}")
        print(f"{'='*60}\n")

    # Save the model
    print("\n💾 Zapis modelu...")
    torch.save(model.state_dict(), '../weights/auction_model.pt')
    print("✓ Zapisano: weights/auction_model.pt")

    # Save the history
    with open('../weights/training_history.json', 'w') as f:
        json.dump(history, f, indent=2)
    print("✓ Zapisano: weights/training_history.json")

    print("\n✅ Trening ukończony!")
154
+
155
if __name__ == '__main__':
    # Start training only when this file is executed as a script.
    main()
code/web_scraper_allegro.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scrape_allegro_offer.py
2
+ import undetected_chromedriver as uc
3
+ from selenium.webdriver.common.by import By
4
+ import time
5
+ import requests
6
+
7
def sanitize_folder_name(text):  # helper function
    """Turn *text* into a lowercase, underscore-separated folder name.

    Polish diacritics are replaced by their plain Latin letters, other
    alphanumeric characters are kept, and every remaining character becomes
    an underscore. Runs of underscores are collapsed and leading/trailing
    underscores are removed.
    """
    polish_chars = {
        "ą": "a", "ć": "c", "ę": "e", "ł": "l", "ń": "n",
        "ó": "o", "ś": "s", "ź": "z", "ż": "z"
    }
    mapped = "".join(
        polish_chars.get(char, char if char.isalnum() else "_")
        for char in text.lower()
    )
    # Splitting on "_" and dropping the empty pieces collapses repeated
    # underscores and trims them from both ends in a single pass.
    return "_".join(part for part in mapped.split("_") if part)
24
+
25
def scrape_allegro_offer(url: str):
    """Return the data of an Allegro offer without saving anything to disk.

    Each field is scraped on a best-effort basis: when a lookup fails, a
    placeholder is used for that field and scraping continues.

    Args:
        url: Address of the offer page.

    Returns:
        A dict with the keys ``platform``, ``url``, ``title``,
        ``description``, ``parameters`` (list of ``"name: value"`` strings)
        and ``image_urls`` (de-duplicated, in page order).
    """
    options = uc.ChromeOptions()
    options.add_argument("--window-position=-3000,0")
    driver = uc.Chrome(use_subprocess=True, options=options)

    try:
        print(f"🔍 Allegro: {url}")
        driver.get(url)
        time.sleep(10)

        # TITLE
        # `except Exception` keeps the fallback behaviour but, unlike a bare
        # `except:`, lets KeyboardInterrupt/SystemExit through.
        try:
            title_element = driver.find_element(By.TAG_NAME, "h1")
            title_str = title_element.text.strip()
        except Exception:
            title_str = "untitled"

        # PARAMETERS: two-cell table rows are read as name/value pairs.
        parameter_list = []
        try:
            rows = driver.find_elements(By.CSS_SELECTOR, "tr")
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) == 2:
                    name = cells[0].text.strip()
                    value = cells[1].text.strip()
                    if name and value:
                        parameter_list.append(f"{name}: {value}")
        except Exception:
            pass

        # DESCRIPTION
        try:
            description_element = driver.find_element(By.CSS_SELECTOR, "div._0d3bd_am0a-")
            description_content = description_element.text
        except Exception:
            description_content = "No description"

        # IMAGES
        # A dict is used as an ordered set, so duplicates are dropped while
        # the order in which images appear on the page is preserved.
        unique_links = {}
        try:
            images = driver.find_elements(By.CSS_SELECTOR, ".msub_80.m9tr_5r._07951_IOf8s")
            allowed_sizes = ["/s128/", "/s360/", "/s512/", "/s720/", "/s1024/", "/s1440/", "/original/"]
            for img in images:
                src = img.get_attribute("src")
                if src and "allegroimg.com" in src:
                    if not any(size in src for size in allowed_sizes):
                        continue
                    # Rewrite every known size segment to the original size.
                    for size in allowed_sizes:
                        src = src.replace(size, "/original/")
                    unique_links[src] = None
        except Exception as e:
            print(f"Image error: {e}")

        return {
            "platform": "allegro",
            "url": url,
            "title": title_str,
            "description": description_content,
            "parameters": parameter_list,
            "image_urls": list(unique_links)
        }

    finally:
        driver.quit()
91
+
92
if __name__ == "__main__":
    # Manual check: scrape one offer given on stdin and print the result.
    url = input("Allegro URL: ")
    print(scrape_allegro_offer(url))
code/web_scraper_ebay.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scrape_ebay_offer.py
2
+ import undetected_chromedriver as uc
3
+ from selenium.webdriver.common.by import By
4
+ import time
5
+ import requests
6
+
7
def scrape_ebay_offer(url: str):
    """Return the data of an eBay offer without saving anything to disk.

    Each field is scraped on a best-effort basis: when a lookup fails, a
    placeholder is used for that field and scraping continues.

    Args:
        url: Address of the offer page.

    Returns:
        A dict with the keys ``platform``, ``url``, ``title``,
        ``description``, ``parameters`` (list of ``"label: value"`` strings)
        and ``image_urls`` (de-duplicated, in page order).
    """
    import re  # local import: only needed for the image-size rewrite below

    print(f"🔍 eBay: {url}")
    options = uc.ChromeOptions()
    options.add_argument("--window-position=-3000,0")
    driver = uc.Chrome(use_subprocess=True, options=options)

    try:
        driver.get(url)
        time.sleep(4)

        # TITLE
        # `except Exception` keeps the fallback behaviour but, unlike a bare
        # `except:`, lets KeyboardInterrupt/SystemExit through.
        try:
            title_element = driver.find_element(By.CSS_SELECTOR, "h1.x-item-title__mainTitle")
            title_str = title_element.text.strip()
        except Exception:
            title_str = "untitled_ebay"

        # PARAMETERS
        parameter_list = []
        try:
            rows = driver.find_elements(By.CSS_SELECTOR, ".ux-labels-values")
            for row in rows:
                try:
                    label = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__labels").text.strip()
                    value = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__values").text.strip()
                    if label and value:
                        parameter_list.append(f"{label}: {value}")
                except Exception:
                    continue
        except Exception:
            pass

        # DESCRIPTION (rendered inside the "desc_ifr" iframe)
        description_content = "No description"
        try:
            frame = driver.find_element(By.ID, "desc_ifr")
            driver.switch_to.frame(frame)
            try:
                description_content = driver.find_element(By.TAG_NAME, "body").text.strip()
            finally:
                # Always leave the iframe; otherwise the image lookup below
                # would run inside it and find nothing.
                driver.switch_to.default_content()
        except Exception:
            pass

        # IMAGES
        # A dict is used as an ordered set, so duplicates are dropped while
        # the order in which images appear on the page is preserved.
        unique_links = {}
        try:
            thumbnails = driver.find_elements(By.CSS_SELECTOR, ".ux-image-grid-item img")
            for img in thumbnails:
                src = img.get_attribute("src") or img.get_attribute("data-src")
                if src and "ebayimg.com" in src:
                    # Switch to HD: rewrite any "/s-l<digits>" size token to
                    # "/s-l1600", keeping the "." or "/" that follows it.
                    # NOTE(review): assumes thumbnail URLs end like
                    # ".../s-l64.jpg" - confirm against live pages.
                    hd_link = re.sub(r"/s-l\d+(?=[./])", "/s-l1600", src)
                    unique_links[hd_link] = None
        except Exception:
            pass

        return {
            "platform": "ebay",
            "url": url,
            "title": title_str,
            "description": description_content,
            "parameters": parameter_list,
            "image_urls": list(unique_links)
        }

    finally:
        driver.quit()
74
+
75
if __name__ == "__main__":
    # Manual check: scrape one offer given on stdin and print the result.
    url = input("eBay URL: ")
    print(scrape_ebay_offer(url))
code/web_scraper_olx.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scrape_olx_offer.py
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
def scrape_olx_offer(url: str, timeout: float = 30):
    """Return the data of an OLX offer without saving anything to disk.

    Args:
        url: Address of the offer page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with the keys ``platform``, ``url``, ``title``,
        ``description``, ``parameters`` (list of strings) and ``image_urls``
        (de-duplicated, in page order).

    Raises:
        ValueError: If the server answers with a status other than 200.
        requests.exceptions.RequestException: On network errors or timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    print(f"🔍 OLX: {url}")
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise ValueError(f"OLX error: {response.status_code}")

    soup = BeautifulSoup(response.content, "html.parser")

    # TITLE
    title_element = soup.find("h4", class_="css-1au435n")
    title = title_element.get_text().strip() if title_element else "untitled"

    # DESCRIPTION
    description_element = soup.find("div", class_="css-19duwlz")
    description = description_element.get_text(separator="\n").strip() if description_element else "No description"

    # PARAMETERS
    parameter_list = []
    parameters_container = soup.find("div", attrs={"data-testid": "ad-parameters-container"})
    if parameters_container:
        params = parameters_container.find_all("p", class_="css-13x8d99")
        parameter_list = [p.get_text().strip() for p in params]

    # IMAGES
    # A dict is used as an ordered set, so duplicates are dropped while the
    # order in which images appear on the page is preserved.
    images = soup.select('img[data-testid^="swiper-image"]')
    unique_links = {}
    for img in images:
        link = img.get("src")
        if link:
            unique_links[link] = None

    return {
        "platform": "olx",
        "url": url,
        "title": title,
        "description": description,
        "parameters": parameter_list,
        "image_urls": list(unique_links)
    }
51
+
52
if __name__ == "__main__":
    # Manual check: scrape one offer given on stdin and print the result.
    url = input("OLX URL: ")
    print(scrape_olx_offer(url))
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ transformers
4
+ pillow
5
+ numpy
6
+ scikit-learn
7
+ tqdm
8
+ fastapi
9
+ uvicorn
10
+ python-multipart
11
+ undetected_chromedriver
12
+ bs4
13
+ requests
14
+ flask
weights/auction_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc1ae0fd83c1ebf39b2aad59f554c404398b73b33fc2411c3d2db0dea26b64e
3
+ size 557543075
weights/training_history.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train_loss": [
3
+ 0.8840779519081116,
4
+ 0.4452889025211334,
5
+ 0.24018713772296907,
6
+ 0.12335345685482026,
7
+ 0.05679535768926144
8
+ ],
9
+ "val_loss": [
10
+ 0.6094270433698382,
11
+ 0.30430711592946735,
12
+ 0.15748658563409532,
13
+ 0.07173337734171323,
14
+ 0.03333232658249991
15
+ ],
16
+ "val_accuracy": [
17
+ 1.0,
18
+ 1.0,
19
+ 1.0,
20
+ 1.0,
21
+ 1.0
22
+ ],
23
+ "val_f1": [
24
+ 0.0,
25
+ 0.0,
26
+ 0.0,
27
+ 0.0,
28
+ 0.0
29
+ ]
30
+ }