Spaces:
Running
Running
Initial deployment of Antique Authenticity API
Browse files- Dockerfile +21 -0
- app.py +13 -0
- code/__pycache__/allegro_api.cpython-312.pyc +0 -0
- code/__pycache__/dataset_loader.cpython-312.pyc +0 -0
- code/__pycache__/model.cpython-312.pyc +0 -0
- code/__pycache__/web_scraper_allegro.cpython-312.pyc +0 -0
- code/__pycache__/web_scraper_olx.cpython-312.pyc +0 -0
- code/app.py +271 -0
- code/dataset_loader.py +86 -0
- code/evaluate_live.py +74 -0
- code/labeling_app/labeling_app.py +99 -0
- code/labeling_app/templates/labeling.html +660 -0
- code/model.py +80 -0
- code/parse_auction_data.py +131 -0
- code/train.py +156 -0
- code/web_scraper_allegro.py +95 -0
- code/web_scraper_ebay.py +78 -0
- code/web_scraper_olx.py +55 -0
- requirements.txt +14 -0
- weights/auction_model.pt +3 -0
- weights/training_history.json +30 -0
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image with Python 3.10 for the Antique Authenticity API.
FROM python:3.10

# All subsequent commands run relative to /app.
WORKDIR /app

# Copy the dependency manifest alone so Docker can cache the install layer.
COPY requirements.txt .

# Install Python dependencies without keeping the pip cache in the image.
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the rest of the project sources.
COPY . .

# Hugging Face Spaces routes traffic to port 7860 by default.
EXPOSE 7860

# Stream stdout/stderr immediately so container logs are live.
ENV PYTHONUNBUFFERED=1

# Launch the FastAPI entry point.
CMD ["python", "app.py"]
|
app.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py - Main entry point for Hugging Face Spaces
#
# The real FastAPI application lives in code/app.py; this shim makes the
# `code` directory importable and re-exports the app instance for uvicorn.
import sys
import os

# Resolve an absolute path first: __file__ can be relative when the script is
# launched from a different working directory, which would make the sys.path
# entry below point at the wrong place.
_HERE = os.path.dirname(os.path.abspath(__file__))

# Add the code directory to sys.path so modules inside it can use flat
# imports such as `from model import ...`.
sys.path.insert(0, os.path.join(_HERE, 'code'))

# Import and run the FastAPI app.
# NOTE(review): the package name `code` shadows the stdlib `code` module;
# renaming the directory would be safer long term.
from code.app import app

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
code/__pycache__/allegro_api.cpython-312.pyc
ADDED
|
Binary file (2.72 kB). View file
|
|
|
code/__pycache__/dataset_loader.cpython-312.pyc
ADDED
|
Binary file (4.18 kB). View file
|
|
|
code/__pycache__/model.cpython-312.pyc
ADDED
|
Binary file (4.6 kB). View file
|
|
|
code/__pycache__/web_scraper_allegro.cpython-312.pyc
ADDED
|
Binary file (4.35 kB). View file
|
|
|
code/__pycache__/web_scraper_olx.cpython-312.pyc
ADDED
|
Binary file (9.6 kB). View file
|
|
|
code/app.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py - FastAPI service that scores antique-auction authenticity.
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import torch
from PIL import Image
import io
from model import AuctionAuthenticityModel
from torchvision import transforms
import os
import numpy as np


app = FastAPI(
    title="Antique Auction Authenticity API",
    description="AI model do oceny autentyczności aukcji antyków",
    version="1.0.0"
)

# Open CORS: the API is intended to be called from arbitrary frontends.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Inference runs on CPU (the deployment target has no GPU).
DEVICE = torch.device('cpu')

# Anchor the weights path to this file instead of the process CWD: the Docker
# entry point runs from the project root, where a CWD-relative
# '../weights/...' would resolve outside the project and the weights would
# silently never load.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '..', 'weights', 'auction_model.pt'
)

# Populated once by the startup hook below.
model = None

# Standard ImageNet preprocessing; the backbone was pretrained with these
# normalization statistics.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])
|
| 40 |
+
|
| 41 |
+
@app.on_event("startup")
async def load_model():
    """Build the classifier once at startup; load fine-tuned weights when available."""
    global model

    print("🚀 Ładowanie modelu...")
    net = AuctionAuthenticityModel(num_classes=3, device=DEVICE).to(DEVICE)

    # Fall back to the pretrained backbone when no fine-tuned checkpoint exists.
    if not os.path.exists(MODEL_PATH):
        print("⚠️ Brak wag - pretrained")
    else:
        net.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
        print(f"✓ Model załadowany z {MODEL_PATH}")

    net.eval()  # inference-only: freeze dropout / batch-norm behavior
    model = net
    print("✓ Model gotowy")
|
| 53 |
+
|
| 54 |
+
@app.post("/predict")
async def predict(
    image: UploadFile = File(...),
    title: str = Form(...),
    description: str = Form(...)
):
    """Score one auction photo plus its text; return class probabilities and a verdict."""
    try:
        # Decode the upload and run it through the ImageNet preprocessing pipeline.
        raw = await image.read()
        pil_img = Image.open(io.BytesIO(raw)).convert('RGB')
        batch = transform(pil_img).unsqueeze(0).to(DEVICE)
        combined_text = f"{title} {description}"

        with torch.no_grad():
            scores = torch.softmax(model(batch, [combined_text]), dim=1)[0]

        probs_dict = {
            "ORIGINAL": float(scores[0]),  # label 0
            "SCAM": float(scores[1]),      # label 1
            "REPLICA": float(scores[2]),   # label 2
        }
        best_label, best_prob = max(probs_dict.items(), key=lambda kv: kv[1])

        # Uncertain when max prob < 0.6 OR the gap to the runner-up < 0.15.
        ranked = sorted(probs_dict.values(), reverse=True)
        margin = ranked[0] - ranked[1]
        verdict = "UNCERTAIN" if (best_prob < 0.6 or margin < 0.15) else best_label

        return JSONResponse({
            "status": "success",
            "original_probability": round(probs_dict["ORIGINAL"], 3),
            "scam_probability": round(probs_dict["SCAM"], 3),
            "replica_probability": round(probs_dict["REPLICA"], 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3),
            "message": f"Aukcja ma {best_prob*100:.1f}% pewności: {verdict}"
        })
    except Exception as e:
        return JSONResponse(
            {"status": "error", "error": str(e)},
            status_code=400
        )
|
| 106 |
+
|
| 107 |
+
@app.post("/predict_ensemble")
async def predict_ensemble(
    images: list[UploadFile] = File(...),  # multiple files
    title: str = Form(...),
    description: str = Form(...)
):
    """Average per-image class probabilities over several photos of one auction.

    Returns the same fields as /predict plus image_count and the raw
    per-image probability vectors for debugging.
    """
    try:
        if not images:
            # np.mean over an empty list would yield NaN; fail fast instead.
            return JSONResponse(
                {"status": "error", "error": "No images provided"},
                status_code=400
            )

        predictions = []
        # Text is identical for every photo; build it once outside the loop.
        text = f"{title} {description}"

        for img_file in images:
            img_data = await img_file.read()
            img = Image.open(io.BytesIO(img_data)).convert('RGB')
            img_tensor = transform(img).unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                logits = model(img_tensor, [text])
                probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
                predictions.append(probs)

        # Mean probability vector across all photos.
        avg_probs = np.mean(predictions, axis=0)

        orig_prob = float(avg_probs[0])
        scam_prob = float(avg_probs[1])
        repl_prob = float(avg_probs[2])

        probs_dict = {"ORIGINAL": orig_prob, "SCAM": scam_prob, "REPLICA": repl_prob}
        best_label = max(probs_dict, key=probs_dict.get)
        best_prob = probs_dict[best_label]

        # Same uncertainty rule as /predict: low confidence or small top-2 margin.
        sorted_probs = sorted(probs_dict.values(), reverse=True)
        margin = sorted_probs[0] - sorted_probs[1]

        verdict = "UNCERTAIN" if best_prob < 0.6 or margin < 0.15 else best_label

        return JSONResponse({
            "status": "success",
            "image_count": len(images),
            "original_probability": round(orig_prob, 3),
            "scam_probability": round(scam_prob, 3),
            "replica_probability": round(repl_prob, 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3),
            "per_image_probs": [p.tolist() for p in predictions]  # for debugging
        })
    except Exception as e:
        # Mirror /predict's error contract instead of leaking a bare 500.
        return JSONResponse(
            {"status": "error", "error": str(e)},
            status_code=400
        )
|
| 156 |
+
|
| 157 |
+
@app.post("/validate_url")
async def validate_url(
    url: str = Form(...),
    max_images: int = Form(3)
):
    """Scrape an auction URL, score up to max_images photos, and average the results.

    Supported platforms: Allegro, OLX, eBay. Runs the model directly (no
    internal HTTP round-trip through /predict).
    """
    try:
        import requests  # only this endpoint fetches remote images

        # Clamp so a single request cannot fetch dozens of photos.
        max_images = max(1, min(max_images, 10))

        # 1. Pick the scraper by platform.
        if "allegro.pl" in url:
            from web_scraper_allegro import scrape_allegro_offer
            auction = scrape_allegro_offer(url)
        elif "olx.pl" in url:
            from web_scraper_olx import scrape_olx_offer
            auction = scrape_olx_offer(url)
        elif "ebay." in url:
            from web_scraper_ebay import scrape_ebay_offer
            auction = scrape_ebay_offer(url)
        else:
            return JSONResponse({"error": "Unsupported platform"}, status_code=400)

        if not auction.get("image_urls"):
            return JSONResponse({"error": "No images"}, status_code=400)

        # 2. How many photos to score.
        total_available = len(auction["image_urls"])
        images_to_use = min(max_images, total_available)

        # 3. Score each photo with the in-process model.
        img_probs = []
        text = auction["title"] + " " + auction["description"]

        for i, img_url in enumerate(auction["image_urls"][:images_to_use]):
            print(f"📸 {i+1}/{images_to_use}")

            img_resp = requests.get(img_url, timeout=15)
            img_resp.raise_for_status()

            img = Image.open(io.BytesIO(img_resp.content)).convert('RGB')
            img_tensor = transform(img).unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                logits = model(img_tensor, [text])
                probs = torch.softmax(logits, dim=1)[0]

            img_probs.append({
                "original_probability": float(probs[0]),
                "scam_probability": float(probs[1]),
                "replica_probability": float(probs[2])
            })

        # 4. Average over photos. Uses the module-level numpy import; the old
        # function-local `import numpy as np` shadowed it needlessly.
        avg_orig = np.mean([p["original_probability"] for p in img_probs])
        avg_scam = np.mean([p["scam_probability"] for p in img_probs])
        avg_repl = np.mean([p["replica_probability"] for p in img_probs])

        probs_dict = {"ORIGINAL": avg_orig, "SCAM": avg_scam, "REPLICA": avg_repl}
        best_label = max(probs_dict, key=probs_dict.get)
        best_prob = float(probs_dict[best_label])

        sorted_probs = sorted(probs_dict.values(), reverse=True)
        margin = float(sorted_probs[0] - sorted_probs[1])

        # Same uncertainty rule as /predict.
        if best_prob < 0.6 or margin < 0.15:
            verdict = "UNCERTAIN"
        else:
            verdict = best_label

        return {
            "status": "success",
            "url": url,
            "title": auction["title"][:100] + "...",
            "platform": auction["platform"],
            "total_images_available": total_available,
            "requested_max_images": max_images,
            "image_count_used": images_to_use,
            "original_probability": round(avg_orig, 3),
            "scam_probability": round(avg_scam, 3),
            "replica_probability": round(avg_repl, 3),
            "verdict": verdict,
            "confidence": round(best_prob, 3),
            "margin": round(margin, 3)
        }

    except Exception as e:
        import traceback
        return JSONResponse({
            "status": "error",
            "error": str(e),
            "traceback": traceback.format_exc()
        }, status_code=500)
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
@app.get("/health")
def health():
    """Lightweight liveness probe for the hosting runtime."""
    payload = {"status": "ok", "message": "API running"}
    return payload
|
| 257 |
+
|
| 258 |
+
@app.get("/")
def root():
    """Service metadata and a summary of every available endpoint."""
    return {
        "name": "Antique Auction Authenticity API",
        "version": "1.0.0",
        # Previously only /predict and /health were listed, even though the
        # service also exposes /predict_ensemble and /validate_url.
        "endpoints": {
            "POST /predict": "Oceń aukcję",
            "POST /predict_ensemble": "Oceń aukcję (wiele zdjęć)",
            "POST /validate_url": "Oceń aukcję po URL",
            "GET /health": "Health check"
        }
    }

if __name__ == '__main__':
    import uvicorn
    uvicorn.run(app, host='0.0.0.0', port=7860)
|
code/dataset_loader.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
from pathlib import Path
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class AuctionDatasetFromJSON(Dataset):
    """Dataset of scraped auctions described by a JSON index file.

    Each item pairs the auction's first photo with its title+description text
    and an integer label (0=ORIGINAL, 1=SCAM, 2=REPLICA, per the labeling app).
    """

    def __init__(self, json_path: str, root_dir: str, transform=None, max_samples=None):
        """
        json_path: dataset/dataset.json
        root_dir: dataset/raw_data
        transform: optional torchvision transform applied to the image
        max_samples: optional cap on the number of auctions loaded
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)

        if max_samples:
            self.data = self.data[:max_samples]

        self.root_dir = Path(root_dir)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        auction = self.data[idx]

        # First photo of the auction. Guard against auctions with no photos:
        # the old code indexed auction['images'][0] before the try block and
        # crashed with an uncaught IndexError on an empty list.
        images = auction.get('images') or []
        if images:
            img_path = self.root_dir / auction['folder_path'] / images[0]
            try:
                img = Image.open(img_path).convert('RGB')
            except Exception as e:
                print(f"Błąd wczytywania {img_path}: {e}")
                # Fallback: black placeholder image
                img = Image.new('RGB', (224, 224), color='black')
        else:
            # No photos at all: same black placeholder.
            img = Image.new('RGB', (224, 224), color='black')

        if self.transform:
            img = self.transform(img)

        # Text: title + description concatenated.
        text = f"{auction.get('title', '')} {auction.get('description', '')}"

        return {
            'image': img,
            'text': text,
            'platform': auction['platform'],
            'title': auction['title'],
            'id': auction['id'],
            'label': torch.tensor(auction.get('label', 0), dtype=torch.long),
            'folder_path': auction['folder_path']
        }
|
| 54 |
+
|
| 55 |
+
# Image preprocessing pipeline shared by training and the dataset smoke test.
def get_transforms():
    """Return the standard ImageNet preprocessing: 224x224 resize + normalize.

    Defined as a proper function (the original assigned a lambda to a name,
    which PEP 8 discourages and which produces useless tracebacks).
    """
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
| 64 |
+
|
| 65 |
+
if __name__ == '__main__':
    # Smoke test: load a handful of samples and inspect one batch.
    print("Testowanie DataLoadera...")

    sample_set = AuctionDatasetFromJSON(
        json_path='../dataset/dataset.json',
        root_dir='../dataset/raw_data',
        transform=get_transforms(),
        max_samples=5
    )

    print(f"✓ Dataset załadowany: {len(sample_set)} próbek")

    loader = DataLoader(sample_set, batch_size=2, shuffle=True, num_workers=0)

    # Only the first batch is of interest here.
    batch = next(iter(loader))
    print(f"\nBatch:")
    print(f"  - Image shape: {batch['image'].shape}")
    print(f"  - Texts: {len(batch['text'])}")
    print(f"  - Platforms: {batch['platform']}")
    print(f"  - Labels: {batch['label']}")
    print(f"  - Example text: {batch['text'][0][:100]}...")
|
code/evaluate_live.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# evaluate_live.py
|
| 2 |
+
import requests
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
|
| 5 |
+
# Import Twoich scraperów
|
| 6 |
+
from web_scraper_allegro import scrape_allegro_offer
|
| 7 |
+
from web_scraper_olx import scrape_olx_offer
|
| 8 |
+
from web_scraper_ebay import scrape_ebay_offer
|
| 9 |
+
|
| 10 |
+
API_URL = "http://localhost:7860/predict"
|
| 11 |
+
|
| 12 |
+
def call_model(auction):
    """POST a scraped auction's first photo + text to the /predict endpoint."""
    image_urls = auction.get("image_urls")
    if not image_urls:
        return {"error": "No images found"}

    first_url = image_urls[0]
    print(f"📸 Pobieram zdjęcie: {first_url}")

    # Download the photo before forwarding it to the API.
    photo = requests.get(first_url, timeout=20)
    photo.raise_for_status()

    response = requests.post(
        API_URL,
        files={"image": ("image.jpg", BytesIO(photo.content), "image/jpeg")},
        data={
            "title": auction.get("title", ""),
            "description": auction.get("description", ""),
        },
        timeout=120,
    )
    response.raise_for_status()
    return response.json()
|
| 33 |
+
|
| 34 |
+
def scrape_offer(url: str):
    """Pick the right scraper based on the URL's hostname.

    Raises ValueError for platforms without a scraper. Matches on the parsed
    hostname rather than a raw substring of the whole URL, so a link like
    "https://evil.example/?q=allegro.pl" or a lookalike host
    "allegro.pl.evil.example" can no longer select the wrong scraper.
    """
    from urllib.parse import urlparse

    host = (urlparse(url).hostname or "").lower()

    if host == "allegro.pl" or host.endswith(".allegro.pl"):
        return scrape_allegro_offer(url)
    if host == "olx.pl" or host.endswith(".olx.pl"):
        return scrape_olx_offer(url)
    if "ebay." in host:
        return scrape_ebay_offer(url)
    raise ValueError("Nieobsługiwana platforma")
|
| 44 |
+
|
| 45 |
+
def evaluate_url(url: str):
    """Full pipeline: scrape the offer, call the model API, package the result."""
    print(f"🔍 Analizuję: {url}")

    auction = scrape_offer(url)
    print(f"📋 Zebrane: {auction['title'][:50]}...")

    return {
        "url": url,
        "platform": auction["platform"],
        "title": auction["title"],
        "model_result": call_model(auction),
    }
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
    # Interactive loop: paste auction URLs until the user quits with 'q'.
    while True:
        url = input("\nPodaj link do aukcji (lub 'q' do wyjścia): ")
        if url.lower() == 'q':
            break

        try:
            result = evaluate_url(url)
            outcome = result['model_result']
            print("\n" + "=" * 80)
            print(f"VERDICT: {outcome.get('verdict')}")
            print(f"CONFIDENCE: {outcome.get('confidence')}")
            print("=" * 80)
        except Exception as e:
            print(f"❌ Błąd: {e}")
|
code/labeling_app/labeling_app.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, render_template, request, jsonify, send_file
import json
import os
from pathlib import Path

app = Flask(__name__)

# IMPORTANT: paths are resolved relative to this file, which sits three
# levels below the project root (code/labeling_app/labeling_app.py).
DATASET_PATH = Path(__file__).parent.parent.parent / 'dataset' / 'dataset.json'
RAW_DATA_PATH = Path(__file__).parent.parent.parent / 'dataset' / 'raw_data'

print(f"Dataset path: {DATASET_PATH}")
print(f"Raw data path: {RAW_DATA_PATH}")

def load_dataset():
    """Read the full annotation dataset from disk."""
    with open(DATASET_PATH, 'r', encoding='utf-8') as f:
        return json.load(f)

def save_dataset(data):
    """Write the dataset atomically: dump to a sibling temp file, then replace.

    The original truncate-then-write could destroy every collected label if
    the process died mid-write; os.replace swaps the file in one atomic step.
    """
    tmp_path = DATASET_PATH.with_suffix('.json.tmp')
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    os.replace(tmp_path, DATASET_PATH)
|
| 22 |
+
|
| 23 |
+
@app.route('/')
def index():
    """Render the labeling UI, seeded with the total auction count."""
    total = len(load_dataset())
    return render_template('labeling.html', total_auctions=total)
|
| 27 |
+
|
| 28 |
+
@app.route('/image/<path:image_path>')
def serve_image(image_path):
    """Serve an auction photo from RAW_DATA_PATH.

    Resolves the requested path and rejects anything that escapes the
    raw-data directory (e.g. via '..' components) — the original joined
    user-controlled input straight onto the base path, a path-traversal hole.
    """
    base = RAW_DATA_PATH.resolve()
    full_path = (RAW_DATA_PATH / image_path).resolve()
    print(f"Szukam: {full_path}")
    # Containment check: the resolved target must stay inside base.
    if base not in full_path.parents:
        return "Not found", 404
    if full_path.exists():
        return send_file(full_path)
    return "Not found", 404
|
| 36 |
+
|
| 37 |
+
@app.route('/api/next_unlabeled')
def next_unlabeled():
    """Return the first auction that has not been labeled yet (confidence == 0)."""
    dataset = load_dataset()

    for position, auction in enumerate(dataset):
        if auction.get('label_confidence', 0) != 0:
            continue

        # Build a served URL for every photo of this auction.
        image_urls = [
            f"/image/{auction['folder_path']}/{img_name}"
            for img_name in auction['images']
        ]

        return jsonify({
            'index': position,
            'id': auction['id'],
            'title': auction['title'],
            'description': auction['description'][:300] + '...',
            'platform': auction['platform'],
            'link': auction['link'],
            'parameters': auction.get('parameters', {}),
            'images': image_urls,
            'total': len(dataset),
            'current': position + 1
        })

    return jsonify({'error': 'Wszystkie aukcje etykietowane!'})
|
| 63 |
+
|
| 64 |
+
@app.route('/api/save_label', methods=['POST'])
def save_label():
    """Persist a label + confidence for one auction, addressed by list index."""
    data = request.json
    dataset = load_dataset()

    # Validate the payload instead of letting KeyError/TypeError surface as 500s.
    try:
        auction_index = int(data['auction_index'])
        label = data['label']
        confidence = data['confidence']
    except (KeyError, TypeError, ValueError):
        return jsonify({'status': 'error', 'error': 'invalid payload'}), 400

    # Reject out-of-range indices instead of crashing with IndexError.
    if not 0 <= auction_index < len(dataset):
        return jsonify({'status': 'error', 'error': 'auction_index out of range'}), 400

    dataset[auction_index]['label'] = label
    dataset[auction_index]['label_confidence'] = confidence

    save_dataset(dataset)
    return jsonify({'status': 'ok'})
|
| 75 |
+
|
| 76 |
+
@app.route('/api/stats')
def get_stats():
    """Aggregate labeling progress: totals, per-class counts, percent done."""
    dataset = load_dataset()

    total = len(dataset)
    labeled = sum(1 for a in dataset if a.get('label_confidence', 0) > 0)
    unlabeled = total - labeled

    # Per-class counts keyed by the human-readable class names (0/1/2).
    by_label = {
        'ORIGINAL': sum(1 for a in dataset if a.get('label') == 0),
        'SCAM': sum(1 for a in dataset if a.get('label') == 1),
        'REPLICA': sum(1 for a in dataset if a.get('label') == 2)
    }

    progress = round(labeled / total * 100, 1) if total > 0 else 0
    return jsonify({
        'total': total,
        'labeled': labeled,
        'unlabeled': unlabeled,
        'by_label': by_label,
        'progress': progress
    })

if __name__ == '__main__':
    app.run(debug=True, port=5000)
|
code/labeling_app/templates/labeling.html
ADDED
|
@@ -0,0 +1,660 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="pl">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Labeling Aukcji Antyków</title>
|
| 7 |
+
<style>
|
| 8 |
+
* {
|
| 9 |
+
margin: 0;
|
| 10 |
+
padding: 0;
|
| 11 |
+
box-sizing: border-box;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
body {
|
| 15 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
| 16 |
+
background: #f5f5f5;
|
| 17 |
+
padding: 20px;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
.container {
|
| 21 |
+
max-width: 1200px;
|
| 22 |
+
margin: 0 auto;
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
header {
|
| 26 |
+
background: white;
|
| 27 |
+
padding: 20px;
|
| 28 |
+
border-radius: 8px;
|
| 29 |
+
margin-bottom: 20px;
|
| 30 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 31 |
+
display: flex;
|
| 32 |
+
justify-content: space-between;
|
| 33 |
+
align-items: center;
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
h1 {
|
| 37 |
+
font-size: 28px;
|
| 38 |
+
color: #333;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
.progress-bar {
|
| 42 |
+
width: 300px;
|
| 43 |
+
height: 8px;
|
| 44 |
+
background: #e0e0e0;
|
| 45 |
+
border-radius: 4px;
|
| 46 |
+
overflow: hidden;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.progress-fill {
|
| 50 |
+
height: 100%;
|
| 51 |
+
background: #4CAF50;
|
| 52 |
+
transition: width 0.3s;
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
.progress-text {
|
| 56 |
+
font-size: 14px;
|
| 57 |
+
color: #666;
|
| 58 |
+
margin-top: 8px;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
.main-content {
|
| 62 |
+
display: grid;
|
| 63 |
+
grid-template-columns: 2fr 1fr;
|
| 64 |
+
gap: 20px;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.auction-card {
|
| 68 |
+
background: white;
|
| 69 |
+
border-radius: 8px;
|
| 70 |
+
padding: 20px;
|
| 71 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
.auction-header {
|
| 75 |
+
margin-bottom: 20px;
|
| 76 |
+
border-bottom: 1px solid #e0e0e0;
|
| 77 |
+
padding-bottom: 15px;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
.auction-title {
|
| 81 |
+
font-size: 20px;
|
| 82 |
+
font-weight: 600;
|
| 83 |
+
color: #333;
|
| 84 |
+
margin-bottom: 8px;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.auction-meta {
|
| 88 |
+
display: flex;
|
| 89 |
+
gap: 15px;
|
| 90 |
+
font-size: 12px;
|
| 91 |
+
color: #888;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.badge {
|
| 95 |
+
background: #f0f0f0;
|
| 96 |
+
padding: 4px 8px;
|
| 97 |
+
border-radius: 4px;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
.images-carousel {
|
| 101 |
+
margin-bottom: 20px;
|
| 102 |
+
border-radius: 8px;
|
| 103 |
+
overflow: hidden;
|
| 104 |
+
background: #f9f9f9;
|
| 105 |
+
max-height: 400px;
|
| 106 |
+
display: flex;
|
| 107 |
+
align-items: center;
|
| 108 |
+
justify-content: center;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
.images-carousel img {
|
| 112 |
+
max-width: 100%;
|
| 113 |
+
max-height: 400px;
|
| 114 |
+
object-fit: contain;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.description {
|
| 118 |
+
background: #f9f9f9;
|
| 119 |
+
padding: 15px;
|
| 120 |
+
border-radius: 6px;
|
| 121 |
+
margin-bottom: 20px;
|
| 122 |
+
line-height: 1.6;
|
| 123 |
+
color: #555;
|
| 124 |
+
font-size: 14px;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
.parameters {
|
| 128 |
+
background: #f9f9f9;
|
| 129 |
+
padding: 15px;
|
| 130 |
+
border-radius: 6px;
|
| 131 |
+
margin-bottom: 20px;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.parameters h3 {
|
| 135 |
+
font-size: 14px;
|
| 136 |
+
color: #333;
|
| 137 |
+
margin-bottom: 10px;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.param-item {
|
| 141 |
+
display: flex;
|
| 142 |
+
justify-content: space-between;
|
| 143 |
+
padding: 8px 0;
|
| 144 |
+
border-bottom: 1px solid #e0e0e0;
|
| 145 |
+
font-size: 13px;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.param-item:last-child {
|
| 149 |
+
border-bottom: none;
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
.param-key {
|
| 153 |
+
color: #666;
|
| 154 |
+
font-weight: 500;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
.param-val {
|
| 158 |
+
color: #333;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.sidebar {
|
| 162 |
+
display: flex;
|
| 163 |
+
flex-direction: column;
|
| 164 |
+
gap: 20px;
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
.rating-section {
|
| 168 |
+
background: white;
|
| 169 |
+
padding: 20px;
|
| 170 |
+
border-radius: 8px;
|
| 171 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.rating-section h2 {
|
| 175 |
+
font-size: 16px;
|
| 176 |
+
margin-bottom: 15px;
|
| 177 |
+
color: #333;
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
.button-group {
|
| 181 |
+
display: flex;
|
| 182 |
+
flex-direction: column;
|
| 183 |
+
gap: 10px;
|
| 184 |
+
margin-bottom: 20px;
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
.btn {
|
| 188 |
+
padding: 12px 16px;
|
| 189 |
+
border: none;
|
| 190 |
+
border-radius: 6px;
|
| 191 |
+
font-size: 14px;
|
| 192 |
+
font-weight: 600;
|
| 193 |
+
cursor: pointer;
|
| 194 |
+
transition: all 0.2s;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
.btn-original {
|
| 198 |
+
background: #4CAF50;
|
| 199 |
+
color: white;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
.btn-original:hover {
|
| 203 |
+
background: #45a049;
|
| 204 |
+
transform: translateY(-2px);
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
.btn-original.active {
|
| 208 |
+
box-shadow: 0 4px 12px rgba(76, 175, 80, 0.4);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
.btn-scam {
|
| 212 |
+
background: #f44336;
|
| 213 |
+
color: white;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
.btn-scam:hover {
|
| 217 |
+
background: #da190b;
|
| 218 |
+
transform: translateY(-2px);
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
.btn-scam.active {
|
| 222 |
+
box-shadow: 0 4px 12px rgba(244, 67, 54, 0.4);
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.btn-replica {
|
| 226 |
+
background: #FF9800;
|
| 227 |
+
color: white;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
.btn-replica:hover {
|
| 231 |
+
background: #e68900;
|
| 232 |
+
transform: translateY(-2px);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
.btn-replica.active {
|
| 236 |
+
box-shadow: 0 4px 12px rgba(255, 152, 0, 0.4);
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
.confidence-section {
|
| 240 |
+
background: #f9f9f9;
|
| 241 |
+
padding: 15px;
|
| 242 |
+
border-radius: 6px;
|
| 243 |
+
margin-bottom: 15px;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.confidence-label {
|
| 247 |
+
font-size: 13px;
|
| 248 |
+
color: #666;
|
| 249 |
+
margin-bottom: 10px;
|
| 250 |
+
display: flex;
|
| 251 |
+
justify-content: space-between;
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
.confidence-value {
|
| 255 |
+
font-weight: 600;
|
| 256 |
+
color: #333;
|
| 257 |
+
}
|
| 258 |
+
|
| 259 |
+
.confidence-slider {
|
| 260 |
+
width: 100%;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
.btn-save {
|
| 264 |
+
background: #2196F3;
|
| 265 |
+
color: white;
|
| 266 |
+
width: 100%;
|
| 267 |
+
padding: 14px;
|
| 268 |
+
font-size: 15px;
|
| 269 |
+
font-weight: 600;
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
.btn-save:hover {
|
| 273 |
+
background: #0b7dda;
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.btn-save:disabled {
|
| 277 |
+
background: #ccc;
|
| 278 |
+
cursor: not-allowed;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.stats-card {
|
| 282 |
+
background: white;
|
| 283 |
+
padding: 15px;
|
| 284 |
+
border-radius: 8px;
|
| 285 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
.stat-item {
|
| 289 |
+
display: flex;
|
| 290 |
+
justify-content: space-between;
|
| 291 |
+
padding: 10px 0;
|
| 292 |
+
border-bottom: 1px solid #e0e0e0;
|
| 293 |
+
font-size: 13px;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
.stat-item:last-child {
|
| 297 |
+
border-bottom: none;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
.stat-label {
|
| 301 |
+
color: #666;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.stat-value {
|
| 305 |
+
font-weight: 600;
|
| 306 |
+
color: #333;
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
.message {
|
| 310 |
+
padding: 15px;
|
| 311 |
+
border-radius: 6px;
|
| 312 |
+
margin-bottom: 20px;
|
| 313 |
+
text-align: center;
|
| 314 |
+
font-weight: 500;
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
.message.success {
|
| 318 |
+
background: #d4edda;
|
| 319 |
+
color: #155724;
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
.message.error {
|
| 323 |
+
background: #f8d7da;
|
| 324 |
+
color: #721c24;
|
| 325 |
+
}
|
| 326 |
+
.images-container {
|
| 327 |
+
margin-bottom: 20px;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
.images-carousel {
|
| 331 |
+
margin-bottom: 10px;
|
| 332 |
+
border-radius: 8px;
|
| 333 |
+
overflow: hidden;
|
| 334 |
+
background: #f9f9f9;
|
| 335 |
+
max-height: 400px;
|
| 336 |
+
display: flex;
|
| 337 |
+
align-items: center;
|
| 338 |
+
justify-content: center;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
.images-carousel img {
|
| 342 |
+
max-width: 100%;
|
| 343 |
+
max-height: 400px;
|
| 344 |
+
object-fit: contain;
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
.images-controls {
|
| 348 |
+
display: flex;
|
| 349 |
+
justify-content: space-between;
|
| 350 |
+
align-items: center;
|
| 351 |
+
margin-bottom: 10px;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
.nav-btn {
|
| 355 |
+
background: #2196F3;
|
| 356 |
+
color: white;
|
| 357 |
+
border: none;
|
| 358 |
+
padding: 8px 12px;
|
| 359 |
+
border-radius: 4px;
|
| 360 |
+
cursor: pointer;
|
| 361 |
+
font-size: 13px;
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
.nav-btn:hover {
|
| 365 |
+
background: #0b7dda;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
.nav-btn:disabled {
|
| 369 |
+
background: #ccc;
|
| 370 |
+
cursor: not-allowed;
|
| 371 |
+
}
|
| 372 |
+
|
| 373 |
+
.image-counter {
|
| 374 |
+
font-size: 12px;
|
| 375 |
+
color: #666;
|
| 376 |
+
font-weight: 600;
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
.thumbnails {
|
| 380 |
+
display: flex;
|
| 381 |
+
gap: 8px;
|
| 382 |
+
overflow-x: auto;
|
| 383 |
+
padding: 10px 0;
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
.thumbnail {
|
| 387 |
+
width: 60px;
|
| 388 |
+
height: 60px;
|
| 389 |
+
border: 2px solid #e0e0e0;
|
| 390 |
+
border-radius: 4px;
|
| 391 |
+
cursor: pointer;
|
| 392 |
+
overflow: hidden;
|
| 393 |
+
flex-shrink: 0;
|
| 394 |
+
}
|
| 395 |
+
|
| 396 |
+
.thumbnail img {
|
| 397 |
+
width: 100%;
|
| 398 |
+
height: 100%;
|
| 399 |
+
object-fit: cover;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
.thumbnail.active {
|
| 403 |
+
border-color: #2196F3;
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
</style>
|
| 408 |
+
</head>
|
| 409 |
+
<body>
|
| 410 |
+
<div class="container">
|
| 411 |
+
<header>
|
| 412 |
+
<h1>🏺 Labeling Aukcji Antyków</h1>
|
| 413 |
+
<div>
|
| 414 |
+
<div class="progress-bar">
|
| 415 |
+
<div class="progress-fill" id="progressFill"></div>
|
| 416 |
+
</div>
|
| 417 |
+
<div class="progress-text">
|
| 418 |
+
<span id="progressText">0 / 0</span>
|
| 419 |
+
</div>
|
| 420 |
+
</div>
|
| 421 |
+
</header>
|
| 422 |
+
|
| 423 |
+
<div class="main-content">
|
| 424 |
+
<div>
|
| 425 |
+
<div id="message"></div>
|
| 426 |
+
|
| 427 |
+
<div class="auction-card">
|
| 428 |
+
<div class="auction-header">
|
| 429 |
+
<div class="auction-title" id="title">Ładowanie...</div>
|
| 430 |
+
<div class="auction-meta">
|
| 431 |
+
<span class="badge" id="platform">-</span>
|
| 432 |
+
<span class="badge" id="position">-</span>
|
| 433 |
+
</div>
|
| 434 |
+
</div>
|
| 435 |
+
|
| 436 |
+
<div class="images-container">
|
| 437 |
+
<div class="images-carousel" id="imagesCarousel">
|
| 438 |
+
<img id="currentImage" src="" alt="Zdjęcie aukcji">
|
| 439 |
+
</div>
|
| 440 |
+
<div class="images-controls">
|
| 441 |
+
<button id="prevBtn" class="nav-btn">← Poprzednie</button>
|
| 442 |
+
<span id="imageCounter" class="image-counter">1 / 1</span>
|
| 443 |
+
<button id="nextBtn" class="nav-btn">Następne →</button>
|
| 444 |
+
</div>
|
| 445 |
+
<div class="thumbnails" id="thumbnails"></div>
|
| 446 |
+
</div>
|
| 447 |
+
|
| 448 |
+
<div class="description" id="description">-</div>
|
| 449 |
+
|
| 450 |
+
<div class="parameters" id="parametersDiv">
|
| 451 |
+
<h3>Parametry:</h3>
|
| 452 |
+
<div id="paramsList"></div>
|
| 453 |
+
</div>
|
| 454 |
+
|
| 455 |
+
<a id="auctionLink" target="_blank" style="color: #2196F3; text-decoration: none;">
|
| 456 |
+
→ Otwórz aukcję
|
| 457 |
+
</a>
|
| 458 |
+
</div>
|
| 459 |
+
</div>
|
| 460 |
+
|
| 461 |
+
<div class="sidebar">
|
| 462 |
+
<div class="rating-section">
|
| 463 |
+
<h2>Ocena autentyczności</h2>
|
| 464 |
+
|
| 465 |
+
<div class="button-group">
|
| 466 |
+
<button class="btn btn-original" data-label="0">✓ ORYGINAŁ</button>
|
| 467 |
+
<button class="btn btn-scam" data-label="1">✗ SCAM</button>
|
| 468 |
+
<button class="btn btn-replica" data-label="2">⚙ REPLIKA</button>
|
| 469 |
+
</div>
|
| 470 |
+
|
| 471 |
+
<div class="confidence-section">
|
| 472 |
+
<div class="confidence-label">
|
| 473 |
+
<span>Pewność oceny:</span>
|
| 474 |
+
<span class="confidence-value" id="confidenceValue">0%</span>
|
| 475 |
+
</div>
|
| 476 |
+
<input type="range" min="0" max="5" value="0" class="confidence-slider" id="confidenceSlider">
|
| 477 |
+
<div style="display: flex; justify-content: space-between; font-size: 11px; color: #999; margin-top: 5px;">
|
| 478 |
+
<span>Niska</span>
|
| 479 |
+
<span>Bardzo wysoka</span>
|
| 480 |
+
</div>
|
| 481 |
+
</div>
|
| 482 |
+
|
| 483 |
+
<button class="btn btn-save" id="saveBtn" disabled>💾 Zapisz i Dalej</button>
|
| 484 |
+
</div>
|
| 485 |
+
|
| 486 |
+
<div class="stats-card">
|
| 487 |
+
<h3 style="margin-bottom: 15px; color: #333;">Statystyki</h3>
|
| 488 |
+
<div class="stat-item">
|
| 489 |
+
<span class="stat-label">Razem:</span>
|
| 490 |
+
<span class="stat-value" id="statTotal">0</span>
|
| 491 |
+
</div>
|
| 492 |
+
<div class="stat-item">
|
| 493 |
+
<span class="stat-label">Etykietowane:</span>
|
| 494 |
+
<span class="stat-value" id="statLabeled">0</span>
|
| 495 |
+
</div>
|
| 496 |
+
<div class="stat-item">
|
| 497 |
+
<span class="stat-label">Do zrobienia:</span>
|
| 498 |
+
<span class="stat-value" id="statUnlabeled">0</span>
|
| 499 |
+
</div>
|
| 500 |
+
<div class="stat-item">
|
| 501 |
+
<span class="stat-label">🏛 Oryginały:</span>
|
| 502 |
+
<span class="stat-value" id="statOriginal">0</span>
|
| 503 |
+
</div>
|
| 504 |
+
<div class="stat-item">
|
| 505 |
+
<span class="stat-label">🚫 Scamy:</span>
|
| 506 |
+
<span class="stat-value" id="statScam">0</span>
|
| 507 |
+
</div>
|
| 508 |
+
<div class="stat-item">
|
| 509 |
+
<span class="stat-label">⚙ Repliki:</span>
|
| 510 |
+
<span class="stat-value" id="statReplica">0</span>
|
| 511 |
+
</div>
|
| 512 |
+
</div>
|
| 513 |
+
</div>
|
| 514 |
+
</div>
|
| 515 |
+
</div>
|
| 516 |
+
|
| 517 |
+
<script>
|
| 518 |
+
// Client-side labeling state, shared by all handlers below.
let currentAuction = null;    // auction object currently displayed (from /api/next_unlabeled)
let selectedLabel = null;     // 0=ORIGINAL, 1=SCAM, 2=REPLICA (matches the buttons' data-label)
let selectedConfidence = 0;   // slider value 0..5; 0 means "not set yet"
let currentImageIndex = 0;    // index into currentAuction.images shown in the carousel
|
| 522 |
+
|
| 523 |
+
// Fetch the next unlabeled auction from the backend and render it.
// When the API returns {error: ...} (i.e. nothing left to label) the
// message is shown with the 'success' style — it signals completion,
// not a failure — and the auction card is hidden.
async function loadNextAuction() {
    const res = await fetch('/api/next_unlabeled');
    const data = await res.json();

    if (data.error) {
        showMessage(data.error, 'success');
        document.querySelector('.auction-card').style.display = 'none';
        return;
    }

    // Reset per-auction UI state.
    currentAuction = data;
    currentImageIndex = 0;
    selectedLabel = null;
    selectedConfidence = 0;

    // Show the auction data
    document.getElementById('title').textContent = data.title;
    document.getElementById('platform').textContent = data.platform.toUpperCase();
    document.getElementById('position').textContent = `${data.current} / ${data.total}`;
    document.getElementById('description').textContent = data.description;
    document.getElementById('auctionLink').href = data.link;

    // Parameters: one key/value row per entry.
    const paramsList = document.getElementById('paramsList');
    paramsList.innerHTML = '';
    Object.entries(data.parameters).forEach(([key, val]) => {
        const item = document.createElement('div');
        item.className = 'param-item';
        item.innerHTML = `<span class="param-key">${key}:</span><span class="param-val">${val}</span>`;
        paramsList.appendChild(item);
    });

    // Thumbnails: clicking one jumps the carousel to that image.
    const thumbnails = document.getElementById('thumbnails');
    thumbnails.innerHTML = '';
    data.images.forEach((imgUrl, idx) => {
        const thumb = document.createElement('div');
        thumb.className = 'thumbnail' + (idx === 0 ? ' active' : '');
        thumb.innerHTML = `<img src="${imgUrl}" alt="Thumbnail ${idx+1}">`;
        thumb.addEventListener('click', () => showImage(idx));
        thumbnails.appendChild(thumb);
    });

    showImage(0);
    updateSaveButton();
    updateStats();
}
|
| 570 |
+
|
| 571 |
+
// Display image `index` in the carousel and keep the thumbnail
// highlight, the "i / n" counter and the prev/next button states in
// sync. Out-of-range indices are ignored.
function showImage(index) {
    if (index < 0 || index >= currentAuction.images.length) return;

    currentImageIndex = index;
    document.getElementById('currentImage').src = currentAuction.images[index];
    document.getElementById('imageCounter').textContent = `${index + 1} / ${currentAuction.images.length}`;

    // Highlight the active thumbnail
    document.querySelectorAll('.thumbnail').forEach((t, i) => {
        t.classList.toggle('active', i === index);
    });

    // Disable the nav buttons at the ends of the image list
    document.getElementById('prevBtn').disabled = index === 0;
    document.getElementById('nextBtn').disabled = index === currentAuction.images.length - 1;
}
|
| 587 |
+
|
| 588 |
+
// Carousel navigation buttons.
document.getElementById('prevBtn').addEventListener('click', () => {
    showImage(currentImageIndex - 1);
});

document.getElementById('nextBtn').addEventListener('click', () => {
    showImage(currentImageIndex + 1);
});

// Label buttons: data-label is 0=ORIGINAL, 1=SCAM, 2=REPLICA.
// Exactly one button is 'active' at a time.
document.querySelectorAll('.btn-original, .btn-scam, .btn-replica').forEach(btn => {
    btn.addEventListener('click', function() {
        selectedLabel = parseInt(this.dataset.label);
        document.querySelectorAll('.btn-original, .btn-scam, .btn-replica').forEach(b => b.classList.remove('active'));
        this.classList.add('active');
        updateSaveButton();
    });
});

// Confidence slider: integer 0..5 mapped to a percentage caption.
document.getElementById('confidenceSlider').addEventListener('input', function() {
    selectedConfidence = parseInt(this.value);
    const labels = ['0%', '20%', '40%', '60%', '80%', '100%'];
    document.getElementById('confidenceValue').textContent = labels[selectedConfidence];
    updateSaveButton();
});
|
| 611 |
+
|
| 612 |
+
// The save button is enabled only once a label has been chosen AND a
// non-zero confidence has been set.
function updateSaveButton() {
    const canSave = selectedLabel !== null && selectedConfidence > 0;
    document.getElementById('saveBtn').disabled = !canSave;
}
|
| 616 |
+
|
| 617 |
+
// Persist the chosen label/confidence for the current auction, then
// advance to the next unlabeled one.
// NOTE(review): a non-OK response is silently ignored — no error
// feedback is shown to the user; confirm whether that is intended.
document.getElementById('saveBtn').addEventListener('click', async function() {
    const res = await fetch('/api/save_label', {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({
            auction_index: currentAuction.index,
            label: selectedLabel,
            confidence: selectedConfidence
        })
    });

    if (res.ok) {
        loadNextAuction();
    }
});
|
| 632 |
+
|
| 633 |
+
// Refresh the progress bar and the statistics sidebar from /api/stats.
// Expects a payload shaped like:
//   {total, labeled, unlabeled, progress, by_label: {ORIGINAL, SCAM, REPLICA}}
async function updateStats() {
    const res = await fetch('/api/stats');
    const stats = await res.json();

    document.getElementById('progressText').textContent = `${stats.labeled} / ${stats.total}`;
    document.getElementById('progressFill').style.width = stats.progress + '%';

    document.getElementById('statTotal').textContent = stats.total;
    document.getElementById('statLabeled').textContent = stats.labeled;
    document.getElementById('statUnlabeled').textContent = stats.unlabeled;
    document.getElementById('statOriginal').textContent = stats.by_label.ORIGINAL;
    document.getElementById('statScam').textContent = stats.by_label.SCAM;
    document.getElementById('statReplica').textContent = stats.by_label.REPLICA;
}
|
| 647 |
+
|
| 648 |
+
// Show a transient banner message (type is 'success' or 'error', which
// selects the CSS styling) that hides itself after 3 seconds.
function showMessage(text, type) {
    const msgDiv = document.getElementById('message');
    msgDiv.textContent = text;
    msgDiv.className = `message ${type}`;
    msgDiv.style.display = 'block';
    setTimeout(() => msgDiv.style.display = 'none', 3000);
}
|
| 655 |
+
|
| 656 |
+
// Kick off the first load when the page opens.
loadNextAuction();
|
| 657 |
+
|
| 658 |
+
</script>
|
| 659 |
+
</body>
|
| 660 |
+
</html>
|
code/model.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# model.py
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from transformers import DistilBertTokenizer, DistilBertModel
|
| 5 |
+
from torchvision.models import efficientnet_b0
|
| 6 |
+
|
| 7 |
+
class AuctionAuthenticityModel(nn.Module):
    """Multimodal (image + text) classifier for auction authenticity.

    Combines an EfficientNet-B0 image encoder with a multilingual
    DistilBERT text encoder; the two feature vectors are concatenated
    and fed through a small MLP fusion head producing ``num_classes``
    logits (0=original, 1=scam, 2=replica per the labeling app —
    TODO confirm the mapping against the training labels).
    """

    def __init__(self, num_classes: int = 3, device: str = 'cpu'):  # 3 classes!
        super().__init__()
        self.device = device

        # Vision encoder: EfficientNet-B0 with its classifier head removed,
        # so it outputs the 1280-d pooled feature vector.
        # NOTE(review): `pretrained=True` is deprecated in torchvision>=0.13
        # (the `weights=` argument replaces it) — confirm the pinned version.
        self.vision_model = efficientnet_b0(pretrained=True)
        self.vision_model.classifier = nn.Identity()
        vision_out_dim = 1280

        # Text encoder: multilingual DistilBERT, 768-d hidden size.
        self.text_model = DistilBertModel.from_pretrained(
            'distilbert-base-multilingual-cased'
        )
        text_out_dim = 768

        # Tokenizer is kept on the model so forward() can take raw strings.
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-multilingual-cased'
        )

        # Fusion head (no BatchNorm!)
        hidden_dim = 256
        self.fusion = nn.Sequential(
            nn.Linear(vision_out_dim + text_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, images, texts):
        """Run a forward pass.

        Args:
            images: Float tensor of shape (batch, 3, H, W) — the smoke
                test below uses 224x224 inputs.
            texts: List of raw strings; tokenized here on the fly
                (padded/truncated to at most 512 tokens).

        Returns:
            Logits tensor of shape (batch, num_classes).
        """
        vision_features = self.vision_model(images)
        tokens = self.tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors='pt'
        ).to(self.device)
        text_outputs = self.text_model(**tokens)
        # Use the first ([CLS]) token's hidden state as the text embedding.
        text_features = text_outputs.last_hidden_state[:, 0, :]

        combined = torch.cat([vision_features, text_features], dim=1)
        logits = self.fusion(combined)
        return logits

    def count_parameters(self):
        """Return the number of trainable parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
if __name__ == '__main__':
    # Smoke test: build the model on CPU, run a dummy forward pass, and
    # report the parameter count and serialized size. Downloads the
    # pretrained weights on first run (network access required).
    print("Testowanie modelu...")

    device = torch.device('cpu')
    model = AuctionAuthenticityModel(device=device).to(device)

    print(f"✓ Model stworzony")
    print(f" - Parametrów: {model.count_parameters():,}")

    # Dummy test
    dummy_img = torch.randn(2, 3, 224, 224).to(device)
    dummy_texts = ["Silver spoon antique", "Polish silverware 19th century"]

    with torch.no_grad():
        output = model(dummy_img, dummy_texts)

    print(f"✓ Forward pass: {output.shape}")
    print(f" - Output: {output}")

    # Estimate model size by serializing the state dict to a temporary
    # file, measuring it, and removing it again.
    print(f"\n📊 Rozmiar modelu:")
    torch.save(model.state_dict(), 'temp_model.pt')
    import os
    size_mb = os.path.getsize('temp_model.pt') / (1024*1024)
    print(f" - {size_mb:.1f} MB")
    os.remove('temp_model.pt')
|
code/parse_auction_data.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from typing import Dict, List
|
| 5 |
+
|
| 6 |
+
def parse_info_txt(info_path: str) -> Dict:
    """Parse an auction's info.txt file into a metadata dict.

    The file uses labelled sections:
        TITLE: <one line>
        LINK: <one line>
        PARAMETERS: followed by "* key: value" bullet lines, terminated
            by a '----' separator or the DESCRIPTION: header
        DESCRIPTION: free text running to the end of the file

    Args:
        info_path: Path to the info.txt file (read as UTF-8).

    Returns:
        Dict with keys 'title', 'link', 'parameters' (dict of str->str)
        and 'description'; missing sections fall back to defaults.
    """
    with open(info_path, 'r', encoding='utf-8') as f:
        content = f.read()

    metadata = {}

    def _single_line(tag: str, default: str) -> str:
        # Return the remainder of the line that follows ``tag``.
        if tag not in content:
            return default
        start = content.find(tag) + len(tag)
        end = content.find('\n', start)
        if end == -1:
            # BUG FIX: when the tag sits on the last line with no trailing
            # newline, find() returns -1 and the old [start:-1] slice
            # silently dropped the final character.
            end = len(content)
        return content[start:end].strip()

    metadata['title'] = _single_line('TITLE:', 'Unknown')
    metadata['link'] = _single_line('LINK:', '')

    # PARAMETERS: bullet list of "* key: value" entries.
    metadata['parameters'] = {}
    if 'PARAMETERS:' in content:
        params_start = content.find('PARAMETERS:') + len('PARAMETERS:')
        params_end = content.find('----', params_start)
        if params_end == -1:
            params_end = content.find('DESCRIPTION:', params_start)
        if params_end == -1:
            # BUG FIX: with neither terminator present, -1 used to slice
            # off the last character; take everything instead.
            params_end = len(content)

        params_text = content[params_start:params_end]

        for line in params_text.split('\n'):
            if line.strip().startswith('*'):
                # Drop the leading "* " bullet, then split on the first ':'.
                line_clean = line.strip()[2:]
                if ':' in line_clean:
                    key, value = line_clean.split(':', 1)
                    metadata['parameters'][key.strip()] = value.strip()

    # DESCRIPTION: everything after the header, to end of file.
    if 'DESCRIPTION:' in content:
        desc_start = content.find('DESCRIPTION:') + len('DESCRIPTION:')
        metadata['description'] = content[desc_start:].strip()
    else:
        metadata['description'] = ''

    return metadata
|
| 56 |
+
|
| 57 |
+
def organize_dataset(root_dir: str, output_json: str = 'dataset/dataset.json'):
    """Scan the raw-data directory tree and build dataset.json.

    Expected layout: <root_dir>/<platform>/<auction_id>/ containing an
    info.txt plus *.jpg / *.png images. Auctions missing info.txt or
    images are skipped with a console warning.

    Args:
        root_dir: Root directory holding one sub-directory per platform.
        output_json: Destination path for the generated JSON index.

    Returns:
        List of auction entry dicts (also written to ``output_json``).
    """
    root = Path(root_dir)
    dataset = []

    for platform_dir in sorted(root.iterdir()):
        if not platform_dir.is_dir():
            continue

        platform_name = platform_dir.name
        print(f"\n📁 Platform: {platform_name}")

        for auction_dir in sorted(platform_dir.iterdir()):
            if not auction_dir.is_dir():
                continue

            auction_id = auction_dir.name
            info_txt = auction_dir / 'info.txt'

            if not info_txt.exists():
                print(f" ⚠️ {auction_id} - brak info.txt")
                continue

            try:
                metadata = parse_info_txt(str(info_txt))
            except Exception as e:
                print(f" ❌ {auction_id} - błąd: {e}")
                continue

            # Collect images: jpgs first, then pngs, each group sorted by name.
            images = sorted(img.name for img in auction_dir.glob('*.jpg'))
            images += sorted(img.name for img in auction_dir.glob('*.png'))

            if not images:
                print(f" ⚠️ {auction_id} - brak zdjęć")
                continue

            entry = {
                'id': f"{platform_name}_{auction_id}",
                'platform': platform_name,
                'folder_path': str(auction_dir.relative_to(root)),
                'image_count': len(images),
                'images': images,
                'label': 0,  # Default: authentic
                'label_confidence': 0.0,  # To be filled in manually (labeling app)
                **metadata
            }

            dataset.append(entry)
            print(f" ✓ {auction_id} ({len(images)} zdjęć)")

    # Write the JSON index.
    # BUG FIX: guard against an empty dirname (e.g. output_json='dataset.json'),
    # where os.makedirs('') would raise FileNotFoundError.
    out_dir = os.path.dirname(output_json)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(dataset, f, ensure_ascii=False, indent=2)

    print(f"\n✅ Dataset wczytany: {len(dataset)} aukcji")
    print(f"💾 Zapisano: {output_json}")

    return dataset
|
| 123 |
+
|
| 124 |
+
if __name__ == '__main__':
    # Build the dataset index, then print a truncated preview of the
    # first entry so the operator can sanity-check the parse.
    entries = organize_dataset('dataset/raw_data')

    if entries:
        separator = "=" * 60
        print("\n" + separator)
        print("PRZYKŁAD PIERWSZEJ AUKCJI:")
        print(separator)
        preview = json.dumps(entries[0], indent=2, ensure_ascii=False)
        print(preview[:800])
|
code/train.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn.functional as F
|
| 3 |
+
from torch.optim import AdamW
|
| 4 |
+
from torch.utils.data import DataLoader, random_split
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
| 7 |
+
from model import AuctionAuthenticityModel
|
| 8 |
+
from dataset_loader import AuctionDatasetFromJSON, get_transforms
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
def train_epoch(model, loader, optimizer, device, epoch):
    """Train the model for one epoch and return the mean batch loss.

    Args:
        model: Multimodal model invoked as ``model(images, texts)``.
        loader: DataLoader yielding dicts with 'image', 'text', 'label'.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the image/label tensors are moved to.
        epoch: Epoch number, used only in the progress-bar caption.

    Returns:
        Average cross-entropy loss over all batches.
    """
    model.train()
    running_loss = 0.0
    bar = tqdm(loader, desc=f"Epoch {epoch} [TRAIN]")

    for sample in bar:
        imgs = sample['image'].to(device)
        txts = sample['text']
        targets = sample['label'].to(device)

        optimizer.zero_grad()
        out = model(imgs, txts)
        batch_loss = F.cross_entropy(out, targets)
        batch_loss.backward()
        # Clip gradients to stabilize training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        loss_val = batch_loss.item()
        running_loss += loss_val
        bar.set_postfix(loss=f'{loss_val:.4f}')

    return running_loss / len(loader)
|
| 34 |
+
|
| 35 |
+
def validate(model, loader, device, epoch):
    """Evaluate the model on a validation loader.

    Args:
        model: Multimodal model invoked as ``model(images, texts)``.
        loader: DataLoader yielding dicts with 'image', 'text', 'label'.
        device: Device the image/label tensors are moved to.
        epoch: Epoch number, used only in the progress-bar caption.

    Returns:
        Dict with 'loss', 'accuracy', 'precision', 'recall', 'f1'
        (precision/recall/f1 are macro-averaged over the classes).
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0

    with torch.no_grad():
        progress_bar = tqdm(loader, desc=f"Epoch {epoch} [VAL]")
        for batch in progress_bar:
            images = batch['image'].to(device)
            texts = batch['text']
            labels = batch['label'].to(device)

            logits = model(images, texts)
            loss = F.cross_entropy(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1).cpu().numpy()

            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    # BUG FIX: the model is 3-class, but precision/recall/f1 defaulted to
    # sklearn's average='binary', which raises ValueError on multiclass
    # targets. Use macro averaging (unweighted mean over classes).
    prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
    rec = recall_score(all_labels, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

    return {
        'loss': total_loss / len(loader),
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1
    }
|
| 69 |
+
|
| 70 |
+
def main():
    """Train the auction-authenticity classifier end to end.

    Loads the JSON dataset, does an 80/20 train/val split, trains for a fixed
    number of epochs, then saves model weights and a per-epoch metric history.
    Paths are relative to the code/ directory this script runs from.
    """
    # Configuration
    BATCH_SIZE = 4
    EPOCHS = 5
    LEARNING_RATE = 2e-5
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f"🖥️ Device: {DEVICE}")
    print(f"📦 Batch size: {BATCH_SIZE}")
    print(f"📚 Epochs: {EPOCHS}")

    # Load the dataset
    print("\n📥 Ładowanie datasetu...")
    dataset = AuctionDatasetFromJSON(
        json_path='../dataset/dataset.json',
        root_dir='../dataset/raw_data',
        transform=get_transforms()
    )
    print(f"✓ {len(dataset)} aukcji załadowanych")

    # 80/20 train/validation split
    train_size = int(0.8 * len(dataset))
    train_dataset, val_dataset = random_split(dataset, [train_size, len(dataset) - train_size])

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    print(f" - Train: {len(train_dataset)}")
    print(f" - Val: {len(val_dataset)}")

    # Model
    print("\n🧠 Inicjalizacja modelu...")
    model = AuctionAuthenticityModel(device=DEVICE).to(DEVICE)
    print(f"✓ Model gotowy ({model.count_parameters():,} parametrów)")

    # Optimizer
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

    # Training loop
    print("\n🚀 Rozpoczynam trening...\n")

    history = {
        'train_loss': [],
        'val_loss': [],
        'val_accuracy': [],
        'val_f1': []
    }

    for epoch_no in range(1, EPOCHS + 1):
        train_loss = train_epoch(model, train_loader, optimizer, DEVICE, epoch_no)
        metrics = validate(model, val_loader, DEVICE, epoch_no)

        # Record per-epoch metrics
        history['train_loss'].append(train_loss)
        history['val_loss'].append(metrics['loss'])
        history['val_accuracy'].append(metrics['accuracy'])
        history['val_f1'].append(metrics['f1'])

        print(f"\n{'='*60}")
        print(f"Epoch {epoch_no}/{EPOCHS}")
        print(f" Train Loss: {train_loss:.4f}")
        print(f" Val Loss: {metrics['loss']:.4f}")
        print(f" Val Acc: {metrics['accuracy']:.4f}")
        print(f" Val Prec: {metrics['precision']:.4f}")
        print(f" Val Rec: {metrics['recall']:.4f}")
        print(f" Val F1: {metrics['f1']:.4f}")
        print(f"{'='*60}\n")

    # Save model weights
    print("\n💾 Zapis modelu...")
    torch.save(model.state_dict(), '../weights/auction_model.pt')
    print("✓ Zapisano: weights/auction_model.pt")

    # Save training history
    with open('../weights/training_history.json', 'w') as f:
        json.dump(history, f, indent=2)
    print("✓ Zapisano: weights/training_history.json")

    print("\n✅ Trening ukończony!")
|
| 154 |
+
|
| 155 |
+
# Script entry point: run the full training pipeline when executed directly.
if __name__ == '__main__':
    main()
|
code/web_scraper_allegro.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scrape_allegro_offer.py
|
| 2 |
+
import undetected_chromedriver as uc
|
| 3 |
+
from selenium.webdriver.common.by import By
|
| 4 |
+
import time
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
def sanitize_folder_name(text):  # helper function
    """Turn arbitrary text into a safe folder name.

    Lower-cases the input, transliterates Polish diacritics to ASCII, maps
    every non-alphanumeric character to '_', collapses runs of underscores,
    and strips underscores from both ends.
    """
    polish_chars = {
        "ą": "a", "ć": "c", "ę": "e", "ł": "l", "ń": "n",
        "ó": "o", "ś": "s", "ź": "z", "ż": "z"
    }
    pieces = []
    for ch in text.lower():
        if ch in polish_chars:
            pieces.append(polish_chars[ch])
        elif ch.isalnum():
            pieces.append(ch)
        else:
            pieces.append("_")
    collapsed = "".join(pieces)
    while "__" in collapsed:
        collapsed = collapsed.replace("__", "_")
    return collapsed.strip("_")
|
| 24 |
+
|
| 25 |
+
def scrape_allegro_offer(url: str):
    """Scrape one Allegro offer page and return its data without saving to disk.

    Returns a dict with keys: platform, url, title, description, parameters
    (list of "name: value" strings) and image_urls (deduplicated, rewritten
    to the /original/ size variant).

    NOTE(review): the CSS class selectors below are build-generated by
    Allegro's frontend and will break on a site redesign — verify periodically.
    """
    options = uc.ChromeOptions()
    # Park the window off-screen instead of running headless (less detectable).
    options.add_argument("--window-position=-3000,0")
    driver = uc.Chrome(use_subprocess=True, options=options)

    try:
        print(f"🔍 Allegro: {url}")
        driver.get(url)
        time.sleep(10)  # generous wait for anti-bot checks / lazy-loaded content

        # TITLE
        try:
            title_element = driver.find_element(By.TAG_NAME, "h1")
            title_str = title_element.text.strip()
        except Exception:  # narrowed from bare except: keep Ctrl-C/SystemExit working
            title_str = "untitled"

        # PARAMETERS: every 2-cell table row is treated as a "name: value" pair.
        parameter_list = []
        try:
            rows = driver.find_elements(By.CSS_SELECTOR, "tr")
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) == 2:
                    name = cells[0].text.strip()
                    value = cells[1].text.strip()
                    if name and value:
                        parameter_list.append(f"{name}: {value}")
        except Exception:  # best-effort: missing parameters are not fatal
            pass

        # DESCRIPTION
        try:
            description_element = driver.find_element(By.CSS_SELECTOR, "div._0d3bd_am0a-")
            description_content = description_element.text
        except Exception:
            description_content = "No description"

        # IMAGES: keep only known size variants and upgrade them to /original/.
        unique_links = set()
        try:
            images = driver.find_elements(By.CSS_SELECTOR, ".msub_80.m9tr_5r._07951_IOf8s")
            allowed_sizes = ["/s128/", "/s360/", "/s512/", "/s720/", "/s1024/", "/s1440/", "/original/"]
            for img in images:
                src = img.get_attribute("src")
                if src and "allegroimg.com" in src:
                    if not any(size in src for size in allowed_sizes):
                        continue
                    for size in allowed_sizes:
                        src = src.replace(size, "/original/")
                    unique_links.add(src)
        except Exception as e:
            print(f"Image error: {e}")

        return {
            "platform": "allegro",
            "url": url,
            "title": title_str,
            "description": description_content,
            "parameters": parameter_list,
            "image_urls": list(unique_links)
        }

    finally:
        # Always release the browser, even if scraping raised.
        driver.quit()
|
| 91 |
+
|
| 92 |
+
if __name__ == "__main__":
|
| 93 |
+
url = input("Allegro URL: ")
|
| 94 |
+
result = scrape_allegro_offer(url)
|
| 95 |
+
print(result)
|
code/web_scraper_ebay.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scrape_ebay_offer.py
|
| 2 |
+
import undetected_chromedriver as uc
|
| 3 |
+
from selenium.webdriver.common.by import By
|
| 4 |
+
import time
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
def scrape_ebay_offer(url: str):
    """Scrape one eBay listing and return its data without saving to disk.

    Returns a dict with keys: platform, url, title, description, parameters
    (list of "Label: value" strings) and image_urls (deduplicated HD links).
    """
    print(f"🔍 eBay: {url}")
    options = uc.ChromeOptions()
    # Park the window off-screen instead of running headless (less detectable).
    options.add_argument("--window-position=-3000,0")
    driver = uc.Chrome(use_subprocess=True, options=options)

    try:
        driver.get(url)
        time.sleep(4)  # allow dynamic content to render

        # TITLE
        try:
            title_element = driver.find_element(By.CSS_SELECTOR, "h1.x-item-title__mainTitle")
            title_str = title_element.text.strip()
        except Exception:  # narrowed from bare except: keep Ctrl-C/SystemExit working
            title_str = "untitled_ebay"

        # PARAMETERS: eBay "item specifics" label/value rows.
        parameter_list = []
        try:
            rows = driver.find_elements(By.CSS_SELECTOR, ".ux-labels-values")
            for row in rows:
                try:
                    label = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__labels").text.strip()
                    value = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__values").text.strip()
                    if label and value:
                        parameter_list.append(f"{label}: {value}")
                except Exception:
                    continue  # skip malformed rows, keep the rest
        except Exception:
            pass

        # DESCRIPTION lives inside the seller-description iframe.
        description_content = "No description"
        try:
            frame = driver.find_element(By.ID, "desc_ifr")
            driver.switch_to.frame(frame)
            description_content = driver.find_element(By.TAG_NAME, "body").text.strip()
            driver.switch_to.default_content()
        except Exception:
            pass

        # IMAGES: upgrade thumbnail URLs to the 1600px variant.
        unique_links = set()
        try:
            thumbnails = driver.find_elements(By.CSS_SELECTOR, ".ux-image-grid-item img")
            for img in thumbnails:
                src = img.get_attribute("src") or img.get_attribute("data-src")
                if src and "ebayimg.com" in src:
                    # BUGFIX: the replacement previously dropped the trailing
                    # slash ("/s-l1600"), producing malformed URLs such as
                    # ".../s-l1600foo.jpg" whenever the pattern matched.
                    hd_link = src.replace("/s-l64/", "/s-l1600/").replace("/s-l140/", "/s-l1600/")
                    unique_links.add(hd_link)
        except Exception:
            pass

        return {
            "platform": "ebay",
            "url": url,
            "title": title_str,
            "description": description_content,
            "parameters": parameter_list,
            "image_urls": list(unique_links)
        }

    finally:
        # Always release the browser, even if scraping raised.
        driver.quit()
|
| 74 |
+
|
| 75 |
+
if __name__ == "__main__":
|
| 76 |
+
url = input("eBay URL: ")
|
| 77 |
+
result = scrape_ebay_offer(url)
|
| 78 |
+
print(result)
|
code/web_scraper_olx.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scrape_olx_offer.py
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
|
| 5 |
+
def scrape_olx_offer(url: str):
    """Scrape one OLX listing and return its data without saving to disk.

    Returns a dict with keys: platform, url, title, description, parameters
    (list of raw parameter strings) and image_urls (deduplicated gallery links).

    Raises:
        ValueError: when the page does not respond with HTTP 200.

    NOTE(review): the css-* class names below are generated by OLX's frontend
    build and will break on a site redesign — verify periodically.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    print(f"🔍 OLX: {url}")
    # BUGFIX: requests.get without a timeout can hang forever on a stalled
    # connection; 30 s is generous for a single page fetch.
    response = requests.get(url, headers=headers, timeout=30)

    if response.status_code != 200:
        raise ValueError(f"OLX error: {response.status_code}")

    soup = BeautifulSoup(response.content, "html.parser")

    # TITLE
    title_element = soup.find("h4", class_="css-1au435n")
    title = title_element.get_text().strip() if title_element else "untitled"

    # DESCRIPTION
    description_element = soup.find("div", class_="css-19duwlz")
    description = description_element.get_text(separator="\n").strip() if description_element else "No description"

    # PARAMETERS
    parameter_list = []
    parameters_container = soup.find("div", attrs={"data-testid": "ad-parameters-container"})
    if parameters_container:
        for p in parameters_container.find_all("p", class_="css-13x8d99"):
            parameter_list.append(p.get_text().strip())

    # IMAGES: gallery swiper <img> tags.
    unique_links = set()
    for img in soup.select('img[data-testid^="swiper-image"]'):
        link = img.get("src")
        if link:
            unique_links.add(link)

    return {
        "platform": "olx",
        "url": url,
        "title": title,
        "description": description,
        "parameters": parameter_list,
        "image_urls": list(unique_links)
    }
|
| 51 |
+
|
| 52 |
+
if __name__ == "__main__":
|
| 53 |
+
url = input("OLX URL: ")
|
| 54 |
+
result = scrape_olx_offer(url)
|
| 55 |
+
print(result)
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
transformers
|
| 4 |
+
pillow
|
| 5 |
+
numpy
|
| 6 |
+
scikit-learn
|
| 7 |
+
tqdm
|
| 8 |
+
fastapi
|
| 9 |
+
uvicorn
|
| 10 |
+
python-multipart
|
| 11 |
+
undetected_chromedriver
|
| 12 |
+
beautifulsoup4
|
| 13 |
+
requests
|
| 14 |
+
flask
|
weights/auction_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dc1ae0fd83c1ebf39b2aad59f554c404398b73b33fc2411c3d2db0dea26b64e
|
| 3 |
+
size 557543075
|
weights/training_history.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"train_loss": [
|
| 3 |
+
0.8840779519081116,
|
| 4 |
+
0.4452889025211334,
|
| 5 |
+
0.24018713772296907,
|
| 6 |
+
0.12335345685482026,
|
| 7 |
+
0.05679535768926144
|
| 8 |
+
],
|
| 9 |
+
"val_loss": [
|
| 10 |
+
0.6094270433698382,
|
| 11 |
+
0.30430711592946735,
|
| 12 |
+
0.15748658563409532,
|
| 13 |
+
0.07173337734171323,
|
| 14 |
+
0.03333232658249991
|
| 15 |
+
],
|
| 16 |
+
"val_accuracy": [
|
| 17 |
+
1.0,
|
| 18 |
+
1.0,
|
| 19 |
+
1.0,
|
| 20 |
+
1.0,
|
| 21 |
+
1.0
|
| 22 |
+
],
|
| 23 |
+
"val_f1": [
|
| 24 |
+
0.0,
|
| 25 |
+
0.0,
|
| 26 |
+
0.0,
|
| 27 |
+
0.0,
|
| 28 |
+
0.0
|
| 29 |
+
]
|
| 30 |
+
}
|