hatamo commited on
Commit
422c1f3
·
0 Parent(s):

Fresh repo

Browse files
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10

WORKDIR /app

# System packages needed to fetch and install Chrome.
RUN apt-get update && apt-get install -y \
    wget \
    curl \
    unzip \
    && rm -rf /var/lib/apt/lists/*

# Install Google Chrome using the signed-by keyring approach only.
# (apt-key is deprecated and removed from recent Debian releases, so the
# old `curl | apt-key add -` path was dead weight.)
RUN mkdir -p /etc/apt/keyrings && \
    curl -fsSL https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /etc/apt/keyrings/google-chrome.gpg && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list && \
    apt-get update && apt-get install -y google-chrome-stable && \
    rm -rf /var/lib/apt/lists/*

# Copy requirements first so dependency layers are cached across code changes.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the entire project.
COPY . .

# Hugging Face Spaces expects the app to listen on port 7860.
EXPOSE 7860

# Unbuffered stdout so logs appear immediately in the Space console.
ENV PYTHONUNBUFFERED=1

# Run the app (root-level app.py starts uvicorn).
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Antique Auth API
3
+ emoji: 🏆
4
+ colorFrom: red
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ short_description: API used for my team project classes
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py - Main entry point for Hugging Face Spaces
"""Load the FastAPI app defined in code/app.py.

The project package directory is named `code`, which shadows the
standard-library `code` module, so the file is loaded directly via
importlib instead of a regular `import code.app`.
"""
import os
import sys
import importlib.util

HERE = os.path.dirname(__file__)
CODE_DIR = os.path.join(HERE, "code")
app_path = os.path.join(CODE_DIR, "app.py")

# Keep code/ on sys.path for the lifetime of the process: code/app.py
# performs lazy imports at request time (e.g. `from web_scraper_allegro
# import ...` inside its endpoint handler), so removing the path after
# startup — as the previous version did — made those imports fail on the
# first scraping request.
if CODE_DIR not in sys.path:
    sys.path.insert(0, CODE_DIR)

spec = importlib.util.spec_from_file_location("antique_auth_code_app", app_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# The FastAPI `app` object expected inside code/app.py
app = getattr(module, "app")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
code/app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, Form, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import torch
from PIL import Image
import io
from model import AuctionAuthenticityModel
from config import (
    AUTHENTICITY_CLASSES,
    CATEGORIES,
    UNCERTAINTY_CONFIDENCE_THRESHOLD,
    UNCERTAINTY_MARGIN_THRESHOLD,
    UNCERTAIN_CATEGORY,
)
from torchvision import transforms
import os
import numpy as np
from huggingface_hub import hf_hub_download

app = FastAPI(
    title="Antique Auction Authenticity API",
    description="AI model for antique auction authenticity evaluation",
    version="1.0.0",
)

# Allow any origin — the API is presumably consumed by a separate
# frontend; NOTE(review): tighten allow_origins if credentials matter.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All inference runs on CPU.
DEVICE = torch.device("cpu")

# Model weights live on the HF Hub and are downloaded by the startup hook.
MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "hatamo/auction-authenticity-model")
MODEL_FILENAME = "auction_model.pt"  # whatever you pushed

# Populated once by load_model() at application startup.
authenticity_model = None

# Standard ImageNet preprocessing (224x224, ImageNet mean/std).
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
48
+
49
+
50
@app.on_event("startup")
async def load_model():
    """Download the model weights from the HF Hub and load them once at startup.

    Sets the module-global `authenticity_model` and puts it in eval mode.
    """
    global authenticity_model
    print("🚀 Loading model...")

    # download from HF Hub to /root/.cache/huggingface/hub/...
    local_model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
    )

    authenticity_model = AuctionAuthenticityModel(device=DEVICE).to(DEVICE)
    state_dict = torch.load(local_model_path, map_location=DEVICE)
    authenticity_model.load_state_dict(state_dict)
    authenticity_model.eval()
    print("✓ Model ready")
66
+
67
+
68
def predict_single(img_tensor, text):
    """Run one forward pass and return (auth_probs, cat_probs) as numpy arrays."""
    with torch.no_grad():
        result = authenticity_model(img_tensor, [text])
        probs_a = result["auth_probs"][0].cpu().numpy()
        probs_c = result["cat_probs"][0].cpu().numpy()
    return probs_a, probs_c
74
+
75
+
76
def build_verdict(probs, labels):
    """Summarize a probability vector into a verdict tuple.

    Returns (probs_dict, best_label, best_prob, margin, uncertain), where
    `uncertain` is True when the top probability is below the configured
    confidence threshold or the gap to the runner-up is below the margin
    threshold.
    """
    probs_dict = {labels[i]: float(probs[i]) for i in range(len(labels))}

    best_label = max(probs_dict, key=probs_dict.get)
    best_prob = probs_dict[best_label]

    # Gap between the two highest probabilities.
    ranked = sorted(probs_dict.values(), reverse=True)
    margin = ranked[0] - ranked[1]

    too_low = best_prob < UNCERTAINTY_CONFIDENCE_THRESHOLD
    too_close = margin < UNCERTAINTY_MARGIN_THRESHOLD
    uncertain = too_low or too_close

    return probs_dict, best_label, best_prob, margin, uncertain
90
+
91
+
92
+
93
@app.post("/validate_url")
async def validate_url(url: str = Form(...), max_images: int = Form(3)):
    """Scrape an auction listing URL and evaluate authenticity + category.

    Supports Allegro, OLX and eBay URLs. Up to `max_images` photos
    (clamped to 1..10) are scored individually and the per-image
    probabilities are averaged before building the verdict.

    Returns 400 for unsupported platforms or listings without images,
    500 (with traceback) for unexpected failures.
    """
    try:
        from io import BytesIO
        import requests

        # Clamp requested image count to a sane range.
        max_images = max(1, min(max_images, 10))

        # Scrapers are imported lazily so one platform's heavy/broken
        # dependencies don't prevent the API from starting.
        if "allegro.pl" in url:
            from web_scraper_allegro import scrape_allegro_offer

            auction = scrape_allegro_offer(url)
        elif "olx.pl" in url:
            from web_scraper_olx import scrape_olx_offer

            auction = scrape_olx_offer(url)
        elif "ebay." in url:
            from web_scraper_ebay import scrape_ebay_offer

            auction = scrape_ebay_offer(url)
        else:
            return JSONResponse({"error": "Unsupported platform"}, status_code=400)

        if not auction.get("image_urls"):
            return JSONResponse({"error": "No images"}, status_code=400)

        images_to_use = min(max_images, len(auction["image_urls"]))

        auth_probs_list = []
        cat_probs_list = []

        # The text input is shared by all images of the listing.
        text = auction["title"] + " " + auction.get("description", "")

        for img_url in auction["image_urls"][:images_to_use]:
            img_resp = requests.get(img_url, timeout=15)
            img_resp.raise_for_status()

            img = Image.open(BytesIO(img_resp.content)).convert("RGB")
            img_tensor = transform(img).unsqueeze(0).to(DEVICE)

            auth_probs, cat_probs = predict_single(img_tensor, text)

            auth_probs_list.append(auth_probs)
            cat_probs_list.append(cat_probs)

        # Average per-image probabilities into one listing-level verdict.
        avg_auth_probs = np.mean(auth_probs_list, axis=0)
        avg_cat_probs = np.mean(cat_probs_list, axis=0)

        auth_dict, best_auth, best_auth_prob, auth_margin, auth_uncertain = build_verdict(
            avg_auth_probs, AUTHENTICITY_CLASSES
        )

        cat_dict, best_cat, best_cat_prob, cat_margin, cat_uncertain = build_verdict(
            avg_cat_probs, CATEGORIES
        )

        auth_verdict = "UNCERTAIN" if auth_uncertain else best_auth
        category_verdict = UNCERTAIN_CATEGORY if cat_uncertain else best_cat

        return JSONResponse(
            {
                "status": "success",
                "evaluation": {
                    "title": auction["title"],
                    "image_urls": auction["image_urls"][:images_to_use],
                    # BUGFIX: the OLX and eBay scrapers do not return a
                    # "price" key, so auction["price"] raised KeyError for
                    # them (turning every such request into a 500).
                    "price": auction.get("price"),
                    "category": None
                    if category_verdict == UNCERTAIN_CATEGORY
                    else category_verdict,
                    "evaluation_status": auth_verdict,
                    "confidence": round(best_auth_prob, 3),
                },
                "details": {
                    "url": url,
                    "platform": auction["platform"],
                    "image_count_used": images_to_use,
                    "authenticity": {
                        "verdict": auth_verdict,
                        "confidence": round(best_auth_prob, 3),
                        "margin": round(auth_margin, 3),
                        "probabilities": {
                            k: round(v, 3) for k, v in auth_dict.items()
                        },
                    },
                    "category": {
                        "verdict": category_verdict,
                        "label": best_cat,
                        "confidence": round(best_cat_prob, 3),
                        "margin": round(cat_margin, 3),
                        "probabilities": {
                            k: round(v, 3) for k, v in cat_dict.items()
                        },
                    },
                },
            }
        )

    except Exception as e:
        import traceback

        # Surface the traceback so failures are debuggable from the response.
        return JSONResponse(
            {"status": "error", "error": str(e), "traceback": traceback.format_exc()},
            status_code=500,
        )
197
+
198
+
199
@app.get("/health")
def health():
    """Simple liveness check."""
    return {"status": "ok", "message": "API running"}
202
+
203
+
204
@app.get("/")
def root():
    """API metadata and a short endpoint directory."""
    return {
        "name": "Antique Auction Authenticity API",
        "version": "1.0.0",
        # Fixed: the evaluation endpoint is POST /validate_url;
        # "POST /predict" did not exist in this app.
        "endpoints": {
            "POST /validate_url": "Evaluate auction",
            "GET /health": "Health check",
        },
    }
211
+
212
+
213
if __name__ == "__main__":
    import uvicorn

    # Direct local run of this module; in the Docker image the root-level
    # app.py is what actually starts uvicorn.
    uvicorn.run(app, host="0.0.0.0", port=7860)
code/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# config.py
"""Label and threshold configuration.

Keeping labels here allows elastic modification without touching the
model or API code.
"""

# Authenticity status labels (main head — changing these requires retraining).
AUTHENTICITY_CLASSES = {
    0: "ORIGINAL",
    1: "SCAM",
    2: "REPLICA",
}

# Reverse lookup: label name -> class id.
AUTHENTICITY_TO_ID = {name: idx for idx, name in AUTHENTICITY_CLASSES.items()}

# Item categories (separate classifier head). Edit these five as needed;
# the special "Uncertain" bucket below is applied automatically at low
# confidence and is never produced by the model itself.
CATEGORIES = {
    0: "Clocks",
    1: "Furniture",
    2: "Numismatics",
    3: "Sabers",
    4: "Tableware",
}

# Reverse lookup: category name -> class id.
CATEGORY_TO_ID = {name: idx for idx, name in CATEGORIES.items()}

# Label used when the category head is not confident enough.
UNCERTAIN_CATEGORY = "Uncertain"

# A prediction is "uncertain" when the top probability is below this...
UNCERTAINTY_CONFIDENCE_THRESHOLD = 0.6
# ...or the gap between the top two probabilities is below this.
UNCERTAINTY_MARGIN_THRESHOLD = 0.15
code/model.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model.py
2
+ import torch
3
+ import torch.nn as nn
4
+ from transformers import DistilBertTokenizer, DistilBertModel
5
+ from torchvision.models import efficientnet_b0
6
+ from config import AUTHENTICITY_CLASSES, CATEGORIES
7
+
8
class AuctionAuthenticityModel(nn.Module):
    """Multimodal (image + text) model with two classification heads.

    An EfficientNet-B0 backbone encodes the listing photo and a
    multilingual DistilBERT encodes the listing text; the concatenated
    features pass through a shared fusion MLP feeding an authenticity
    head and a category head.
    """

    def __init__(self, num_classes=None, device='cpu'):
        # If num_classes not specified, use config
        if num_classes is None:
            num_classes = len(AUTHENTICITY_CLASSES)
        # Category classes (separate head)
        num_categories = len(CATEGORIES)
        super().__init__()
        self.device = device

        # Vision backbone: EfficientNet-B0 with its classifier stripped so
        # it emits raw 1280-d features.
        # NOTE(review): `pretrained=True` is deprecated in newer torchvision
        # in favour of `weights=...` — confirm the pinned version accepts it.
        self.vision_model = efficientnet_b0(pretrained=True)
        self.vision_model.classifier = nn.Identity()
        vision_out_dim = 1280

        # Text encoder: multilingual DistilBERT (listings may be Polish).
        self.text_model = DistilBertModel.from_pretrained(
            'distilbert-base-multilingual-cased'
        )
        text_out_dim = 768

        # Tokenizer is owned by the model so forward() can take raw strings.
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-multilingual-cased'
        )

        # Fusion encoder (shared) -> then two heads (authenticity + category)
        hidden_dim = 256
        self.fusion_encoder = nn.Sequential(
            nn.Linear(vision_out_dim + text_out_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Heads
        self.auth_head = nn.Linear(128, num_classes)
        self.cat_head = nn.Linear(128, num_categories)

        # store sizes for reference
        self.num_classes = num_classes
        self.num_categories = num_categories

    def forward(self, images, texts):
        """Run both encoders and heads.

        Args:
            images: image batch tensor (fed elsewhere as (B, 3, 224, 224),
                ImageNet-normalized).
            texts: list of strings, one per batch element.

        Returns:
            dict with 'auth_logits'/'auth_probs'/'cat_logits'/'cat_probs'.
        """
        vision_features = self.vision_model(images)
        # Tokenize on the fly; truncate to DistilBERT's 512-token limit.
        tokens = self.tokenizer(
            texts, padding=True, truncation=True, max_length=512, return_tensors='pt'
        ).to(self.device)
        text_outputs = self.text_model(**tokens)
        # Use the first ([CLS]) token embedding as the text representation.
        text_features = text_outputs.last_hidden_state[:, 0, :]

        combined = torch.cat([vision_features, text_features], dim=1)
        shared = self.fusion_encoder(combined)

        auth_logits = self.auth_head(shared)
        cat_logits = self.cat_head(shared)

        # probabilities
        auth_probs = torch.softmax(auth_logits, dim=1)
        cat_probs = torch.softmax(cat_logits, dim=1)

        return {
            'auth_logits': auth_logits,
            'auth_probs': auth_probs,
            'cat_logits': cat_logits,
            'cat_probs': cat_probs,
        }

    def compute_loss(self, outputs, auth_labels=None, cat_labels=None, auth_weight=1.0, cat_weight=1.0):
        """Compute combined loss for two heads. Labels should be LongTensors on same device.

        Returns combined scalar loss and a dict with individual losses.
        """
        losses = {}
        loss = 0.0
        criterion = nn.CrossEntropyLoss()

        if auth_labels is not None:
            l_auth = criterion(outputs['auth_logits'], auth_labels)
            losses['auth_loss'] = l_auth
            loss = loss + auth_weight * l_auth

        if cat_labels is not None:
            # Allow sentinel -1 for unknown/uncertain categories and ignore them
            if cat_labels.dim() == 1:
                mask = cat_labels >= 0
            else:
                mask = (cat_labels.squeeze(-1) >= 0)

            if mask.sum().item() > 0:
                selected_logits = outputs['cat_logits'][mask]
                selected_labels = cat_labels[mask]
                l_cat = criterion(selected_logits, selected_labels)
                losses['cat_loss'] = l_cat
                loss = loss + cat_weight * l_cat
            else:
                # No valid category labels in batch
                losses['cat_loss'] = torch.tensor(0.0, device=self.device)

        return loss, losses

    def count_parameters(self):
        """Return the number of trainable parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)
112
+
113
+
114
if __name__ == '__main__':
    # Smoke test: build the model on CPU and run a dummy forward pass.
    print("Testowanie modelu...")

    device = torch.device('cpu')
    model = AuctionAuthenticityModel(device=device).to(device)

    print(f"✓ Model stworzony")
    print(f" - Parametrów: {model.count_parameters():,}")

    # Dummy test
    dummy_img = torch.randn(2, 3, 224, 224).to(device)
    dummy_texts = ["Silver spoon antique", "Polish silverware 19th century"]

    with torch.no_grad():
        output = model(dummy_img, dummy_texts)

    # Print shapes
    print("✓ Forward pass:")
    print(f" - auth_logits: {output['auth_logits'].shape}")
    print(f" - auth_probs: {output['auth_probs'].shape}")
    print(f" - cat_logits: {output['cat_logits'].shape}")
    print(f" - cat_probs: {output['cat_probs'].shape}")

    # Show predicted labels and top probabilities
    auth_pred = torch.argmax(output['auth_probs'], dim=1)
    cat_pred = torch.argmax(output['cat_probs'], dim=1)

    for i in range(output['auth_probs'].shape[0]):
        a_idx = int(auth_pred[i].item())
        a_prob = float(output['auth_probs'][i, a_idx].item())
        c_idx = int(cat_pred[i].item())
        c_prob = float(output['cat_probs'][i, c_idx].item())
        # Map class ids back to human-readable labels from config.
        a_name = AUTHENTICITY_CLASSES.get(a_idx, str(a_idx))
        c_name = CATEGORIES.get(c_idx, str(c_idx))
        print(f"\nSample {i}:")
        print(f" - Authenticity: {a_name} ({a_prob:.3f})")
        print(f" - Category: {c_name} ({c_prob:.3f})")

    # Estimate on-disk model size by serializing to a temp file.
    print(f"\n📊 Rozmiar modelu:")
    torch.save(model.state_dict(), 'temp_model.pt')
    import os
    size_mb = os.path.getsize('temp_model.pt') / (1024*1024)
    print(f" - {size_mb:.1f} MB")
    os.remove('temp_model.pt')
code/web_scraper_allegro.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from apify_client import ApifyClient
2
+ import os
3
+ import re
4
+
5
+
6
def sanitize_folder_name(text):
    """Turn arbitrary text into a filesystem-safe folder name.

    Polish diacritics are transliterated to ASCII, any other
    non-alphanumeric character becomes an underscore, runs of
    underscores are collapsed, and leading/trailing underscores
    are stripped.
    """
    polish_chars = {
        "ą": "a", "ć": "c", "ę": "e", "ł": "l", "ń": "n",
        "ó": "o", "ś": "s", "ź": "z", "ż": "z"
    }
    pieces = []
    for ch in text.lower():
        if ch in polish_chars:
            pieces.append(polish_chars[ch])
        elif ch.isalnum():
            pieces.append(ch)
        else:
            pieces.append("_")
    name = "".join(pieces)
    while "__" in name:
        name = name.replace("__", "_")
    return name.strip("_")
24
+
25
+
26
def extract_price(price_str):
    """Pull a numeric price out of a free-form price string.

    Returns the first number found (comma decimal separator normalized
    to a dot), None for empty/falsy input, or the input unchanged when
    no number is present.
    """
    if not price_str:
        return None
    found = re.search(r'(\d+[.,]\d{2}|\d+)', str(price_str))
    if found is None:
        return price_str
    return found.group(1).replace(',', '.')
34
+
35
+
36
def extract_images_from_apify(item_data):
    """Collect image URLs from an Apify item, upgraded to /original/ size.

    Looks at the 'images', 'image' and 'imageUrl' fields, keeps only URLs
    that look like image CDN links, rewrites known thumbnail sizes to the
    /original/ variant, and deduplicates the result.
    """
    allowed_sizes = ["/s128/", "/s360/", "/s512/", "/s720/", "/s1024/", "/s1440/", "/original/"]

    candidates = []
    images_field = item_data.get('images')
    if images_field:
        if isinstance(images_field, list):
            candidates.extend(images_field)
        else:
            candidates.append(images_field)
    if item_data.get('image'):
        candidates.append(item_data['image'])
    if item_data.get('imageUrl'):
        candidates.append(item_data['imageUrl'])

    unique_links = set()
    for candidate in candidates:
        if candidate and isinstance(candidate, str):
            if "allegroimg.com" in candidate or "img" in candidate:
                for size in allowed_sizes:
                    candidate = candidate.replace(size, "/original/")
                unique_links.add(candidate)

    return list(unique_links)
62
+
63
+
64
def scrape_allegro_offer(url: str):
    """Scrape a single Allegro product via the hosted Apify e-commerce actor.

    Always returns a result dict with platform/url/title/description/
    price/image_urls keys; scraping failures are reported inside the
    dict rather than raised, so one bad listing doesn't break callers.

    Raises:
        ValueError: if the APIFY_API_TOKEN environment variable is not set.
    """
    api_token = os.getenv('APIFY_API_TOKEN')
    if not api_token:
        raise ValueError("APIFY_API_TOKEN environment variable not set")

    client = ApifyClient(api_token)

    # Correct input format for E-commerce Scraping Tool
    run_input = {
        "startUrls": [
            url
        ]
    }

    print(f"🔍 Scraping: {url}")

    try:
        # Run the actor synchronously, then read its default dataset.
        actor_call = client.actor("e-commerce/allegro-product-detail-scraper").call(
            run_input=run_input
        )
        dataset_client = client.dataset(actor_call['defaultDatasetId'])
        items = list(dataset_client.iterate_items())

        if not items:
            print("⚠️ No data returned from Apify")
            # Empty-but-valid result so callers can handle it uniformly.
            return {
                "platform": "allegro",
                "url": url,
                "title": "untitled",
                "description": "No description",
                "price": None,
                "image_urls": []
            }

        # Only the first dataset item matters for a product-detail scrape.
        item = items[0]
        print(f"✅ Success! Found: {item.get('productTitle', 'untitled')}")

        image_urls = extract_images_from_apify(item)

        # Fall back to the thumbnail when no gallery images were found.
        if not image_urls:
            thumbnail = item.get("thumbnail")
            if thumbnail:
                image_urls = [thumbnail]

        return {
            "platform": "allegro",
            "url": item.get('url', url),
            "title": item.get('productTitle', 'untitled').strip(),
            "description": item.get('description', 'No description'),
            "price": extract_price(item.get('price', item.get('currentPrice'))),
            "image_urls": image_urls
        }

    except Exception as e:
        # Best-effort: report the failure in the result instead of raising.
        print(f"❌ Error: {e}")
        return {
            "platform": "allegro",
            "url": url,
            "title": "error",
            "description": str(e),
            "price": None,
            "image_urls": []
        }
129
+
130
+
131
# Example usage
if __name__ == "__main__":
    # Manual test: scrape a URL typed at the console and print a summary.
    url = input("Allegro URL: ")
    result = scrape_allegro_offer(url)

    print("\n✅ Scraping result:")
    print(f"Title: {result['title']}")
    print(f"Price: {result['price']}")
    # Truncate long descriptions for console readability.
    print(f"Description: {result['description'][:100]}..." if len(result['description']) > 100 else f"Description: {result['description']}")
    print(f"Images: {len(result['image_urls'])} found")
    # Show at most the first three image links.
    for img in result['image_urls'][:3]:
        print(f" - {img}")
code/web_scraper_ebay.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scrape_ebay_offer.py
2
+ import undetected_chromedriver as uc
3
+ from selenium.webdriver.common.by import By
4
+ from webdriver_manager.chrome import ChromeDriverManager
5
+ from selenium.webdriver.chrome.service import Service
6
+ import time
7
+ import requests
8
+ import os
9
+
10
def scrape_ebay_offer(url: str):
    """Scrape an eBay listing and return its data without saving to disk.

    Uses undetected-chromedriver in headless mode. Returns a dict with
    title, description, parameters and image URLs; each section falls
    back to a default instead of failing the whole scrape. The driver is
    always quit, even on error.
    """
    print(f"🔍 eBay: {url}")
    options = uc.ChromeOptions()
    options.add_argument("--window-position=-3000,0")
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # Point at the system Chrome binary when present (Docker image).
    if os.path.exists('/usr/bin/google-chrome'):
        options.binary_location = '/usr/bin/google-chrome'

    driver = uc.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options,
        use_subprocess=True
    )

    try:
        driver.get(url)
        time.sleep(4)  # give dynamic content time to render

        # TITLE
        try:
            title_element = driver.find_element(By.CSS_SELECTOR, "h1.x-item-title__mainTitle")
            title_str = title_element.text.strip()
        # Fixed: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        except Exception:
            title_str = "untitled_ebay"

        # PARAMETERS (label/value rows of the item-specifics table)
        parameter_list = []
        try:
            rows = driver.find_elements(By.CSS_SELECTOR, ".ux-labels-values")
            for row in rows:
                try:
                    label = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__labels").text.strip()
                    value = row.find_element(By.CSS_SELECTOR, ".ux-labels-values__values").text.strip()
                    if label and value:
                        parameter_list.append(f"{label}: {value}")
                except Exception:
                    continue
        except Exception:
            pass

        # DESCRIPTION lives inside an iframe
        description_content = "No description"
        try:
            frame = driver.find_element(By.ID, "desc_ifr")
            driver.switch_to.frame(frame)
            description_content = driver.find_element(By.TAG_NAME, "body").text.strip()
            driver.switch_to.default_content()
        except Exception:
            # Fixed: make sure we leave the iframe even when the body
            # lookup fails, otherwise later selectors searched the frame.
            driver.switch_to.default_content()

        # IMAGES: swap thumbnail sizes for larger variants
        unique_links = set()
        try:
            thumbnails = driver.find_elements(By.CSS_SELECTOR, ".ux-image-grid-item img")
            for img in thumbnails:
                src = img.get_attribute("src") or img.get_attribute("data-src")
                if src and "ebayimg.com" in src:
                    # NOTE(review): the "/s-l64/" path form (with slashes) is
                    # assumed here; verify against current eBay image URLs.
                    hd_link = src.replace("/s-l64/", "/s-l1600").replace("/s-l140/", "/s-l1600")
                    unique_links.add(hd_link)
        except Exception:
            pass

        return {
            "platform": "ebay",
            "url": url,
            "title": title_str,
            "description": description_content,
            "parameters": parameter_list,
            "image_urls": list(unique_links)
        }

    finally:
        driver.quit()
89
+
90
if __name__ == "__main__":
    # Manual test: scrape a URL typed at the console and dump the dict.
    url = input("eBay URL: ")
    result = scrape_ebay_offer(url)
    print(result)
code/web_scraper_olx.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # scrape_olx_offer.py
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
def scrape_olx_offer(url: str):
    """Scrape an OLX listing and return its data without saving to disk.

    Raises ValueError when OLX responds with a non-200 status. The CSS
    class names below are OLX-generated and may change over time.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
    }

    print(f"🔍 OLX: {url}")
    # Fixed: added a timeout so a stalled OLX response cannot hang the
    # API worker indefinitely.
    response = requests.get(url, headers=headers, timeout=15)

    if response.status_code != 200:
        raise ValueError(f"OLX error: {response.status_code}")

    soup = BeautifulSoup(response.content, "html.parser")

    # TITLE
    title_element = soup.find("h4", class_="css-1au435n")
    title = title_element.get_text().strip() if title_element else "untitled"

    # DESCRIPTION
    description_element = soup.find("div", class_="css-19duwlz")
    description = description_element.get_text(separator="\n").strip() if description_element else "No description"

    # PARAMETERS
    parameter_list = []
    parameters_container = soup.find("div", attrs={"data-testid": "ad-parameters-container"})
    if parameters_container:
        params = parameters_container.find_all("p", class_="css-13x8d99")
        for p in params:
            parameter_list.append(p.get_text().strip())

    # IMAGES (gallery swiper images)
    images = soup.select('img[data-testid^="swiper-image"]')
    unique_links = set()
    for img in images:
        link = img.get("src")
        if link:
            unique_links.add(link)

    return {
        "platform": "olx",
        "url": url,
        "title": title,
        "description": description,
        "parameters": parameter_list,
        "image_urls": list(unique_links)
    }
51
+
52
if __name__ == "__main__":
    # Manual test: scrape a URL typed at the console and dump the dict.
    url = input("OLX URL: ")
    result = scrape_olx_offer(url)
    print(result)
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ transformers
4
+ pillow
5
+ numpy
6
+ scikit-learn
7
+ tqdm
8
+ fastapi
9
+ uvicorn
10
+ python-multipart
11
+ undetected_chromedriver
12
+ webdriver-manager
13
+ beautifulsoup4
14
+ requests
15
+ flask
16
+ selenium>=4.0
17
+ huggingface_hub