# Hugging Face Space app.py
# NOTE(review): the original paste carried web-page chrome here ("Spaces:" /
# "Runtime error" status lines) that is not part of the program and would be a
# syntax error in Python; replaced with this comment header.
# Standard library
import json
import os
import time

# Third-party
import duckdb
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# --- Configuration ---
# Source dataset on the Hub holding the raw Polymarket parquet dumps.
SOURCE_REPO = "SII-WANGZJ/Polymarket_data"
# Dataset repo that the extracted result JSON files are uploaded to.
DEST_REPO = "Vycka12/Base"  # User's target dataset
# File names inside SOURCE_REPO: the trade log (large) and market metadata.
TRADES_FILE = "trades.parquet"
MARKETS_FILE = "markets.parquet"
def run_extraction():
    """Compute per-wallet win/loss stats from Polymarket data on the Hub.

    Streams two remote parquet files (trades + markets) with DuckDB over
    HTTP range requests, classifies each closed market's tokens into
    winning/losing assets, aggregates BUY-side trades per wallet, and
    uploads the filtered "mega whale" / "mega dumb" wallet lists as JSON
    to ``DEST_REPO``.

    Returns:
        A human-readable status string shown in the Gradio status box
        (success summary, or the error message on failure).
    """
    try:
        token = os.environ.get("HF_TOKEN")
        if not token:
            return "β Missing HF_TOKEN! Please add it to Space Secrets (Settings -> Secrets)."
        api = HfApi()

        # 1. In-memory DuckDB with the HTTP filesystem extension so that
        #    read_parquet() can fetch only the needed row groups/columns
        #    of the remote files via range requests.
        print("Initializing DuckDB with HTTPFS...")
        con = duckdb.connect(database=':memory:')
        con.execute("INSTALL httpfs; LOAD httpfs;")

        trades_url = f"https://huggingface.co/datasets/{SOURCE_REPO}/resolve/main/{TRADES_FILE}"
        markets_url = f"https://huggingface.co/datasets/{SOURCE_REPO}/resolve/main/{MARKETS_FILE}"

        # 2. Parse outcome prices of closed markets ONCE.  outcome_prices
        #    arrives as a stringified Python list (single quotes, spaces),
        #    so it is normalized to JSON text before casting to DOUBLE[].
        #    A price of 1.0 marks the winning outcome; the opposite token
        #    of the same market is the losing asset.
        print("Mapping winning/losing assets from cloud...")
        con.execute(f"""
            CREATE TABLE resolved_markets AS
            SELECT
                token1, token2,
                CAST(replace(replace(outcome_prices, '''', '"'), ' ', '') AS DOUBLE[]) AS prices
            FROM read_parquet('{markets_url}')
            WHERE closed = 1 AND outcome_prices IS NOT NULL AND outcome_prices != ''
        """)
        con.execute("""
            CREATE TABLE winning_assets AS
            SELECT token1 AS asset_id FROM resolved_markets WHERE prices[1] = 1.0
            UNION
            SELECT token2 AS asset_id FROM resolved_markets WHERE prices[2] = 1.0
        """)
        # BUGFIX: the original defined a losing_assets query that selected
        # token1/token2 from winning_assets (columns that table does not
        # have) and never executed it, then re-derived losing assets inline
        # in the stats query WITHOUT the NULL/empty outcome_prices guard —
        # a latent CAST failure.  Losing tokens are simply the counterparts
        # of the winners, derived from the same cleaned table.
        con.execute("""
            CREATE TABLE losing_assets AS
            SELECT token2 AS asset_id FROM resolved_markets WHERE prices[1] = 1.0
            UNION
            SELECT token1 AS asset_id FROM resolved_markets WHERE prices[2] = 1.0
        """)

        # 3. Aggregate BUY-side trades per wallet against the large remote
        #    trades file.  Both maker-BUY and taker-BUY rows count as the
        #    wallet taking a position in asset_id.
        print("Calculating wallet stats from 32GB trades cloud file...")
        con.execute(f"""
            CREATE TABLE wallet_stats AS
            WITH all_trades AS (
                SELECT maker AS wallet, asset_id FROM read_parquet('{trades_url}') WHERE maker_direction = 'BUY'
                UNION ALL
                SELECT taker AS wallet, asset_id FROM read_parquet('{trades_url}') WHERE taker_direction = 'BUY'
            ),
            wins AS (
                SELECT wallet, count(*) AS win_count FROM all_trades
                WHERE asset_id IN (SELECT asset_id FROM winning_assets)
                GROUP BY wallet
            ),
            losses AS (
                SELECT wallet, count(*) AS loss_count FROM all_trades
                WHERE asset_id IN (SELECT asset_id FROM losing_assets)
                GROUP BY wallet
            )
            SELECT
                COALESCE(w.wallet, l.wallet) AS wallet,
                COALESCE(w.win_count, 0) AS wins,
                COALESCE(l.loss_count, 0) AS losses,
                (COALESCE(w.win_count, 0) + COALESCE(l.loss_count, 0)) AS total_resolved_trades,
                CAST(COALESCE(w.win_count, 0) AS DOUBLE)
                    / (COALESCE(w.win_count, 0) + COALESCE(l.loss_count, 0)) AS win_rate
            FROM wins w
            FULL OUTER JOIN losses l ON w.wallet = l.wallet
            WHERE (COALESCE(w.win_count, 0) + COALESCE(l.loss_count, 0)) >= 100
        """)

        # 4. Pull the (already heavily reduced) aggregates into pandas and
        #    bucket wallets by volume and win rate.
        print("Filtering final results...")
        df = con.execute("SELECT * FROM wallet_stats").df()
        whales = df[(df['total_resolved_trades'] >= 400) & (df['win_rate'] >= 0.70)].to_dict('records')
        dumb = df[(df['total_resolved_trades'] >= 100) & (df['win_rate'] <= 0.40)].to_dict('records')

        # 5. Persist locally, then push both JSON files to the dataset repo.
        #    The local path doubles as the in-repo path ("results/...").
        os.makedirs("results", exist_ok=True)
        whale_path = "results/mega_whales.json"
        dumb_path = "results/mega_dumb.json"
        for local_path, payload in ((whale_path, whales), (dumb_path, dumb)):
            with open(local_path, "w") as f:
                json.dump(payload, f, indent=2)

        print(f"Uploading Results to {DEST_REPO}...")
        for local_path in (whale_path, dumb_path):
            api.upload_file(
                path_or_fileobj=local_path,
                path_in_repo=local_path,
                repo_id=DEST_REPO,
                repo_type="dataset",
                token=token,
            )
        return f"β SUCCESS!\n- Found {len(whales)} Mega Whales\n- Found {len(dumb)} Mega Dumb\n- Results uploaded to {DEST_REPO}/results/"
    except Exception as e:
        # Surface any failure to the UI status box instead of crashing the
        # Space (network errors, SQL errors, auth errors all land here).
        return f"β ERROR: {str(e)}"
# --- Gradio UI ---
# Single-button front end: clicking the button runs the full extraction and
# streams its status string into the textbox.
with gr.Blocks(title="Mega Extraction Space") as demo:
    gr.Markdown("# π Mega Wallet Extractor (DuckDB Cloud)")
    gr.Markdown("Click the button below to process 32GB of Polymarket data directly from Hugging Face.")

    with gr.Row():
        extract_button = gr.Button("π Run Full Extraction", variant="primary")
    status_log = gr.Textbox(label="Status Log", lines=10)

    extract_button.click(fn=run_extraction, outputs=status_log)

if __name__ == "__main__":
    demo.launch()