File size: 3,708 Bytes
f52234e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
AIFinder Configuration
Dataset registry, label mappings, and feature parameters.
"""

import os

# --- Paths ---
# Resolve everything relative to this config file so the package works
# regardless of the process's current working directory.
_THIS_FILE = os.path.abspath(__file__)
BASE_DIR = os.path.dirname(_THIS_FILE)
# Trained model artifacts are stored under "<BASE_DIR>/models".
MODEL_DIR = os.path.join(BASE_DIR, "models")

# --- Dataset Registry ---
# Each entry is a 4-tuple: (hf_dataset_id, provider, model_name, optional_kwargs).
# optional_kwargs holds per-dataset loader options — e.g. "max_samples" to cap
# how many rows are drawn from very large datasets; presumably it can also
# carry a subset name or split (per the original note) — confirm against the
# loader code.
# The same (provider, model_name) label may appear on several rows: multiple
# datasets feed the same class.
DATASET_REGISTRY = [
    # Anthropic
    ("TeichAI/claude-4.5-opus-high-reasoning-250x", "Anthropic", "Claude 4.5 Opus", {}),
    ("TeichAI/claude-sonnet-4.5-high-reasoning-250x", "Anthropic", "Claude Sonnet 4.5", {}),
    # Large (10k-row) dataset — capped to keep classes roughly balanced.
    ("Roman1111111/claude-opus-4.6-10000x", "Anthropic", "Claude Opus 4.6", {"max_samples": 1500}),

    # OpenAI
    ("TeichAI/gpt-5.2-high-reasoning-250x", "OpenAI", "GPT-5.2", {}),
    ("TeichAI/gpt-5.1-high-reasoning-1000x", "OpenAI", "GPT-5.1", {}),
    ("TeichAI/gpt-5.1-codex-max-1000x", "OpenAI", "GPT-5.1 Codex Max", {}),
    ("TeichAI/gpt-5-codex-250x", "OpenAI", "GPT-5 Codex", {}),
    ("TeichAI/gpt-5-codex-1000x", "OpenAI", "GPT-5 Codex", {}),

    # Google
    ("TeichAI/gemini-3-pro-preview-high-reasoning-1000x", "Google", "Gemini 3 Pro", {}),
    ("TeichAI/gemini-3-pro-preview-high-reasoning-250x", "Google", "Gemini 3 Pro", {}),
    ("TeichAI/gemini-2.5-flash-11000x", "Google", "Gemini 2.5 Flash", {"max_samples": 1500}),
    ("TeichAI/Gemini-3-Flash-Preview-VIBE", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-1000x", "Google", "Gemini 3 Flash", {}),
    ("TeichAI/gemini-3-flash-preview-complex-1000x", "Google", "Gemini 3 Flash", {}),

    # xAI
    ("TeichAI/brainstorm-v3.1-grok-4-fast-200x", "xAI", "Grok 4 Fast", {}),
    # NOTE(review): the three "sherlock-*" datasets are mapped to Grok 4.1 Fast —
    # presumably a codename; confirm the attribution is intentional.
    ("TeichAI/sherlock-thinking-alpha-11000x", "xAI", "Grok 4.1 Fast", {"max_samples": 1500}),
    ("TeichAI/sherlock-dash-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/sherlock-think-alpha-1000x", "xAI", "Grok 4.1 Fast", {}),
    ("TeichAI/grok-code-fast-1-1000x", "xAI", "Grok Code Fast 1", {}),

    # MoonshotAI
    ("TeichAI/kimi-k2-thinking-250x", "MoonshotAI", "Kimi K2", {}),
    ("TeichAI/kimi-k2-thinking-1000x", "MoonshotAI", "Kimi K2", {}),

    # Mistral
    ("TeichAI/mistral-small-creative-500x", "Mistral", "Mistral Small", {}),

    # MiniMax
    ("TeichAI/MiniMax-M2.1-Code-SFT", "MiniMax", "MiniMax M2.1", {}),
    ("TeichAI/convo-v1", "MiniMax", "MiniMax M2.1", {}),

    # StepFun
    ("TeichAI/Step-3.5-Flash-2600x", "StepFun", "Step 3.5 Flash", {"max_samples": 1500}),

    # Zhipu
    ("TeichAI/Pony-Alpha-15k", "Zhipu", "GLM-5", {"max_samples": 1500}),

    # DeepSeek (TeichAI)
    ("TeichAI/deepseek-v3.2-speciale-1000x", "DeepSeek", "DeepSeek V3.2 Speciale", {}),
    ("TeichAI/deepseek-v3.2-speciale-openr1-math-3k", "DeepSeek", "DeepSeek V3.2 Speciale", {"max_samples": 1500}),
]

# DeepSeek (a-m-team) — these datasets use a different record format than the
# TeichAI ones and are handled by a separate loading path. Same 4-tuple shape
# as DATASET_REGISTRY: (hf_dataset_id, provider, model_name, optional_kwargs).
DEEPSEEK_AM_DATASETS = [
    # "name" is presumably the HF dataset config/subset to load — confirm
    # against the loader; capped at 1000 samples.
    ("a-m-team/AM-DeepSeek-R1-Distilled-1.4M", "DeepSeek", "DeepSeek R1", {"name": "am_0.9M_sample_1k", "max_samples": 1000}),
]

# --- All providers and models ---
# Classification labels. Derived from the dataset registries instead of being
# hand-maintained, so the list can never drift out of sync with the data
# above. dict.fromkeys() deduplicates while preserving first-appearance
# order, which yields exactly the registry's grouping order:
# Anthropic, OpenAI, Google, xAI, MoonshotAI, Mistral, MiniMax, StepFun,
# Zhipu, DeepSeek.
PROVIDERS = list(dict.fromkeys(
    provider for _, provider, _, _ in DATASET_REGISTRY + DEEPSEEK_AM_DATASETS
))

# --- Feature parameters ---
# Word-level TF-IDF configuration: unigrams + bigrams, log-scaled term
# frequency, and min_df=3 to drop terms seen in fewer than 3 documents.
# Keys match scikit-learn's TfidfVectorizer arguments — presumably unpacked
# into it; confirm against the feature-extraction code.
TFIDF_WORD_PARAMS = dict(
    analyzer="word",
    ngram_range=(1, 2),
    max_features=20000,
    sublinear_tf=True,
    min_df=3,
)

# Character-level TF-IDF configuration: "char_wb" builds 3–5 character
# n-grams bounded at word edges. Same vectorizer-argument shape as
# TFIDF_WORD_PARAMS.
TFIDF_CHAR_PARAMS = dict(
    analyzer="char_wb",
    ngram_range=(3, 5),
    max_features=20000,
    sublinear_tf=True,
    min_df=3,
)

# --- Train/test split ---
TEST_SIZE = 0.2  # fraction of the data held out for evaluation
RANDOM_STATE = 42  # fixed seed for reproducible splits/training

# --- Neural Network ---
HIDDEN_DIM = 1024  # hidden-layer width (per name — confirm in model code)
EMBED_DIM = 256  # embedding dimension (per name — confirm in model code)
DROPOUT = 0.3  # dropout probability
BATCH_SIZE = 2048
EPOCHS = 50  # upper bound; early stopping presumably ends training sooner
EARLY_STOP_PATIENCE = 8  # epochs without improvement before stopping
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-4  # L2-style regularization strength