File size: 6,929 Bytes
cf7f643
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
"""
Migration script: transforms api_light_dev/apis/*.py files
to work with the new HF-based LLMClient.
Injects _ask_raw_hf helper, removes OpenAI imports, updates model defaults.
"""
import os
import re

# Source tree (original OpenAI-based API files) and destination tree
# (HF-based copies that this script rewrites in place).
# NOTE(review): SRC_DIR is never referenced below — main() only rewrites
# files already present in DST_DIR; confirm whether a copy step was intended.
SRC_DIR = r"C:\Users\caris\cursor\DD\api_light_dev\apis"
DST_DIR = r"C:\Users\caris\cursor\DD\api_light_hf\apis"

# Source text of a compatibility shim that gets injected verbatim into each
# migrated file. It converts an OpenAI-style `messages` list (system/user
# roles, text parts, data-URL or plain image_url parts) into a single
# LLMClient call: structured output via `client.call` when `response_format`
# is a Pydantic-style schema (has `model_json_schema`), plain text via
# `client.call_raw` otherwise. Keep this string byte-exact — it is written
# into target files as executable code.
HF_HELPER = '''

def _ask_raw_hf(messages, model, response_format=None):
    """Compatibility wrapper: routes OpenAI-style messages through HF LLMClient."""
    from src.clients.llm_client import LLMClient
    import json as _json

    client = LLMClient()
    system_prompt = None
    user_text = ""
    images = []
    for msg in messages:
        role = msg.get("role", "")
        c = msg.get("content", "")
        if role == "system":
            if isinstance(c, str):
                system_prompt = c
        elif role == "user":
            if isinstance(c, str):
                user_text = c
            elif isinstance(c, list):
                for part in c:
                    if isinstance(part, dict):
                        if part.get("type") == "text":
                            user_text += part.get("text", "")
                        elif part.get("type") == "image_url":
                            url = part.get("image_url", {}).get("url", "")
                            if url.startswith("data:"):
                                images.append(url.split(",", 1)[1] if "," in url else url)
                            else:
                                images.append(url)

    if response_format is not None and hasattr(response_format, "model_json_schema"):
        result = client.call(
            prompt=user_text,
            schema=response_format,
            model=model,
            system_prompt=system_prompt,
            images=images if images else None,
            temperature=0,
        )
        return _json.dumps(result.model_dump(), ensure_ascii=False)
    else:
        return client.call_raw(
            prompt=user_text,
            model=model,
            system_prompt=system_prompt,
            images=images if images else None,
        )

'''

# Literal (old, new) string pairs applied with str.replace, in order.
# OpenAI/Gemini model identifiers map to their HF-hosted equivalents;
# the more specific quoted forms come first so e.g. "gpt-4o-2024-08-06"
# is rewritten before the bare "gpt-4o" substring could match inside it.
MODEL_REPLACEMENTS = [
    ('"gpt-4o-2024-08-06"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o-2024-11-20"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o-mini"', '"meta-llama/Llama-3.1-8B-Instruct"'),
    ('"gemini-2.5-pro"', '"Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('"gemini-2.5-flash-image"', '"black-forest-labs/FLUX.1-dev"'),
    ('"gemini-3-pro-image-preview"', '"black-forest-labs/FLUX.1-dev"'),
    ('"gemini-3-flash-preview"', '"Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('"gemini-1.5-flash-preview-0514"', '"meta-llama/Llama-3.2-11B-Vision-Instruct"'),
    ('"ft:gpt-4o-mini-2024-07-18:dlpo-inc:ecinfo-extractor:Cg036C3l"', '"meta-llama/Llama-3.1-8B-Instruct"'),
    ('model="gpt-4o"', 'model="meta-llama/Llama-3.3-70B-Instruct"'),
    ('model="gpt-4o-mini"', 'model="meta-llama/Llama-3.1-8B-Instruct"'),
    ('model="gpt-4o-2024-08-06"', 'model="meta-llama/Llama-3.3-70B-Instruct"'),
    ('model="gemini-3-flash-preview"', 'model="Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('selected_model = model if model else "gpt-4o"', 'selected_model = model if model else "meta-llama/Llama-3.3-70B-Instruct"'),
]


def find_last_import_line(lines):
    """Return the index of the last top-level import line, or -1 if none.

    Only lines that begin exactly with "import " or "from " count, so
    indented (function-local) imports are deliberately ignored.
    """
    last_idx = -1
    for idx, text in enumerate(lines):
        if text.startswith(("import ", "from ")):
            last_idx = idx
    return last_idx


def transform_file(content):
    """Apply all migration rewrites to one source file's text.

    Args:
        content: full text of an apis/*.py file.

    Returns:
        (new_content, changed) — the rewritten text and whether any
        rewrite actually modified it.
    """
    new = content

    # --- 1. Remove Gradio imports/request params ---
    new = re.sub(r'import gradio as gr\r?\n', '', new)
    new = re.sub(r'from src\.utils\.tracer import \*', 'from src.utils.tracer import customtracer', new)
    new = re.sub(r',?\s*request:\s*gr\.Request', '', new)
    new = re.sub(r'request:\s*gr\.Request,?\s*', '', new)

    # --- 2. Model replacements (literal, in declaration order) ---
    for old, rep in MODEL_REPLACEMENTS:
        new = new.replace(old, rep)

    # --- 3. OpenAI → LLMClient ---
    if "from openai import OpenAI" in new or "OpenAI()" in new:
        # Remove openai imports
        new = re.sub(r'import openai\r?\n', '', new)
        new = re.sub(r'from openai import OpenAI\r?\n', '', new)

        # Add LLMClient import if not present (anchored after "import os")
        if "from src.clients" not in new:
            new = re.sub(r'(import os\r?\n)', r'\1from src.clients.llm_client import LLMClient\n', new)

        # Inject helper after last top-level import (only once per file)
        if "_ask_raw_hf" not in new:
            lines = new.split("\n")
            idx = find_last_import_line(lines)
            if idx >= 0:
                before = "\n".join(lines[:idx + 1])
                after = "\n".join(lines[idx + 1:])
                new = before + HF_HELPER + after
            else:
                new = HF_HELPER + new

        # Replace the whole body of the simple ask_raw(messages, m) pattern
        # (any indented lines up to the OpenAI-style return) with a call
        # into the injected helper.
        new = re.sub(
            r'def ask_raw\(messages,\s*m\s*\):\s*\n'
            r'(?:\s+[^\n]+\n)*?'
            r'\s+return response\.choices\[0\]\.message\.content',
            'def ask_raw(messages, m):\n    return _ask_raw_hf(messages, m)',
            new,
            flags=re.MULTILINE
        )
        # (A previous second pass over ask_raw with an explicit
        # response_format was a self-replacing no-op and has been removed;
        # the generic rewrite above already covers that shape.)

        # Replace any remaining client = OpenAI(...)
        new = re.sub(r'client = OpenAI\([^)]*\)', 'client = LLMClient()', new)

        # Rewrite structured-output calls left in function bodies
        # (nayose_fv, ecinfo2winningrate, etc.): the open-paren is kept so
        # the original trailing arguments still close the call.
        new = re.sub(
            r'response = client\.beta\.chat\.completions\.parse\(',
            'response = _ask_raw_hf([{"role":"user","content":p}], model,',
            new
        )
        # Clean up leftover response.choices[0] accessors — the helper
        # already returns plain text / serialized JSON.
        new = re.sub(
            r'response\.choices\[0\]\.message\.(?:content|parsed)',
            'response',
            new
        )

    return new, new != content


def main():
    """Rewrite every non-dunder .py file in DST_DIR in place.

    Files are processed in sorted order; only files whose content actually
    changed are written back. Prints an updated/total summary.
    """
    candidates = sorted(
        name for name in os.listdir(DST_DIR)
        if name.endswith(".py") and not name.startswith("__")
    )
    count = 0
    for name in candidates:
        full_path = os.path.join(DST_DIR, name)
        # errors="replace" tolerates stray non-UTF-8 bytes on read.
        with open(full_path, "r", encoding="utf-8", errors="replace") as fh:
            original = fh.read()
        rewritten, modified = transform_file(original)
        if modified:
            with open(full_path, "w", encoding="utf-8") as fh:
                fh.write(rewritten)
            count += 1
    print(f"Updated {count}/{len(candidates)} files")


if __name__ == "__main__":
    main()