"""
Migration script: transforms api_light_dev/apis/*.py files
to work with the new HF-based LLMClient.
Injects _ask_raw_hf helper, removes OpenAI imports, updates model defaults.
"""
import os
import re
# Source tree (OpenAI-based originals) and destination tree (HF copies).
# NOTE(review): SRC_DIR is currently unused — main() only rewrites DST_DIR.
SRC_DIR = r"C:\Users\caris\cursor\DD\api_light_dev\apis"
DST_DIR = r"C:\Users\caris\cursor\DD\api_light_hf\apis"

# Compatibility shim injected verbatim into each migrated module.  It accepts
# OpenAI-style `messages` (system/user roles, text and image_url parts) and
# routes them through the HF LLMClient.  The leading "\n" matters: the string
# is concatenated directly after the last import line without a separator.
# The body MUST be valid, properly indented Python — it is compiled by the
# target modules at import time.
HF_HELPER = '''
def _ask_raw_hf(messages, model, response_format=None):
    """Compatibility wrapper: routes OpenAI-style messages through HF LLMClient."""
    from src.clients.llm_client import LLMClient
    import json as _json
    client = LLMClient()
    system_prompt = None
    user_text = ""
    images = []
    for msg in messages:
        role = msg.get("role", "")
        c = msg.get("content", "")
        if role == "system":
            if isinstance(c, str):
                system_prompt = c
        elif role == "user":
            if isinstance(c, str):
                user_text = c
            elif isinstance(c, list):
                for part in c:
                    if isinstance(part, dict):
                        if part.get("type") == "text":
                            user_text += part.get("text", "")
                        elif part.get("type") == "image_url":
                            url = part.get("image_url", {}).get("url", "")
                            if url.startswith("data:"):
                                # Keep only the base64 payload of a data: URL.
                                images.append(url.split(",", 1)[1] if "," in url else url)
                            else:
                                images.append(url)
    if response_format is not None and hasattr(response_format, "model_json_schema"):
        # Structured output: response_format is a pydantic model class.
        result = client.call(
            prompt=user_text,
            schema=response_format,
            model=model,
            system_prompt=system_prompt,
            images=images if images else None,
            temperature=0,
        )
        return _json.dumps(result.model_dump(), ensure_ascii=False)
    else:
        return client.call_raw(
            prompt=user_text,
            model=model,
            system_prompt=system_prompt,
            images=images if images else None,
        )
'''

# (old literal, new literal) pairs applied with plain str.replace, in order.
# Bare quoted ids come first; the `model="..."` forms are kept for call sites
# the bare forms might miss after earlier passes.
MODEL_REPLACEMENTS = [
    ('"gpt-4o-2024-08-06"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o-2024-11-20"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o"', '"meta-llama/Llama-3.3-70B-Instruct"'),
    ('"gpt-4o-mini"', '"meta-llama/Llama-3.1-8B-Instruct"'),
    ('"gemini-2.5-pro"', '"Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('"gemini-2.5-flash-image"', '"black-forest-labs/FLUX.1-dev"'),
    ('"gemini-3-pro-image-preview"', '"black-forest-labs/FLUX.1-dev"'),
    ('"gemini-3-flash-preview"', '"Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('"gemini-1.5-flash-preview-0514"', '"meta-llama/Llama-3.2-11B-Vision-Instruct"'),
    ('"ft:gpt-4o-mini-2024-07-18:dlpo-inc:ecinfo-extractor:Cg036C3l"', '"meta-llama/Llama-3.1-8B-Instruct"'),
    ('model="gpt-4o"', 'model="meta-llama/Llama-3.3-70B-Instruct"'),
    ('model="gpt-4o-mini"', 'model="meta-llama/Llama-3.1-8B-Instruct"'),
    ('model="gpt-4o-2024-08-06"', 'model="meta-llama/Llama-3.3-70B-Instruct"'),
    ('model="gemini-3-flash-preview"', 'model="Qwen/Qwen2.5-VL-72B-Instruct"'),
    ('selected_model = model if model else "gpt-4o"', 'selected_model = model if model else "meta-llama/Llama-3.3-70B-Instruct"'),
]
def find_last_import_line(lines):
    """Return the index of the last line starting with ``import `` or ``from ``.

    Scans the whole list so later imports win; returns -1 when no line matches.
    """
    idx = -1
    for position, text in enumerate(lines):
        if text.startswith(("import ", "from ")):
            idx = position
    return idx
def transform_file(content):
    """Transform one api module's source text for the HF-based LLMClient.

    Applies, in order: Gradio removal, model-id replacement, and (only when
    OpenAI usage is detected) the OpenAI -> LLMClient migration: import
    swap, one-time ``_ask_raw_hf`` helper injection, and call-site rewrites.

    Returns:
        tuple[str, bool]: the rewritten text and whether it differs from
        the input.
    """
    # --- 1. Remove Gradio imports and `request: gr.Request` parameters ---
    new = re.sub(r'import gradio as gr\r?\n', '', content)
    new = re.sub(r'from src\.utils\.tracer import \*',
                 'from src.utils.tracer import customtracer', new)
    # Two passes: the parameter may sit mid-list (leading comma) or first
    # (trailing comma).
    new = re.sub(r',?\s*request:\s*gr\.Request', '', new)
    new = re.sub(r'request:\s*gr\.Request,?\s*', '', new)

    # --- 2. Swap OpenAI/Gemini model ids for their HF equivalents ---
    for old, rep in MODEL_REPLACEMENTS:
        new = new.replace(old, rep)

    # --- 3. OpenAI -> LLMClient migration ---
    if "from openai import OpenAI" in new or "OpenAI()" in new:
        # Remove openai imports.
        new = re.sub(r'import openai\r?\n', '', new)
        new = re.sub(r'from openai import OpenAI\r?\n', '', new)
        # Add LLMClient import (anchored after `import os`) if not present.
        if "from src.clients" not in new:
            new = re.sub(r'(import os\r?\n)',
                         r'\1from src.clients.llm_client import LLMClient\n', new)
        # Inject the compatibility helper once, after the last import line.
        if "_ask_raw_hf" not in new:
            lines = new.split("\n")
            idx = find_last_import_line(lines)
            if idx >= 0:
                before = "\n".join(lines[:idx + 1])
                after = "\n".join(lines[idx + 1:])
                # HF_HELPER starts with "\n", so no extra separator is needed.
                new = before + HF_HELPER + after
            else:
                new = HF_HELPER + new
        # Rewrite simple `def ask_raw(messages, m)` bodies that end by
        # returning response.choices[0].message.content to delegate to the
        # injected helper.
        new = re.sub(
            r'def ask_raw\(messages,\s*m\s*\):\s*\n'
            r'(?:\s+[^\n]+\n)*?'
            r'\s+return response\.choices\[0\]\.message\.content',
            'def ask_raw(messages, m):\n    return _ask_raw_hf(messages, m)',
            new,
            flags=re.MULTILINE,
        )
        # Replace any remaining `client = OpenAI(...)` constructions.
        new = re.sub(r'client = OpenAI\([^)]*\)', 'client = LLMClient()', new)
        # Route leftover structured-output calls (nayose_fv,
        # ecinfo2winningrate, etc.) through the helper; the original keyword
        # arguments are left in place as trailing arguments of the new call.
        new = re.sub(
            r'response = client\.beta\.chat\.completions\.parse\(',
            'response = _ask_raw_hf([{"role":"user","content":p}], model,',
            new,
        )
        # The helper returns text directly, so unwrap response accessors.
        new = re.sub(
            r'response\.choices\[0\]\.message\.(?:content|parsed)',
            'response',
            new,
        )
    return new, new != content
def main():
    """Rewrite every eligible .py file under DST_DIR in place and report a count."""
    candidates = sorted(
        name for name in os.listdir(DST_DIR)
        if name.endswith(".py") and not name.startswith("__")
    )
    updated = 0
    for fname in candidates:
        path = os.path.join(DST_DIR, fname)
        # errors="replace" tolerates stray non-UTF-8 bytes in the originals.
        with open(path, "r", encoding="utf-8", errors="replace") as fh:
            original = fh.read()
        rewritten, changed = transform_file(original)
        if changed:
            with open(path, "w", encoding="utf-8") as fh:
                fh.write(rewritten)
            updated += 1
    print(f"Updated {updated}/{len(candidates)} files")


if __name__ == "__main__":
    main()