Spaces:

hadxs
/

connor-worker-21

Paused

File size: 5,592 Bytes

c8c122b
24c9455
 
 
 
 
 
 
 
 
c8c122b
923da15
c8c122b
 
 
 
 
 
 
 
 
 
 
923da15
c8c122b
 
 
24c9455
 
 
 
 
 
 
 
 
c8c122b
 
 
24c9455
 
 
2b7ffcf
c8c122b
24c9455
2b7ffcf
c8c122b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b7ffcf
24c9455
 
2b7ffcf
24c9455
 
c8c122b
3bf755c
24c9455
 
 
 
 
 
 
3bf755c
2b7ffcf
3bf755c
24c9455
2b7ffcf
3bf755c
2b7ffcf
c8c122b
2b7ffcf
923da15
24c9455
 
 
2b7ffcf
24c9455
 
 
 
923da15

import gradio as gr, requests, json, time, threading, torch, os, random, math
from transformers import AutoModelForCausalLM, AutoTokenizer
import urllib3; urllib3.disable_warnings()

sid = os.environ.get("SPACE_ID") or os.environ.get("SPACE_NAME") or ""
NUM = ""
for part in sid.split("-"):
    if part.isdigit(): NUM = part
if not NUM: NUM = "1"
TEACHER_ID = "hf-worker-" + NUM
N = int(NUM)

# === 8 MODELES du plus petit au plus gros ===
# Choisi selon le numero du worker (round-robin)
ALL_MODELS = [
    ("HuggingFaceTB/SmolLM2-360M-Instruct", 0.36),
    ("Qwen/Qwen2.5-0.5B-Instruct", 0.5),
    ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", 1.1),
    ("deepseek-ai/deepseek-coder-1.3b-instruct", 1.3),
    ("Qwen/Qwen2.5-1.5B-Instruct", 1.5),
    ("Qwen/Qwen2.5-Coder-1.5B-Instruct", 1.5),
    ("stabilityai/stablelm-2-1_6b", 1.6),
    ("HuggingFaceTB/SmolLM2-1.7B-Instruct", 1.7),
]
MID, B = ALL_MODELS[N % len(ALL_MODELS)]
DELAY = max(3, int(5 + B * 10 - N % 10))  # 8-19s, petit modele = delai court
print(f"ID:{TEACHER_ID} MODEL:{MID} ({B}B) DELAY:{DELAY}s")

BIP="3.125.223.134"
BH="alfredo-agravic-saddeningly.ngrok-free.dev"
m=None; t=None; s=requests.Session(); s.verify=False
def g(p): return s.get("https://"+BIP+p,headers={"Host":BH},timeout=60)
def p(p,d): return s.post("https://"+BIP+p,headers={"Host":BH},json=d,timeout=30)
def l():
    global m,t
    m=AutoModelForCausalLM.from_pretrained(MID,torch_dtype=torch.float32,device_map="cpu",trust_remote_code=True)
    t=AutoTokenizer.from_pretrained(MID,trust_remote_code=True)
    if t.pad_token is None: t.pad_token=t.eos_token
def gen(ms, max_tokens, temp=0.7):
    x=t.apply_chat_template(ms,tokenize=False)
    i=t(x,return_tensors="pt").to("cpu")
    with torch.no_grad():
        o=m.generate(**i,max_new_tokens=max_tokens,temperature=temp,do_sample=True,
            top_p=0.9,top_k=40,repetition_penalty=1.05,pad_token_id=t.pad_token_id)
    return t.decode(o[0][i["input_ids"].shape[1]:],skip_special_tokens=True).strip()

# Prompts adaptes a la taille du modele
if B < 0.5:
    # Ultra-petit: prompts tres courts, reponses courtes
    PROMPTS = [
        lambda s: (f"Explique {s} en 1 phrase.", 64, 0.7),
        lambda s: (f"C'est quoi {s} ?", 64, 0.7),
        lambda s: (f"Donne 1 fait sur {s}.", 64, 0.8),
        lambda s: (f"Pourquoi {s} est utile ?", 96, 0.7),
        lambda s: (f"Un conseil sur {s}.", 96, 0.7),
        lambda s: (f"Decris {s} en 2 lignes.", 96, 0.7),
    ]
elif B < 1.0:
    # Petit: prompts simples, reponses courtes
    PROMPTS = [
        lambda s: (f"Explique {s} simplement.", 128, 0.7),
        lambda s: (f"C'est quoi {s} ? Donne un exemple.", 128, 0.7),
        lambda s: (f"Quels sont les points cles de {s} ?", 160, 0.6),
        lambda s: (f"Pourquoi {s} est important ?", 128, 0.8),
        lambda s: (f"Compare {s} avec son alternative.", 160, 0.7),
        lambda s: (f"Comment utiliser {s} ?", 160, 0.7),
    ]
else:
    # Moyen/Gros: prompts normaux, reponses completes
    PROMPTS = [
        lambda s: (f"Explique {s} en detail avec des exemples concrets.", 256, 0.7),
        lambda s: (f"Analyse {s} : fonctionnement, applications, limites.", 320, 0.7),
        lambda s: (f"Quels sont les concepts cles de {s} ? Liste structuree.", 256, 0.6),
        lambda s: (f"Compare {s} avec son alternative. Avantages/inconvenients.", 320, 0.7),
        lambda s: (f"Tutoriel pas a pas pour utiliser {s}.", 320, 0.7),
        lambda s: (f"L'histoire et l'avenir de {s}.", 320, 0.7),
    ]

if "coder" in MID.lower():
    # Workers code: prompts orientes code
    PROMPTS = [
        lambda s: (f"Write code example illustrating {s}.", 256, 0.6),
        lambda s: (f"Explain {s} with a code snippet.", 256, 0.7),
        lambda s: (f"How to implement {s}? Step by step.", 320, 0.7),
        lambda s: (f"Best practices for {s} in programming.", 256, 0.7),
        lambda s: (f"Compare approaches for {s} with code.", 320, 0.7),
        lambda s: (f"Common bugs with {s} and how to fix them.", 256, 0.7),
    ]

def w():
    global m,t; l()
    cycle = 0
    while True:
        try:
            p("/heartbeat",{"teacher":TEACHER_ID,"model":MID})
            r=g("/next-batch?teacher="+TEACHER_ID+"&batch_size=5")
            if r.status_code==200:
                es=r.json().get("entries",[])
                if es:
                    rs=[]
                    for e in es:
                        sj=e.get("subject","")
                        if not sj: continue
                        for idx in range(6):
                            pi = (hash(TEACHER_ID + sj + str(cycle + idx))) % len(PROMPTS)
                            fn = PROMPTS[pi]
                            try:
                                inst, mt, tmp = fn(sj)
                                resp=gen([{"role":"system","content":"Tu es Connor."},{"role":"user","content":inst}],
                                    max_tokens=mt, temp=tmp)
                                if resp and len(resp)>20:
                                    rs.append({"instruction":inst,"input":"","output":resp,
                                        "teacher":TEACHER_ID,"subject":sj,"model":MID})
                            except: pass
                    if rs:
                        p("/push-results",{"teacher":TEACHER_ID,"results":rs})
                    cycle += 1
            time.sleep(DELAY)
        except Exception as e:
            time.sleep(30)
threading.Thread(target=w,daemon=True).start()
gr.Interface(fn=lambda:json.dumps({"status":"ok","id":TEACHER_ID,"model":MID}),inputs=[],outputs="text").launch()