Spaces:
Paused
Paused
| import gradio as gr, requests, json, time, threading, torch, os, random, math | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import urllib3; urllib3.disable_warnings() | |
| sid = os.environ.get("SPACE_ID") or os.environ.get("SPACE_NAME") or "" | |
| NUM = "" | |
| for part in sid.split("-"): | |
| if part.isdigit(): NUM = part | |
| if not NUM: NUM = "1" | |
| TEACHER_ID = "hf-worker-" + NUM | |
| N = int(NUM) | |
| # === 8 MODELES du plus petit au plus gros === | |
| # Choisi selon le numero du worker (round-robin) | |
| ALL_MODELS = [ | |
| ("HuggingFaceTB/SmolLM2-360M-Instruct", 0.36), | |
| ("Qwen/Qwen2.5-0.5B-Instruct", 0.5), | |
| ("TinyLlama/TinyLlama-1.1B-Chat-v1.0", 1.1), | |
| ("deepseek-ai/deepseek-coder-1.3b-instruct", 1.3), | |
| ("Qwen/Qwen2.5-1.5B-Instruct", 1.5), | |
| ("Qwen/Qwen2.5-Coder-1.5B-Instruct", 1.5), | |
| ("stabilityai/stablelm-2-1_6b", 1.6), | |
| ("HuggingFaceTB/SmolLM2-1.7B-Instruct", 1.7), | |
| ] | |
| MID, B = ALL_MODELS[N % len(ALL_MODELS)] | |
| DELAY = max(3, int(5 + B * 10 - N % 10)) # 8-19s, petit modele = delai court | |
| print(f"ID:{TEACHER_ID} MODEL:{MID} ({B}B) DELAY:{DELAY}s") | |
| BIP="3.125.223.134" | |
| BH="alfredo-agravic-saddeningly.ngrok-free.dev" | |
| m=None; t=None; s=requests.Session(); s.verify=False | |
| def g(p): return s.get("https://"+BIP+p,headers={"Host":BH},timeout=60) | |
| def p(p,d): return s.post("https://"+BIP+p,headers={"Host":BH},json=d,timeout=30) | |
| def l(): | |
| global m,t | |
| m=AutoModelForCausalLM.from_pretrained(MID,torch_dtype=torch.float32,device_map="cpu",trust_remote_code=True) | |
| t=AutoTokenizer.from_pretrained(MID,trust_remote_code=True) | |
| if t.pad_token is None: t.pad_token=t.eos_token | |
| def gen(ms, max_tokens, temp=0.7): | |
| x=t.apply_chat_template(ms,tokenize=False) | |
| i=t(x,return_tensors="pt").to("cpu") | |
| with torch.no_grad(): | |
| o=m.generate(**i,max_new_tokens=max_tokens,temperature=temp,do_sample=True, | |
| top_p=0.9,top_k=40,repetition_penalty=1.05,pad_token_id=t.pad_token_id) | |
| return t.decode(o[0][i["input_ids"].shape[1]:],skip_special_tokens=True).strip() | |
| # Prompts adaptes a la taille du modele | |
| if B < 0.5: | |
| # Ultra-petit: prompts tres courts, reponses courtes | |
| PROMPTS = [ | |
| lambda s: (f"Explique {s} en 1 phrase.", 64, 0.7), | |
| lambda s: (f"C'est quoi {s} ?", 64, 0.7), | |
| lambda s: (f"Donne 1 fait sur {s}.", 64, 0.8), | |
| lambda s: (f"Pourquoi {s} est utile ?", 96, 0.7), | |
| lambda s: (f"Un conseil sur {s}.", 96, 0.7), | |
| lambda s: (f"Decris {s} en 2 lignes.", 96, 0.7), | |
| ] | |
| elif B < 1.0: | |
| # Petit: prompts simples, reponses courtes | |
| PROMPTS = [ | |
| lambda s: (f"Explique {s} simplement.", 128, 0.7), | |
| lambda s: (f"C'est quoi {s} ? Donne un exemple.", 128, 0.7), | |
| lambda s: (f"Quels sont les points cles de {s} ?", 160, 0.6), | |
| lambda s: (f"Pourquoi {s} est important ?", 128, 0.8), | |
| lambda s: (f"Compare {s} avec son alternative.", 160, 0.7), | |
| lambda s: (f"Comment utiliser {s} ?", 160, 0.7), | |
| ] | |
| else: | |
| # Moyen/Gros: prompts normaux, reponses completes | |
| PROMPTS = [ | |
| lambda s: (f"Explique {s} en detail avec des exemples concrets.", 256, 0.7), | |
| lambda s: (f"Analyse {s} : fonctionnement, applications, limites.", 320, 0.7), | |
| lambda s: (f"Quels sont les concepts cles de {s} ? Liste structuree.", 256, 0.6), | |
| lambda s: (f"Compare {s} avec son alternative. Avantages/inconvenients.", 320, 0.7), | |
| lambda s: (f"Tutoriel pas a pas pour utiliser {s}.", 320, 0.7), | |
| lambda s: (f"L'histoire et l'avenir de {s}.", 320, 0.7), | |
| ] | |
| if "coder" in MID.lower(): | |
| # Workers code: prompts orientes code | |
| PROMPTS = [ | |
| lambda s: (f"Write code example illustrating {s}.", 256, 0.6), | |
| lambda s: (f"Explain {s} with a code snippet.", 256, 0.7), | |
| lambda s: (f"How to implement {s}? Step by step.", 320, 0.7), | |
| lambda s: (f"Best practices for {s} in programming.", 256, 0.7), | |
| lambda s: (f"Compare approaches for {s} with code.", 320, 0.7), | |
| lambda s: (f"Common bugs with {s} and how to fix them.", 256, 0.7), | |
| ] | |
| def w(): | |
| global m,t; l() | |
| cycle = 0 | |
| while True: | |
| try: | |
| p("/heartbeat",{"teacher":TEACHER_ID,"model":MID}) | |
| r=g("/next-batch?teacher="+TEACHER_ID+"&batch_size=5") | |
| if r.status_code==200: | |
| es=r.json().get("entries",[]) | |
| if es: | |
| rs=[] | |
| for e in es: | |
| sj=e.get("subject","") | |
| if not sj: continue | |
| for idx in range(6): | |
| pi = (hash(TEACHER_ID + sj + str(cycle + idx))) % len(PROMPTS) | |
| fn = PROMPTS[pi] | |
| try: | |
| inst, mt, tmp = fn(sj) | |
| resp=gen([{"role":"system","content":"Tu es Connor."},{"role":"user","content":inst}], | |
| max_tokens=mt, temp=tmp) | |
| if resp and len(resp)>20: | |
| rs.append({"instruction":inst,"input":"","output":resp, | |
| "teacher":TEACHER_ID,"subject":sj,"model":MID}) | |
| except: pass | |
| if rs: | |
| p("/push-results",{"teacher":TEACHER_ID,"results":rs}) | |
| cycle += 1 | |
| time.sleep(DELAY) | |
| except Exception as e: | |
| time.sleep(30) | |
| threading.Thread(target=w,daemon=True).start() | |
| gr.Interface(fn=lambda:json.dumps({"status":"ok","id":TEACHER_ID,"model":MID}),inputs=[],outputs="text").launch() | |