sky-meilin's picture
Rename App.py to app.py
c1c84d0 verified
raw
history blame
2.67 kB
# app.py – Qwen-Qwen3-Coder-30B-A3B-Instruct Space
# Author: Sky Meilin
# Purpose: Code-instruct model served as a Hugging Face Space (Gradio)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
# ----------------------------
# 1. Konfiguration laden
# ----------------------------
# Read the Space configuration once at import time; the model, generation
# and UI sections below all take their settings from this dict.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

MODEL_NAME = config["model_name"]  # required key: a missing entry raises KeyError
MAX_TOKENS_DEFAULT = config.get("max_tokens", 512)
GENERATION_CONFIG = config.get("generation", {})
RUNTIME_CONFIG = config.get("runtime", {})


def _resolve_torch_dtype(name):
    """Translate the configured dtype name into a value for ``from_pretrained``.

    Args:
        name: Value of ``runtime.torch_dtype`` from the config, e.g.
            ``"float16"``, ``"bfloat16"`` or ``"auto"``.

    Returns:
        The string ``"auto"`` unchanged, otherwise the matching
        ``torch.dtype`` object.

    Raises:
        ValueError: If ``name`` does not name a ``torch.dtype``.
    """
    # "auto" is passed through as a string; it is not an attribute of torch,
    # so a plain getattr() lookup would fail on it.
    if name == "auto":
        return name
    dtype = getattr(torch, str(name), None)
    # getattr alone would also accept unrelated torch attributes (modules,
    # functions), so insist on an actual dtype object.
    if not isinstance(dtype, torch.dtype):
        raise ValueError(
            f"Unsupported runtime.torch_dtype in config.json: {name!r}"
        )
    return dtype


print(f"Lade Modell: {MODEL_NAME} …")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=_resolve_torch_dtype(RUNTIME_CONFIG.get("torch_dtype", "float16")),
    device_map=RUNTIME_CONFIG.get("device_map", "auto"),
)
model.eval()  # inference only: switch the module out of training mode
print("Modell geladen ✅")
# ----------------------------
# 2. Inferenz-Funktion
# ----------------------------
def generate_code(prompt: str, max_tokens: int) -> str:
    """Generate a completion for ``prompt`` with the module-level model.

    Args:
        prompt: Instruction text entered by the user.
        max_tokens: Upper bound on the number of newly generated tokens.
            Coerced to ``int`` because UI sliders may deliver a float.

    Returns:
        The decoded completion without the echoed prompt, or a short hint
        message when the prompt is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        return "Bitte gib eine Anweisung ein."

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Remember how many tokens belong to the prompt so they can be cut off
    # the generated sequence afterwards.
    prompt_length = inputs["input_ids"].shape[-1]

    do_sample = GENERATION_CONFIG.get("do_sample", True)
    generation_kwargs = {
        "max_new_tokens": int(max_tokens),
        "do_sample": do_sample,
    }
    if do_sample:
        # Sampling knobs only apply when sampling is on; passing them for
        # greedy decoding just triggers configuration warnings.
        generation_kwargs["top_p"] = GENERATION_CONFIG.get("top_p", 0.95)
        generation_kwargs["temperature"] = GENERATION_CONFIG.get("temperature", 0.7)

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # For a causal LM the returned sequence starts with the prompt tokens;
    # decode only what the model added so the output box shows the answer.
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)
# ----------------------------
# 3. Gradio UI
# ----------------------------
# Optional "ui" section of the config; looked up once and reused below.
_ui_settings = config.get("ui", {})

with gr.Blocks(title="Qwen3-Coder Instruct") as demo:
    gr.Markdown("## Qwen3 Coder 30B – Instruct Space")

    # NOTE(review): grouping of the two inputs inside the Row is assumed;
    # confirm against the intended layout.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Prompt / Anweisung",
            lines=_ui_settings.get("prompt_lines", 4),
            placeholder="Schreibe z. B. ein Python-Skript für Fibonacci …",
        )
        max_tokens_input = gr.Slider(
            minimum=64,
            maximum=1024,
            value=MAX_TOKENS_DEFAULT,
            step=64,
            label="Max Tokens",
        )

    output_box = gr.Textbox(
        label="Generierter Code",
        lines=_ui_settings.get("output_lines", 15),
    )
    generate_btn = gr.Button("Code generieren")

    # Wire the button: prompt and token limit go in, generated text comes out.
    _click_inputs = [prompt_input, max_tokens_input]
    generate_btn.click(
        fn=generate_code,
        inputs=_click_inputs,
        outputs=output_box,
    )
# ----------------------------
# 4. Start
# ----------------------------
if __name__ == "__main__":
    # Only start the web server when run as a script, not on import.
    # Listens on all network interfaces, port 7860.
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )