Spaces:
Running
Running
# app.py – Qwen-Qwen3-Coder-30B-A3B-Instruct Space
# Author: Sky Meilin
# Purpose: Code-instruct model served as a Hugging Face Space (Gradio)
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import json | |
# ----------------------------
# 1. Load configuration
# ----------------------------
def _resolve_torch_dtype(name):
    """Turn the configured dtype name into a value ``from_pretrained`` accepts.

    Args:
        name: Value of ``runtime.torch_dtype`` from ``config.json``, e.g.
            ``"float16"`` or ``"auto"``.

    Returns:
        The string ``"auto"`` unchanged, otherwise the matching
        ``torch.dtype`` object.

    Raises:
        ValueError: If *name* is not ``"auto"`` and does not name a
            ``torch.dtype`` attribute of the ``torch`` module.
    """
    # "auto" is not an attribute of torch; it is forwarded as-is so the
    # library can pick the dtype itself.
    if name == "auto":
        return name
    dtype = getattr(torch, str(name), None)
    # Guard against names that exist on torch but are not dtypes
    # (functions, submodules, ...).
    if not isinstance(dtype, torch.dtype):
        raise ValueError(
            f"Unsupported runtime.torch_dtype in config.json: {name!r}"
        )
    return dtype


with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# "model_name" is mandatory; every other key falls back to a default.
MODEL_NAME = config["model_name"]
MAX_TOKENS_DEFAULT = config.get("max_tokens", 512)
GENERATION_CONFIG = config.get("generation", {})
RUNTIME_CONFIG = config.get("runtime", {})

print(f"Lade Modell: {MODEL_NAME} …")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=_resolve_torch_dtype(RUNTIME_CONFIG.get("torch_dtype", "float16")),
    device_map=RUNTIME_CONFIG.get("device_map", "auto"),
)
# Switch to evaluation mode; the model is only used for inference below.
model.eval()
print("Modell geladen ✅")
# ----------------------------
# 2. Inference function
# ----------------------------
def generate_code(prompt: str, max_tokens: int) -> str:
    """Generate a completion for *prompt* with the globally loaded model.

    Args:
        prompt: Instruction text from the UI. ``None``, empty, or
            whitespace-only input short-circuits with a hint message.
        max_tokens: Upper bound on newly generated tokens. Coerced to
            ``int`` because a UI slider may deliver the value as a float.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back), or the hint message when no prompt was given.
    """
    # Guard first so the empty case never touches the tokenizer or model.
    if not prompt or not prompt.strip():
        return "Bitte gib eine Anweisung ein."

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    do_sample = GENERATION_CONFIG.get("do_sample", True)
    generation_kwargs = {
        "max_new_tokens": max(1, int(max_tokens)),
        "do_sample": do_sample,
    }
    # Sampling knobs only apply when sampling is enabled; passing them with
    # greedy decoding is at best ignored with a warning.
    if do_sample:
        generation_kwargs["top_p"] = GENERATION_CONFIG.get("top_p", 0.95)
        generation_kwargs["temperature"] = GENERATION_CONFIG.get("temperature", 0.7)

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    # The returned sequence starts with the prompt tokens; drop them so the
    # caller receives only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
# ----------------------------
# 3. Gradio UI
# ----------------------------
# Optional "ui" section of the config; missing keys fall back to defaults.
_ui_settings = config.get("ui", {})

with gr.Blocks(title="Qwen3-Coder Instruct") as demo:
    gr.Markdown("## Qwen3 Coder 30B – Instruct Space")

    # Prompt box and token-limit slider share one row.
    with gr.Row():
        prompt_input = gr.Textbox(
            lines=_ui_settings.get("prompt_lines", 4),
            label="Prompt / Anweisung",
            placeholder="Schreibe z. B. ein Python-Skript für Fibonacci …",
        )
        max_tokens_input = gr.Slider(
            minimum=64,
            maximum=1024,
            step=64,
            value=MAX_TOKENS_DEFAULT,
            label="Max Tokens",
        )

    output_box = gr.Textbox(
        lines=_ui_settings.get("output_lines", 15),
        label="Generierter Code",
    )

    # Clicking the button runs generate_code(prompt, max_tokens) and writes
    # the returned string into the output box.
    generate_btn = gr.Button("Code generieren")
    generate_btn.click(
        generate_code,
        [prompt_input, max_tokens_input],
        output_box,
    )
# ----------------------------
# 4. Launch
# ----------------------------
if __name__ == "__main__":
    # Serve on every network interface, port 7860, only when run as a script.
    demo.launch(server_port=7860, server_name="0.0.0.0")