"""HuggingFace Space: PSCT → Lua Gradio UI on top of llama-cpp-python serving the Q4_K_M GGUF of the fine-tuned Qwen2.5-Coder-7B model. Runs on the free CPU tier (2 vCPU, 16 GB RAM) — slow (~60-120s per generation) but free. The GGUF is downloaded from the user's HF model repo on first container start, then cached on the ephemeral disk. """ import os import gradio as gr from huggingface_hub import hf_hub_download from llama_cpp import Llama # ----------------------------------------------------------------------- # Model loading # ----------------------------------------------------------------------- MODEL_REPO = os.environ.get("MODEL_REPO", "serenade87/qwen-coder-7b-psct2lua-gguf") MODEL_FILE = os.environ.get("MODEL_FILE", "qwen-coder-7b-psct2lua-q4_k_m.gguf") print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...") model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE) print(f"Loading {model_path}...") # Conservative settings for free 2-vCPU / 16 GB tier: # n_ctx=4096 — matches training max_length # n_threads=2 — match vCPU count # n_batch=256 — modest, fits CPU comfortably llm = Llama( model_path=model_path, n_ctx=4096, n_threads=2, n_batch=256, verbose=False, ) print("Model ready.") SYSTEM_PROMPT = ( "You are a Yu-Gi-Oh! card scripter for the YGOPro engine. Given the " "English PSCT (Problem-Solving Card Text) of a card, produce a complete " "Lua script that implements it. Use the modern conventions: start with " "`local s,id,o=GetID()` and define `s.initial_effect(c)` plus per-effect " "callback functions. The standard parameter list for callbacks is " "`(e,tp,eg,ep,ev,re,r,rp[,chk[,chkc]])`. Always set Category/Type/Code " "correctly so the engine's chain solver can reason about the effect." ) # ----------------------------------------------------------------------- # Inference # ----------------------------------------------------------------------- def generate(card_name: str, psct: str) -> str: if not psct or not psct.strip(): return "// (paste a card text on the left, then click Generate)" user = f"Card: {card_name or 'Test Card'}\n\nPSCT:\n{psct.strip()}" output = llm.create_chat_completion( messages=[ {"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": user}, ], max_tokens=2048, temperature=0.0, # deterministic; flip to 0.7 for variety stop=["<|im_end|>", ""], ) return output["choices"][0]["message"]["content"].strip() # ----------------------------------------------------------------------- # UI # ----------------------------------------------------------------------- EXAMPLES = [ ["Test Searcher", "When this card is Normal Summoned: You can add 1 Level 4 or lower " "Warrior monster from your Deck to your hand. You can only use this " "effect of \"Test Searcher\" once per turn."], ["Spell Mill", "Send 1 Level 4 or lower Spellcaster monster from your Deck to the GY, " "then draw 1 card. You can only activate 1 \"Spell Mill\" per turn."], ["Pot of Greed (clone)", "Draw 2 cards."], ["Beast Bouncer", "Target 1 monster on the field; return it to the hand. You can only " "activate 1 \"Beast Bouncer\" per turn."], ] with gr.Blocks(title="PSCT → Lua", theme=gr.themes.Soft()) as demo: gr.Markdown( "# 🃏 PSCT → Lua\n" "Paste a Yu-Gi-Oh card's English text (PSCT) and get a working " "YGOPro Lua script. Powered by a fine-tuned Qwen2.5-Coder-7B " "trained on ~13,000 real card scripts.\n\n" "**Heads up:** runs on a free CPU instance — generation takes " "**~60-120 seconds** per card. Be patient. ☕" ) with gr.Row(): with gr.Column(): name = gr.Textbox(label="Card name", value="Test Card", lines=1) psct = gr.Textbox( label="PSCT (card text)", lines=10, placeholder="When this card is Normal Summoned: ...", ) btn = gr.Button("Generate Lua", variant="primary") with gr.Column(): # Gradio 4.44 has no "lua" highlighter — use "python" for similar # syntax (comments, strings, function keyword) without breaking. out = gr.Code(label="Generated Lua", language="python", lines=22) btn.click(fn=generate, inputs=[name, psct], outputs=out) gr.Examples(examples=EXAMPLES, inputs=[name, psct]) if __name__ == "__main__": # Don't pass server_name/port — HF Spaces sets GRADIO_SERVER_NAME and # GRADIO_SERVER_PORT for us. Forcing them fights the platform's binding. demo.queue(max_size=8).launch()