Spaces:

erdemozkan
/

yolo-coder

Sleeping

App Files Files Community

erdemozkan committed on Apr 16

Commit

495bec1

verified ·

1 Parent(s): db90f55

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -52

app.py CHANGED Viewed

@@ -1,64 +1,48 @@
-import spaces
 import gradio as gr
-import torch
-from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config
 # 1. Configuration
-model_id = "erdemozkan/YOLO-7B-Qwen-Coder"
-print(f"🚀 YOLO CODER: Forcing Qwen2 Engine for {model_id}...")
-# 2. Direct Loading (Bypassing AutoModel/AutoTokenizer)
-# We use the specific Qwen2 classes to avoid the 'Unrecognized' error
-try:
-    tokenizer = Qwen2Tokenizer.from_pretrained(
-        model_id,
-        trust_remote_code=True,
-        use_fast=False # Standard Python backend is more stable for custom weights
-    )
-    model = Qwen2ForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch.bfloat16,
-        device_map="auto",
-        trust_remote_code=True
-    )
-except Exception as e:
-    print(f"❌ Direct load failed: {e}")
-    print("Falling back to Auto-classes with forced config...")
-    # Last ditch effort if Qwen2 classes aren't in the path
-    from transformers import AutoModelForCausalLM, AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map="auto")
-# 3. The Core Logic
-@spaces.GPU(duration=60)
 def yoco_heal(broken_code):
-    if not broken_code or not broken_code.strip():
-        return "⚠️ YOLO needs code."
-    prompt = (
-        "### System: You are YOLO CODER (yoco). An elite autonomous agent.\n"
-        f"### Broken Code:\n{broken_code}\n"
-        "### Fixed Code (No talk, just code):"
-    )
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=1024,
-            temperature=0.2,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 151643
-        )
-    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    marker = "### Fixed Code (No talk, just code):"
-    return full_response.split(marker)[1].strip() if marker in full_response else full_response
-# 4. Neon-Hacker UI
 css = """
 .gradio-container { background-color: #000 !important; }
 #header h1 { color: #ffff00 !important; text-shadow: 0 0 10px #ffff00; text-align: center; }
@@ -67,11 +51,12 @@ css = """
 """
 with gr.Blocks(css=css) as demo:
-    gr.Markdown("# YOLO CODER", elem_id="header")
     with gr.Row():
-        in_code = gr.Code(label="INPUT", language="python", lines=15, elem_classes="code-box")
-        out_code = gr.Code(label="HEALED", language="python", lines=15, elem_classes="code-box", interactive=False)
     btn = gr.Button("YOLO IT!", elem_id="yolo-btn")
     btn.click(yoco_heal, in_code, out_code)
 if __name__ == "__main__":

 import gradio as gr
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+import os
 # 1. Configuration
+REPO_ID = "erdemozkan/YOLO-7B-Qwen-Coder"
+GGUF_FILE = "YOLO-7B-Qwen-q4.gguf" # <--- CHANGE THIS to your actual filename
+print(f"📦 Downloading {GGUF_FILE} from {REPO_ID}...")
+# 2. Download the model file from your repo
+model_path = hf_hub_download(repo_id=REPO_ID, filename=GGUF_FILE)
+# 3. Initialize the CPU Engine
+# n_ctx: context window (keep it small for CPU speed)
+# n_threads: use 2 for HF Free tier (it only gives you 2 vCPUs)
+llm = Llama(
+    model_path=model_path,
+    n_ctx=2048,
+    n_threads=2,
+    verbose=False
+)
 def yoco_heal(broken_code):
+    """Ask the locally loaded GGUF model to repair a snippet of broken code.
+
+    Args:
+        broken_code: Text taken from the INPUT code box. Falsy values
+            (None or an empty string) and whitespace-only text are treated
+            as "nothing to fix".
+
+    Returns:
+        The model's completion with surrounding whitespace stripped, or a
+        short warning string when no code was supplied.
+    """
+    # Check for a falsy value before calling .strip() so that a None input
+    # yields the warning instead of raising AttributeError.
+    if not broken_code or not broken_code.strip():
+        return "⚠️ Paste some code first."
+    # Persona-driven prompt; the trailing "### Fixed:" header is where the
+    # model is expected to continue.
+    prompt = f"### System: You are YOLO CODER. Fix the code below.\n### Broken:\n{broken_code}\n### Fixed:"
+    print("🛠️ YOLO is healing on CPU...")
+    # Simple inference
+    response = llm(
+        prompt,
+        max_tokens=1024,
+        stop=["###", "\n\n\n"], # Stop sequences to prevent rambling
+        echo=False
+    )
+    # Only the first choice is used; echo=False above asks for the completion
+    # without the prompt.
+    return response["choices"][0]["text"].strip()
+# 4. Neon-Hacker UI (Same as before, still cool)
 css = """
 .gradio-container { background-color: #000 !important; }
 #header h1 { color: #ffff00 !important; text-shadow: 0 0 10px #ffff00; text-align: center; }
 """
 with gr.Blocks(css=css) as demo:
+    gr.Markdown("# YOLO CODER [CPU MODE]", elem_id="header")
     with gr.Row():
+        in_code = gr.Code(label="INPUT", language="python", lines=12, elem_classes="code-box")
+        out_code = gr.Code(label="HEALED", language="python", lines=12, elem_classes="code-box")
     btn = gr.Button("YOLO IT!", elem_id="yolo-btn")
     btn.click(yoco_heal, in_code, out_code)
 if __name__ == "__main__":