erdemozkan committed on
Commit
495bec1
·
verified ·
1 Parent(s): db90f55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -52
app.py CHANGED
@@ -1,64 +1,48 @@
1
- import spaces
2
  import gradio as gr
3
- import torch
4
- from transformers import Qwen2ForCausalLM, Qwen2Tokenizer, Qwen2Config
 
5
 
6
  # 1. Configuration
7
- model_id = "erdemozkan/YOLO-7B-Qwen-Coder"
 
8
 
9
- print(f"🚀 YOLO CODER: Forcing Qwen2 Engine for {model_id}...")
10
 
11
- # 2. Direct Loading (Bypassing AutoModel/AutoTokenizer)
12
- # We use the specific Qwen2 classes to avoid the 'Unrecognized' error
13
- try:
14
- tokenizer = Qwen2Tokenizer.from_pretrained(
15
- model_id,
16
- trust_remote_code=True,
17
- use_fast=False # Standard Python backend is more stable for custom weights
18
- )
19
 
20
- model = Qwen2ForCausalLM.from_pretrained(
21
- model_id,
22
- torch_dtype=torch.bfloat16,
23
- device_map="auto",
24
- trust_remote_code=True
25
- )
26
- except Exception as e:
27
- print(f"❌ Direct load failed: {e}")
28
- print("Falling back to Auto-classes with forced config...")
29
- # Last ditch effort if Qwen2 classes aren't in the path
30
- from transformers import AutoModelForCausalLM, AutoTokenizer
31
- tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
32
- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map="auto")
33
 
34
- # 3. The Core Logic
35
- @spaces.GPU(duration=60)
36
  def yoco_heal(broken_code):
37
- if not broken_code or not broken_code.strip():
38
- return "⚠️ YOLO needs code."
39
 
40
- prompt = (
41
- "### System: You are YOLO CODER (yoco). An elite autonomous agent.\n"
42
- f"### Broken Code:\n{broken_code}\n"
43
- "### Fixed Code (No talk, just code):"
44
- )
45
-
46
- inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
47
 
48
- with torch.no_grad():
49
- outputs = model.generate(
50
- **inputs,
51
- max_new_tokens=1024,
52
- temperature=0.2,
53
- do_sample=True,
54
- pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else 151643
55
- )
56
 
57
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
58
- marker = "### Fixed Code (No talk, just code):"
59
- return full_response.split(marker)[1].strip() if marker in full_response else full_response
60
 
61
- # 4. Neon-Hacker UI
62
  css = """
63
  .gradio-container { background-color: #000 !important; }
64
  #header h1 { color: #ffff00 !important; text-shadow: 0 0 10px #ffff00; text-align: center; }
@@ -67,11 +51,12 @@ css = """
67
  """
68
 
69
  with gr.Blocks(css=css) as demo:
70
- gr.Markdown("# YOLO CODER", elem_id="header")
71
  with gr.Row():
72
- in_code = gr.Code(label="INPUT", language="python", lines=15, elem_classes="code-box")
73
- out_code = gr.Code(label="HEALED", language="python", lines=15, elem_classes="code-box", interactive=False)
74
  btn = gr.Button("YOLO IT!", elem_id="yolo-btn")
 
75
  btn.click(yoco_heal, in_code, out_code)
76
 
77
  if __name__ == "__main__":
 
 
1
  import gradio as gr
2
+ from llama_cpp import Llama
3
+ from huggingface_hub import hf_hub_download
4
+ import os
5
 
6
  # 1. Configuration
7
+ REPO_ID = "erdemozkan/YOLO-7B-Qwen-Coder"
8
+ GGUF_FILE = "YOLO-7B-Qwen-q4.gguf" # <--- CHANGE THIS to your actual filename
9
 
10
+ print(f"📦 Downloading {GGUF_FILE} from {REPO_ID}...")
11
 
12
+ # 2. Download the model file from your repo
13
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=GGUF_FILE)
 
 
 
 
 
 
14
 
15
+ # 3. Initialize the CPU Engine
16
+ # n_ctx: context window (keep it small for CPU speed)
17
+ # n_threads: use 2 for HF Free tier (it only gives you 2 vCPUs)
18
+ llm = Llama(
19
+ model_path=model_path,
20
+ n_ctx=2048,
21
+ n_threads=2,
22
+ verbose=False
23
+ )
 
 
 
 
24
 
 
 
25
  def yoco_heal(broken_code):
26
+ if not broken_code.strip():
27
+ return "⚠️ Paste some code first."
28
 
29
+ # Persona-driven prompt
30
+ prompt = f"### System: You are YOLO CODER. Fix the code below.\n### Broken:\n{broken_code}\n### Fixed:"
31
+
32
+ print("🛠️ YOLO is healing on CPU...")
 
 
 
33
 
34
+ # Simple inference
35
+ response = llm(
36
+ prompt,
37
+ max_tokens=1024,
38
+ stop=["###", "\n\n\n"], # Stop sequences to prevent rambling
39
+ echo=False
40
+ )
 
41
 
42
+ output = response["choices"][0]["text"].strip()
43
+ return output
 
44
 
45
+ # 4. Neon-Hacker UI (Same as before, still cool)
46
  css = """
47
  .gradio-container { background-color: #000 !important; }
48
  #header h1 { color: #ffff00 !important; text-shadow: 0 0 10px #ffff00; text-align: center; }
 
51
  """
52
 
53
  with gr.Blocks(css=css) as demo:
54
+ gr.Markdown("# YOLO CODER [CPU MODE]", elem_id="header")
55
  with gr.Row():
56
+ in_code = gr.Code(label="INPUT", language="python", lines=12, elem_classes="code-box")
57
+ out_code = gr.Code(label="HEALED", language="python", lines=12, elem_classes="code-box")
58
  btn = gr.Button("YOLO IT!", elem_id="yolo-btn")
59
+
60
  btn.click(yoco_heal, in_code, out_code)
61
 
62
  if __name__ == "__main__":