Spaces:

SeifElden2342532
/

Code-Optimizer

Sleeping

App Files Community

SeifElden2342532 committed on Apr 14

Commit

97d85b0

verified ·

1 Parent(s): cabb5d3

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -46

app.py CHANGED Viewed

@@ -6,58 +6,30 @@ from peft import PeftModel
 base_model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
 adapter_repo_id = "SeifElden2342532/Code-Optimizer"
-print("Loading model...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
     torch_dtype=torch.bfloat16,
-    device_map="auto"
 )
-model = PeftModel.from_pretrained(model, adapter_repo_id)
-model = model.merge_and_unload()
-model.eval()
-print("Model ready!")
-SYSTEM_PROMPT = "You are an expert Python code optimizer. Your goal is to take user-provided Python code and optimize it for performance, readability, or conciseness, based on the user's specified category. Provide the optimized code, a brief explanation of the changes, and a complexity comparison table (e.g., time and space complexity before and after optimization)."
-def optimize(code, category):
-    if not code.strip():
-        return "Please enter some Python code."
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": f"Original Code:\n```python\n{code}\n```\nCategory: {category}"}
-    ]
-    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
-    with torch.no_grad():
-        generated_ids = model.generate(**model_inputs, max_new_tokens=1024)
-    # Strip input tokens from output
-    input_len = model_inputs["input_ids"].shape[1]
-    output_ids = generated_ids[0][input_len:]
-    return tokenizer.decode(output_ids, skip_special_tokens=True)
-demo = gr.Interface(
-    fn=optimize,
-    inputs=[
-        gr.Code(language="python", label="Your Python Code", lines=15),
-        gr.Radio(
-            choices=["Performance", "Readability", "Conciseness"],
-            value="Performance",
-            label="Optimization Category"
-        )
-    ],
-    outputs=gr.Textbox(label="Optimized Code & Explanation", lines=20),
-    title="⚡ Python Code Optimizer",
-    description="A QLoRA fine-tuned Qwen2.5-Coder-7B model that optimizes your Python code for performance, readability, or conciseness.",
-    examples=[
-        ["def factorial(n):\n    if n == 0:\n        return 1\n    else:\n        return n * factorial(n-1)", "Performance"],
-        ["result = []\nfor i in range(10):\n    result.append(i * 2)", "Conciseness"],
-    ],
-    flagging_mode="never"
-)
-demo.launch()

 base_model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
 adapter_repo_id = "SeifElden2342532/Code-Optimizer"
+print("Loading model and tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+# 1. Load the base model explicitly on the CPU first.
+# We avoid device_map="auto" here to prevent the 'meta' device conflict.
 model = AutoModelForCausalLM.from_pretrained(
     base_model_id,
     torch_dtype=torch.bfloat16,
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    device_map={"": "cpu"} # Force initial load to CPU to avoid 'meta'
 )
+# 2. Load the adapter
+print("Applying LoRA adapter...")
+model = PeftModel.from_pretrained(model, adapter_repo_id)
+# 3. Merge and Unload (This flattens the 'base_model.model' nesting)
+print("Merging weights...")
+model = model.merge_and_unload()
+# 4. Move the final merged model to the GPU if one is available (otherwise keep it on the CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+model.eval()
+print(f"Model ready on {device}!")