SeifElden2342532 committed on
Commit
97d85b0
·
verified ·
1 Parent(s): cabb5d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -46
app.py CHANGED
@@ -6,58 +6,30 @@ from peft import PeftModel
6
  base_model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
7
  adapter_repo_id = "SeifElden2342532/Code-Optimizer"
8
 
9
- print("Loading model...")
10
  tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
 
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  base_model_id,
13
  torch_dtype=torch.bfloat16,
14
- device_map="auto"
 
 
15
  )
16
- model = PeftModel.from_pretrained(model, adapter_repo_id)
17
- model = model.merge_and_unload()
18
- model.eval()
19
- print("Model ready!")
20
-
21
- SYSTEM_PROMPT = "You are an expert Python code optimizer. Your goal is to take user-provided Python code and optimize it for performance, readability, or conciseness, based on the user's specified category. Provide the optimized code, a brief explanation of the changes, and a complexity comparison table (e.g., time and space complexity before and after optimization)."
22
-
23
- def optimize(code, category):
24
- if not code.strip():
25
- return "Please enter some Python code."
26
 
27
- messages = [
28
- {"role": "system", "content": SYSTEM_PROMPT},
29
- {"role": "user", "content": f"Original Code:\n```python\n{code}\n```\nCategory: {category}"}
30
- ]
31
-
32
- text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
33
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
34
-
35
- with torch.no_grad():
36
- generated_ids = model.generate(**model_inputs, max_new_tokens=1024)
37
 
38
- # Strip input tokens from output
39
- input_len = model_inputs["input_ids"].shape[1]
40
- output_ids = generated_ids[0][input_len:]
41
- return tokenizer.decode(output_ids, skip_special_tokens=True)
42
 
43
- demo = gr.Interface(
44
- fn=optimize,
45
- inputs=[
46
- gr.Code(language="python", label="Your Python Code", lines=15),
47
- gr.Radio(
48
- choices=["Performance", "Readability", "Conciseness"],
49
- value="Performance",
50
- label="Optimization Category"
51
- )
52
- ],
53
- outputs=gr.Textbox(label="Optimized Code & Explanation", lines=20),
54
- title="⚡ Python Code Optimizer",
55
- description="A QLoRA fine-tuned Qwen2.5-Coder-7B model that optimizes your Python code for performance, readability, or conciseness.",
56
- examples=[
57
- ["def factorial(n):\n if n == 0:\n return 1\n else:\n return n * factorial(n-1)", "Performance"],
58
- ["result = []\nfor i in range(10):\n result.append(i * 2)", "Conciseness"],
59
- ],
60
- flagging_mode="never"
61
- )
62
 
63
- demo.launch()
 
 
6
  base_model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
7
  adapter_repo_id = "SeifElden2342532/Code-Optimizer"
8
 
9
+ print("Loading model and tokenizer...")
10
  tokenizer = AutoTokenizer.from_pretrained(base_model_id)
11
+
12
+ # 1. Load the base model explicitly on the CPU first (device_map={"": "cpu"})
13
+ # We avoid device_map="auto" here to prevent the 'meta' device conflict
14
  model = AutoModelForCausalLM.from_pretrained(
15
  base_model_id,
16
  torch_dtype=torch.bfloat16,
17
+ trust_remote_code=True,
18
+ low_cpu_mem_usage=True,
19
+ device_map={"": "cpu"} # Force initial load to CPU to avoid 'meta'
20
  )
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # 2. Load the adapter
23
+ print("Applying LoRA adapter...")
24
+ model = PeftModel.from_pretrained(model, adapter_repo_id)
 
 
 
 
 
 
 
25
 
26
+ # 3. Merge and Unload (This flattens the 'base_model.model' nesting)
27
+ print("Merging weights...")
28
+ model = model.merge_and_unload()
 
29
 
30
+ # 4. Move the final merged model to GPU
31
+ device = "cuda" if torch.cuda.is_available() else "cpu"
32
+ model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ model.eval()
35
+ print(f"Model ready on {device}!")