import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Hub identifiers: the base checkpoint and the adapter repo that is loaded on
# top of it with PeftModel further down.
base_model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"
adapter_repo_id = "SeifElden2342532/Code-Optimizer"

# Quantization settings handed to the base model loader: 4-bit weights using
# the "nf4" quant type with double quantization enabled, and bfloat16 as the
# compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# The tokenizer is taken from the base model repo, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# NOTE(review): trust_remote_code=True permits running model code downloaded
# from the Hub repo during loading; presumably not required for this
# checkpoint -- confirm and drop the flag if so.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Attach the adapter to the quantized base model and put the result in eval
# mode; `model` is the object optimize() generates with.
model = PeftModel.from_pretrained(base_model, adapter_repo_id)
model.eval()

# System message placed first in every chat sent by optimize().
SYSTEM_PROMPT = "You are an expert Python code optimizer. Your goal is to take user-provided Python code and optimize it for performance, readability, or conciseness, based on the user's specified category. Provide the optimized code, a brief explanation of the changes, and a complexity comparison table."

def optimize(code, category):
    """Ask the fine-tuned model for an optimized version of some Python code.

    Args:
        code: Python source entered by the user. ``None``, an empty string,
            or whitespace-only input is rejected without calling the model.
        category: Optimization goal inserted verbatim into the prompt (the
            UI offers "Performance", "Readability" and "Conciseness").

    Returns:
        The decoded model reply with the prompt tokens removed, or a short
        hint asking for input when no code was supplied.
    """
    # An empty code editor may deliver None rather than ""; check for that
    # before calling .strip() so the user gets the hint instead of an
    # AttributeError.
    if not code or not code.strip():
        return "Please enter some Python code."

    # Assembled from adjacent literals so the fenced prompt can stay indented
    # with the rest of the function; the resulting text is unchanged.
    user_content = (
        "Original Code:\n"
        "```python\n"
        f"{code}\n"
        "```\n"
        f"Category: {category}"
    )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]

    # Render the chat into a single prompt string that ends with the
    # generation prompt, then tokenize it onto the model's device.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=1024,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + completion; slice off the prompt so only
    # the newly generated tokens are decoded.
    input_len = model_inputs["input_ids"].shape[1]
    output_ids = generated_ids[0][input_len:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

# Input widgets: a Python code editor and a selector for the optimization goal.
_code_input = gr.Code(language="python", label="Your Python Code", lines=15)
_category_input = gr.Radio(
    choices=["Performance", "Readability", "Conciseness"],
    value="Performance",
    label="Optimization Category",
)

# Output widget: a text box showing whatever optimize() returns.
_result_output = gr.Textbox(label="Optimized Code & Explanation", lines=20)

# Web UI that passes the two inputs to optimize(); flagging is turned off.
demo = gr.Interface(
    fn=optimize,
    inputs=[_code_input, _category_input],
    outputs=_result_output,
    title="⚡ Python Code Optimizer",
    description="QLoRA fine-tuned Qwen2.5-Coder-7B.",
    flagging_mode="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()