import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
from PIL import Image

model_id = "microsoft/Phi-3.5-vision-instruct"

# Fetch the configuration on its own so the attention implementation can be
# pinned to "eager" before the model is built from it.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config._attn_implementation = "eager"

# Processor belonging to the same checkpoint.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Build the model from the adjusted config with bfloat16 weights and reduced
# CPU memory usage while loading, then put it into evaluation mode.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
)
model.eval()

def chat_with_ai(image, text):
    """Answer a question about an image using the module-level model.

    Args:
        image: The image to analyse, or ``None`` when nothing was supplied.
        text: The question; it is inserted after the ``<|image_1|>`` tag in
            a single user message.

    Returns:
        The decoded text of the newly generated tokens, or a fixed hint
        string when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload a screenshot or use the camera!"

    # One user turn: the image tag on the first line, the question below it.
    user_turn = {"role": "user", "content": f"<|image_1|>\n{text}"}
    prompt = processor.tokenizer.apply_chat_template(
        [user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenise/encode, then move the batch to the device the model lives on.
    batch = processor(text=prompt, images=[image], return_tensors="pt")
    batch = batch.to(model.device)
    prompt_len = batch["input_ids"].shape[1]

    # Greedy decoding; the cache is switched off as a workaround for an
    # AttributeError raised during generation.
    generation_kwargs = dict(max_new_tokens=512, do_sample=False, use_cache=False)
    with torch.no_grad():
        output_ids = model.generate(**batch, **generation_kwargs)

    # Drop the prompt tokens so that only the reply gets decoded.
    reply_ids = output_ids[:, prompt_len:]
    decoded = processor.batch_decode(
        reply_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]

# Gradio interface: image and question inputs with a button in the left
# column, the model's answer in a text box beside it.
with gr.Blocks() as demo:
    gr.Markdown("# 👁️ Phi-3.5 Vision Coding Assistant")
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(
                type="pil",
                label="Capture/Upload",
                sources=["upload", "webcam"],
            )
            input_text = gr.Textbox(
                label="Coding Question",
                placeholder="Analyze this code...",
            )
            submit_btn = gr.Button("Run AI Analysis")
        output_text = gr.Textbox(label="AI Solution", lines=10)

    # Clicking the button sends the image and question to chat_with_ai and
    # writes its return value into the output box.
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[input_img, input_text],
        outputs=output_text,
    )

demo.launch()