"""Gradio demo: ask Phi-3.5-vision a question about an uploaded or captured image."""

from typing import Optional

import gradio as gr
import torch
from PIL import Image
from transformers import AutoConfig, AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Phi-3.5-vision-instruct"

# Upper bound on the number of tokens generated for a single answer.
MAX_NEW_TOKENS = 512

# 1. Load the config first and force the attention implementation to "eager".
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config._attn_implementation = "eager"

# 2. Load the processor; it provides both the tokenizer and the image/text
#    preprocessing used in chat_with_ai().
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# 3. Load the model using the forced config. bfloat16 halves the weight
#    memory compared with float32; .eval() switches off training-only layers.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
).eval()


def chat_with_ai(image: Optional[Image.Image], text: Optional[str]) -> str:
    """Answer a question about an image with the loaded vision model.

    Args:
        image: PIL image delivered by the Gradio image input, or ``None``
            when nothing was uploaded or captured.
        text: The user's question. ``None`` is treated as an empty question
            so the literal string "None" never ends up in the prompt.

    Returns:
        The decoded model answer, or a short instruction to provide an
        image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload a screenshot or use the camera!"

    question = "" if text is None else text

    # Prompt format: the <|image_1|> placeholder marks where the first (and
    # only) image is injected into the user turn.
    messages = [{"role": "user", "content": f"<|image_1|>\n{question}"}]
    prompt = processor.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Process inputs and move them to the same device as the model.
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generate_ids = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            # Workaround kept from the original author: disabling the cache
            # avoids an AttributeError raised during generation.
            use_cache=False,
        )

    # generate() returns prompt + completion; drop the prompt tokens so only
    # the newly generated answer is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    generate_ids = generate_ids[:, prompt_length:]
    response = processor.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return response


# UI setup
with gr.Blocks() as demo:
    gr.Markdown("# 👁️ Phi-3.5 Vision Coding Assistant")
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(
                type="pil",
                label="Capture/Upload",
                sources=["upload", "webcam"],
            )
            input_text = gr.Textbox(
                label="Coding Question",
                placeholder="Analyze this code...",
            )
            submit_btn = gr.Button("Run AI Analysis")
        output_text = gr.Textbox(label="AI Solution", lines=10)

    submit_btn.click(
        chat_with_ai,
        inputs=[input_img, input_text],
        outputs=output_text,
    )

demo.launch()