Spaces:
Sleeping
Sleeping
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
model_id = "microsoft/Phi-3.5-vision-instruct"

# Load the config on its own first so the attention implementation can be
# forced to "eager" before the model is constructed from it.
# NOTE(review): presumably required because the checkpoint's default attention
# backend is not available on this host -- confirm against the model card.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config._attn_implementation = "eager"

# The processor provides both the tokenizer (used for the chat template) and
# the image preprocessing applied to uploaded pictures.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Build the model from the patched config. bfloat16 stores each weight in two
# bytes instead of float32's four; .eval() puts the module in inference mode.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    config=config,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
).eval()
def chat_with_ai(image, text):
    """Answer a question about an uploaded image with the loaded vision model.

    Args:
        image: The picture supplied by the UI, or ``None`` when nothing was
            uploaded or captured.
        text: The user's question; ``None`` is treated as an empty question.

    Returns:
        The decoded model reply, or a prompt asking the user to provide an
        image when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload a screenshot or use the camera!"

    # A missing question would otherwise be formatted as the literal text
    # "None" inside the prompt.
    question = text or ""

    # Single-turn chat prompt; "<|image_1|>" marks where the image is attached.
    messages = [{"role": "user", "content": f"<|image_1|>\n{question}"}]
    prompt = processor.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tensors must live on the same device as the model weights.
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generate_ids = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,
            # The cache is disabled as a workaround for an AttributeError
            # raised during generation with this checkpoint.
            use_cache=False,
        )

    # generate() returns prompt + completion; keep only the newly generated ids.
    prompt_length = inputs["input_ids"].shape[1]
    generate_ids = generate_ids[:, prompt_length:]

    response = processor.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    return response
# UI setup: image and question inputs on the left, the model's answer beside them.
# NOTE(review): the nesting below was reconstructed from a copy of the file that
# had lost its indentation -- confirm the output box belongs in the Row.
with gr.Blocks() as demo:
    gr.Markdown("# 👁️ Phi-3.5 Vision Coding Assistant")
    with gr.Row():
        with gr.Column():
            input_img = gr.Image(type="pil", label="Capture/Upload", sources=["upload", "webcam"])
            input_text = gr.Textbox(label="Coding Question", placeholder="Analyze this code...")
            submit_btn = gr.Button("Run AI Analysis")
        output_text = gr.Textbox(label="AI Solution", lines=10)

    # Event listeners have to be registered while the Blocks context is open.
    submit_btn.click(chat_with_ai, inputs=[input_img, input_text], outputs=output_text)

demo.launch()