Spaces:

Sachin5112
/

coding-vision-ai

Sleeping

App Files Files Community

coding-vision-ai / app.py

Sachin5112

Update app.py

9105400 verified about 1 month ago

raw

history blame contribute delete

2.24 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoProcessor, AutoConfig
	from PIL import Image

	model_id = "microsoft/Phi-3.5-vision-instruct"

	# Fetch the configuration on its own so the attention implementation can be
	# pinned to "eager" before the model is instantiated from it.
	config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
	config._attn_implementation = "eager"

	# The processor supplies the tokenizer/chat template and turns prompt + image
	# into model inputs.
	processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

	# Instantiate the model from the pinned config with bfloat16 weights, then
	# put it in evaluation mode.
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    config=config,
	    trust_remote_code=True,
	    torch_dtype=torch.bfloat16,
	    low_cpu_mem_usage=True,
	)
	model.eval()

	def chat_with_ai(image, text):
	    """Answer a question about an image using the loaded vision model.

	    Args:
	        image: The image to analyse, or ``None`` when nothing was provided.
	        text: The user's question. ``None`` is treated as an empty question.

	    Returns:
	        The decoded model reply as a string, or a short message asking for
	        an image when ``image`` is ``None``.
	    """
	    if image is None:
	        return "Please upload a screenshot or use the camera!"

	    # Avoid formatting a missing question as the literal string "None".
	    question = text or ""

	    # The image placeholder has to be the exact token "<|image_1|>".
	    # Backslash-escaping the pipes leaves literal backslashes in the prompt,
	    # so the placeholder would no longer match.
	    messages = [{"role": "user", "content": f"<|image_1|>\n{question}"}]
	    prompt = processor.tokenizer.apply_chat_template(
	        messages,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Move the processed inputs to whatever device the model lives on.
	    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

	    with torch.no_grad():
	        generate_ids = model.generate(
	            **inputs,
	            max_new_tokens=512,
	            do_sample=False,
	            # Cache left disabled: the original author notes this works
	            # around an AttributeError raised during generation.
	            use_cache=False,
	        )

	    # generate() returns prompt + completion; keep only the newly generated
	    # tokens that follow the prompt.
	    prompt_length = inputs["input_ids"].shape[1]
	    generate_ids = generate_ids[:, prompt_length:]
	    response = processor.batch_decode(
	        generate_ids,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )[0]
	    return response

	# UI setup: an image input and a question box feed chat_with_ai, and the
	# reply is shown in a text box.
	with gr.Blocks() as demo:
	    gr.Markdown("# 👁️ Phi-3.5 Vision Coding Assistant")
	    with gr.Row():
	        with gr.Column():
	            input_img = gr.Image(type="pil", label="Capture/Upload", sources=["upload", "webcam"])
	            input_text = gr.Textbox(label="Coding Question", placeholder="Analyze this code...")
	            submit_btn = gr.Button("Run AI Analysis")
	        # NOTE(review): the original nesting was lost; the output box is
	        # placed in the row beside the input column — confirm the layout.
	        output_text = gr.Textbox(label="AI Solution", lines=10)

	    # Wire the button while still inside the Blocks context.
	    submit_btn.click(chat_with_ai, inputs=[input_img, input_text], outputs=output_text)

	demo.launch()