Spaces:

ignitariumcloud
/

llama3.2

Sleeping

App Files Files Community

llama3.2 / app.py

arjunanand13

Update app.py

5d15a12 verified over 1 year ago

raw

history blame contribute delete

6.07 kB

	import torch
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForPreTraining
	import gradio as gr
	import json
	import traceback
	import os
	import re

	# Checkpoint loaded for both the processor and the model below.
	model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

	# Read the access token from the environment. A missing or blank variable
	# becomes None instead of crashing with AttributeError on `None.strip()`;
	# the from_pretrained calls then receive no explicit token.
	token = (os.getenv("HUGGINGFACE_TOKEN") or "").strip() or None

	processor = AutoProcessor.from_pretrained(model_name, token=token)
	model = AutoModelForPreTraining.from_pretrained(
	    model_name,
	    quantization_config={"load_in_4bit": True},
	    token=token,
	)

	# Move to the GPU only when the model is not already on a CUDA device.
	# NOTE(review): 4-bit loading presumably places the weights on the GPU
	# already, and some transformers/bitsandbytes versions reject `.to()` on
	# quantized models - confirm against the installed versions.
	if torch.cuda.is_available() and model.device.type != "cuda":
	    model = model.to("cuda")

	def _extract_last_json(text):
	    """Return ``(obj, error)`` for the last decodable JSON object in *text*.

	    Every ``{`` is tried as the start of a JSON object using
	    ``json.JSONDecoder.raw_decode``, which copes with nested objects and
	    with braces inside string values.

	    Returns:
	        ``(obj, None-or-error)`` where ``obj`` is the last object that
	        decoded successfully, or ``None`` if nothing decoded. ``error`` is
	        the most recent ``json.JSONDecodeError`` seen, or ``None`` if no
	        decode attempt failed (including when *text* has no ``{``).
	    """
	    decoder = json.JSONDecoder()
	    last_obj = None
	    last_error = None
	    start = text.find("{")
	    while start != -1:
	        try:
	            obj, end = decoder.raw_decode(text, start)
	        except json.JSONDecodeError as exc:
	            last_error = exc
	            # Not a valid object here; retry from the next opening brace.
	            start = text.find("{", start + 1)
	        else:
	            last_obj = obj
	            # Skip past the decoded object so nested braces are not re-read.
	            start = text.find("{", end)
	    return last_obj, last_error


	def analyze_image(image, prompt):
	    """Run the vision model on *image* with *prompt* and parse its JSON answer.

	    Args:
	        image: The image handed to the processor, or ``None`` when nothing
	            was supplied.
	        prompt: Instruction text sent to the model together with the image.

	    Returns:
	        A ``(full_response, processed_json)`` tuple. ``full_response`` is
	        the decoded output sequence. ``processed_json`` is the last JSON
	        object found in the newly generated text, or a dict with an
	        ``"error"`` key (and ``"full_response"``) when none could be parsed.
	    """
	    if image is None:
	        # Nothing to analyze; report it instead of failing in the processor.
	        return "", {"error": "No image provided", "full_response": ""}

	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {"type": "image"},
	                {"type": "text", "text": prompt},
	            ],
	        }
	    ]
	    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
	    inputs = processor(
	        image,
	        input_text,
	        add_special_tokens=False,
	        return_tensors="pt",
	    ).to(model.device)

	    with torch.no_grad():
	        output = model.generate(**inputs, max_new_tokens=100)

	    full_response = processor.decode(output[0])

	    # Search for JSON only in the newly generated tokens. The prompt may
	    # itself contain JSON examples, and matching one of those would report
	    # an example as if it were the model's answer.
	    # NOTE(review): assumes generate() returns the prompt tokens followed by
	    # the new tokens, so slicing at the prompt length isolates the answer.
	    prompt_length = inputs["input_ids"].shape[-1]
	    generated_text = processor.decode(
	        output[0][prompt_length:], skip_special_tokens=True
	    )

	    parsed, error = _extract_last_json(generated_text)
	    if parsed is not None:
	        processed_json = parsed
	    elif error is not None:
	        processed_json = {
	            "error": f"Invalid JSON in model output: {error}",
	            "full_response": full_response,
	        }
	    else:
	        processed_json = {
	            "error": "No JSON found in model output",
	            "full_response": full_response,
	        }

	    return full_response, processed_json

	# Default instruction text for the prompt textbox. It asks whether the image
	# contains a data logger and requests a JSON answer with the keys "present"
	# and "reason". The whole literal is sent to the model as-is.
	default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors.

	Carefully examine the image and provide a detailed response. If a data logger is present in the image, respond with:
	{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}

	If no data logger is visible, respond with:
	{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}

	Ensure your response is in valid JSON format """

	# Input widgets: the uploaded image (passed to the callback as a PIL image)
	# and an editable prompt pre-filled with the default instruction.
	image_input = gr.Image(type="pil", label="Upload Image")
	prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=10)

	# Output widgets, in the order analyze_image returns its two values.
	response_output = gr.Textbox(label="Full Response", lines=10)
	json_output = gr.JSON(label="Processed JSON")

	# Assemble the UI; the single example is listed but not pre-computed.
	iface = gr.Interface(
	    fn=analyze_image,
	    inputs=[image_input, prompt_input],
	    outputs=[response_output, json_output],
	    title="Llama 3.2 Vision",
	    cache_examples=False,
	    description=" ",
	    examples=[["bad.png", default_prompt]],
	)

	# Start serving the interface.
	iface.launch()

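	# ---------------------------------------------------------------------------
	# Inactive (commented-out) variant of the app above; none of it is executed.
	# It differs in that it returns only the parsed JSON, taken from the first
	# {...} match in the decoded output, and shows a single JSON output widget.
	# ---------------------------------------------------------------------------
	# import torch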
	# from PIL import Image
	# from transformers import AutoProcessor, AutoModelForPreTraining
	# import gradio as gr
	# import json
	# import traceback
	# import os
	# import re

	# model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
	# token = os.getenv("HUGGINGFACE_TOKEN").strip()

	# processor = AutoProcessor.from_pretrained(model_name, token=token)
	# model = AutoModelForPreTraining.from_pretrained(
	# model_name,
	# quantization_config={"load_in_4bit": True},
	# token=token
	# )

	# if torch.cuda.is_available():
	# model = model.to('cuda')

	# def analyze_image(image, prompt):
	# messages = [
	# {"role": "user", "content": [
	# {"type": "image"},
	# {"type": "text", "text": prompt}
	# ]}
	# ]

	# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
	# inputs = processor(
	# image,
	# input_text,
	# add_special_tokens=False,
	# return_tensors="pt"
	# ).to(model.device)

	# with torch.no_grad():
	# output = model.generate(**inputs, max_new_tokens=100)

	# full_response = processor.decode(output[0])
	# print("Full response:", full_response) # Debug print

	# # return full_response
	# try:
	# json_match = re.search(r'\{.*?\}', full_response, re.DOTALL)
	# if json_match:
	# json_str = json_match.group(0)
	# try:
	# return json.loads(json_str)
	# except json.JSONDecodeError as e:
	# print(f"JSON decode error: {e}")
	# return {"error": "Invalid JSON in model output", "full_response": full_response}
	# else:
	# return {"error": "No JSON found in model output", "full_response": full_response}
	# except Exception as e:
	# print(f"Error in analyze_image: {e}")
	# return {"Full Response": str(e), "full_response": full_response}


	# default_prompt = """Analyze this image and determine if it contains a data logger.
	# A data logger is typically a small, black electronic device used to monitor and record data
	# over time, such as voltage, temperature, or current, via external sensors.

	# If a data logger is present in the image, respond with:
	# {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}

	# If no data logger is visible, respond with:
	# {"present": false, "reason": "Brief explanation of why you believe there's no data logger"}

	# Ensure your response is in valid JSON format."""

	# iface = gr.Interface(
	# fn=analyze_image,
	# inputs=[
	# gr.Image(type="pil", label="Upload Image"),
	# gr.Textbox(label="Prompt", value=default_prompt, lines=10)
	# ],
	# outputs=gr.JSON(label="Analysis Result"),
	# title="Data Logger Detection using Llama 3.2 Vision",
	# description="Upload an image and customize the prompt to check if it contains a data logger.",
	# examples=[
	# ["bad.png", default_prompt]
	# ]
	# )

	# iface.launch()