Spaces:
Sleeping
Sleeping
| import torch | |
| from PIL import Image | |
| from transformers import AutoProcessor, AutoModelForPreTraining | |
| import gradio as gr | |
| import json | |
| import traceback | |
| import os | |
| import re | |
# Checkpoint loaded for both the processor and the model below.
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# os.getenv returns None when HUGGINGFACE_TOKEN is unset, so calling .strip()
# on it directly crashed with AttributeError before any useful message could
# be shown. A missing or blank value is normalised to None instead, which
# leaves credential lookup to from_pretrained's own default behaviour.
token = (os.getenv("HUGGINGFACE_TOKEN") or "").strip() or None

processor = AutoProcessor.from_pretrained(model_name, token=token)
model = AutoModelForPreTraining.from_pretrained(
    model_name,
    quantization_config={"load_in_4bit": True},
    token=token,
)

# NOTE(review): the model is loaded with 4-bit quantization; depending on the
# installed transformers/bitsandbytes versions an explicit .to() on a
# quantized model may be redundant or rejected -- confirm against the
# deployment environment.
if torch.cuda.is_available():
    model = model.to('cuda')
def _extract_last_json_object(text):
    """Find the last decodable JSON object embedded in *text*.

    Every ``{`` is tried as a possible start and the JSON decoder itself
    decides where the object ends, so nested objects and braces inside string
    values are handled (a ``\\{.*?\\}`` regex stops at the first ``}``).

    Returns:
        A ``(obj, error)`` tuple. ``obj`` is the last successfully decoded
        object, or ``None`` if nothing decoded. ``error`` is the most recent
        ``json.JSONDecodeError`` hit while scanning, or ``None`` if no
        candidate failed (including when *text* contains no ``{`` at all).
    """
    decoder = json.JSONDecoder()
    found = None
    last_error = None
    start = text.find("{")
    while start != -1:
        try:
            found, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError as exc:
            last_error = exc
            start = text.find("{", start + 1)
        else:
            # Resume after the decoded object so its nested objects are not
            # mistaken for later, separate answers.
            start = text.find("{", end)
    return found, last_error


def analyze_image(image, prompt, max_new_tokens=100):
    """Run the vision model on *image* with *prompt* and extract its JSON answer.

    Args:
        image: Image handed to the processor (the UI supplies a PIL image).
        prompt: Instruction text sent alongside the image.
        max_new_tokens: Generation budget passed to ``model.generate``.

    Returns:
        A ``(full_response, processed_json)`` tuple. ``full_response`` is the
        decoded output sequence. ``processed_json`` is the last JSON object
        found in the newly generated text, or a dict with ``"error"`` and
        ``"full_response"`` keys when none could be parsed.
    """
    # Guard against a missing image (e.g. the form submitted with nothing
    # uploaded) instead of failing somewhere inside the processor.
    if image is None:
        return "", {"error": "No image provided", "full_response": ""}

    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)

    full_response = processor.decode(output[0])

    # The output sequence starts with the prompt tokens, and the prompt may
    # itself contain example JSON. Parsing only the tokens after the prompt
    # keeps an echoed example from being reported as the model's answer.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_text = processor.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )

    parsed, decode_error = _extract_last_json_object(generated_text)
    if parsed is not None:
        processed_json = parsed
    elif decode_error is not None:
        processed_json = {
            "error": f"Invalid JSON in model output: {decode_error}",
            "full_response": full_response,
        }
    else:
        processed_json = {
            "error": "No JSON found in model output",
            "full_response": full_response,
        }
    return full_response, processed_json
# Default instruction text: used as the initial value of the "Prompt" textbox
# and as the prompt of the bundled example. It asks the model to answer with a
# JSON object carrying a boolean "present" and a free-text "reason".
| default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors. | |
| Carefully examine the image and provide a detailed response. If a data logger is present in the image, respond with: | |
| {"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"} | |
| If no data logger is visible, respond with: | |
| {"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"} | |
| Ensure your response is in valid JSON format """ | |
# Input widgets, listed in the positional order analyze_image receives them.
_image_input = gr.Image(type="pil", label="Upload Image")
_prompt_input = gr.Textbox(label="Prompt", value=default_prompt, lines=10)

# Output widgets, one per element of the tuple analyze_image returns.
_response_output = gr.Textbox(label="Full Response", lines=10)
_json_output = gr.JSON(label="Processed JSON")

# Single-page UI wiring the widgets to analyze_image, with one bundled example.
iface = gr.Interface(
    fn=analyze_image,
    inputs=[_image_input, _prompt_input],
    outputs=[_response_output, _json_output],
    title="Llama 3.2 Vision",
    description=" ",
    examples=[["bad.png", default_prompt]],
    cache_examples=False,
)

iface.launch()
| # import torch | |
| # from PIL import Image | |
| # from transformers import AutoProcessor, AutoModelForPreTraining | |
| # import gradio as gr | |
| # import json | |
| # import traceback | |
| # import os | |
| # import re | |
| # model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct" | |
| # token = os.getenv("HUGGINGFACE_TOKEN").strip() | |
| # processor = AutoProcessor.from_pretrained(model_name, token=token) | |
| # model = AutoModelForPreTraining.from_pretrained( | |
| # model_name, | |
| # quantization_config={"load_in_4bit": True}, | |
| # token=token | |
| # ) | |
| # if torch.cuda.is_available(): | |
| # model = model.to('cuda') | |
| # def analyze_image(image, prompt): | |
| # messages = [ | |
| # {"role": "user", "content": [ | |
| # {"type": "image"}, | |
| # {"type": "text", "text": prompt} | |
| # ]} | |
| # ] | |
| # input_text = processor.apply_chat_template(messages, add_generation_prompt=True) | |
| # inputs = processor( | |
| # image, | |
| # input_text, | |
| # add_special_tokens=False, | |
| # return_tensors="pt" | |
| # ).to(model.device) | |
| # with torch.no_grad(): | |
| # output = model.generate(**inputs, max_new_tokens=100) | |
| # full_response = processor.decode(output[0]) | |
| # print("Full response:", full_response) # Debug print | |
| # # return full_response | |
| # try: | |
| # json_match = re.search(r'\{.*?\}', full_response, re.DOTALL) | |
| # if json_match: | |
| # json_str = json_match.group(0) | |
| # try: | |
| # return json.loads(json_str) | |
| # except json.JSONDecodeError as e: | |
| # print(f"JSON decode error: {e}") | |
| # return {"error": "Invalid JSON in model output", "full_response": full_response} | |
| # else: | |
| # return {"error": "No JSON found in model output", "full_response": full_response} | |
| # except Exception as e: | |
| # print(f"Error in analyze_image: {e}") | |
| # return {"Full Response": str(e), "full_response": full_response} | |
| # default_prompt = """Analyze this image and determine if it contains a data logger. | |
| # A data logger is typically a small, black electronic device used to monitor and record data | |
| # over time, such as voltage, temperature, or current, via external sensors. | |
| # If a data logger is present in the image, respond with: | |
| # {"present": true, "reason": "Brief explanation of why you believe it's a data logger"} | |
| # If no data logger is visible, respond with: | |
| # {"present": false, "reason": "Brief explanation of why you believe there's no data logger"} | |
| # Ensure your response is in valid JSON format.""" | |
| # iface = gr.Interface( | |
| # fn=analyze_image, | |
| # inputs=[ | |
| # gr.Image(type="pil", label="Upload Image"), | |
| # gr.Textbox(label="Prompt", value=default_prompt, lines=10) | |
| # ], | |
| # outputs=gr.JSON(label="Analysis Result"), | |
| # title="Data Logger Detection using Llama 3.2 Vision", | |
| # description="Upload an image and customize the prompt to check if it contains a data logger.", | |
| # examples=[ | |
| # ["bad.png", default_prompt] | |
| # ] | |
| # ) | |
| # iface.launch() |