arjunanand13 committed on
Commit
d6c8e75
·
verified ·
1 Parent(s): e12f44e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -5
app.py CHANGED
@@ -8,6 +8,7 @@ import os
8
 
9
  model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
10
  token = os.getenv("HUGGINGFACE_TOKEN").strip()
 
11
  processor = AutoProcessor.from_pretrained(model_name, token=token)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_name,
@@ -27,15 +28,25 @@ def analyze_image(image, prompt):
27
  ]}
28
  ]
29
  input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
 
30
  inputs = processor(
31
  image,
32
  input_text,
33
  add_special_tokens=False,
34
  return_tensors="pt"
35
  ).to(model.device)
 
 
 
 
 
 
 
36
  with torch.no_grad():
37
- output = model.generate(**inputs, max_new_tokens=100)
 
38
  result = processor.decode(output[0], skip_special_tokens=True)
 
39
  try:
40
  return json.loads(result)
41
  except json.JSONDecodeError:
@@ -43,9 +54,7 @@ def analyze_image(image, prompt):
43
  except Exception as e:
44
  return {"error": str(e), "traceback": traceback.format_exc()}
45
 
46
- default_prompt = """Analyze this image and determine if it contains a data logger.
47
- A data logger is typically a small, black electronic device used to monitor and record data
48
- over time, such as voltage, temperature, or current, via external sensors.
49
 
50
  If a data logger is present in the image, respond with:
51
  {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}
@@ -69,4 +78,77 @@ iface = gr.Interface(
69
  ]
70
  )
71
 
72
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
10
  token = os.getenv("HUGGINGFACE_TOKEN").strip()
11
+
12
  processor = AutoProcessor.from_pretrained(model_name, token=token)
13
  model = AutoModelForCausalLM.from_pretrained(
14
  model_name,
 
28
  ]}
29
  ]
30
  input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
31
+
32
  inputs = processor(
33
  image,
34
  input_text,
35
  add_special_tokens=False,
36
  return_tensors="pt"
37
  ).to(model.device)
38
+
39
+ # Separate inputs for generate method
40
+ generate_inputs = {
41
+ k: v for k, v in inputs.items()
42
+ if k not in ['pixel_values', 'aspect_ratio_ids', 'aspect_ratio_mask']
43
+ }
44
+
45
  with torch.no_grad():
46
+ output = model.generate(**generate_inputs, max_new_tokens=100)
47
+
48
  result = processor.decode(output[0], skip_special_tokens=True)
49
+
50
  try:
51
  return json.loads(result)
52
  except json.JSONDecodeError:
 
54
  except Exception as e:
55
  return {"error": str(e), "traceback": traceback.format_exc()}
56
 
57
+ default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors.
 
 
58
 
59
  If a data logger is present in the image, respond with:
60
  {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}
 
78
  ]
79
  )
80
 
81
+ iface.launch()
82
+
83
+ # import torch
84
+ # from PIL import Image
85
+ # from transformers import AutoProcessor, AutoModelForCausalLM
86
+ # import gradio as gr
87
+ # import json
88
+ # import traceback
89
+ # import os
90
+
91
+ # model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
92
+ # token = os.getenv("HUGGINGFACE_TOKEN").strip()
93
+ # processor = AutoProcessor.from_pretrained(model_name, token=token)
94
+ # model = AutoModelForCausalLM.from_pretrained(
95
+ # model_name,
96
+ # quantization_config={"load_in_4bit": True},
97
+ # token=token
98
+ # )
99
+
100
+ # if torch.cuda.is_available():
101
+ # model = model.to('cuda')
102
+
103
+ # def analyze_image(image, prompt):
104
+ # try:
105
+ # messages = [
106
+ # {"role": "user", "content": [
107
+ # {"type": "image"},
108
+ # {"type": "text", "text": prompt}
109
+ # ]}
110
+ # ]
111
+ # input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
112
+ # inputs = processor(
113
+ # image,
114
+ # input_text,
115
+ # add_special_tokens=False,
116
+ # return_tensors="pt"
117
+ # ).to(model.device)
118
+ # with torch.no_grad():
119
+ # output = model.generate(**inputs, max_new_tokens=100)
120
+ # result = processor.decode(output[0], skip_special_tokens=True)
121
+ # try:
122
+ # return json.loads(result)
123
+ # except json.JSONDecodeError:
124
+ # return {"error": "Failed to parse model output as JSON", "raw_output": result}
125
+ # except Exception as e:
126
+ # return {"error": str(e), "traceback": traceback.format_exc()}
127
+
128
+ # default_prompt = """Analyze this image and determine if it contains a data logger.
129
+ # A data logger is typically a small, black electronic device used to monitor and record data
130
+ # over time, such as voltage, temperature, or current, via external sensors.
131
+
132
+ # If a data logger is present in the image, respond with:
133
+ # {"present": true, "reason": "Brief explanation of why you believe it's a data logger"}
134
+
135
+ # If no data logger is visible, respond with:
136
+ # {"present": false, "reason": "Brief explanation of why you believe there's no data logger"}
137
+
138
+ # Ensure your response is in valid JSON format."""
139
+
140
+ # iface = gr.Interface(
141
+ # fn=analyze_image,
142
+ # inputs=[
143
+ # gr.Image(type="pil", label="Upload Image"),
144
+ # gr.Textbox(label="Prompt", value=default_prompt, lines=10)
145
+ # ],
146
+ # outputs=gr.JSON(label="Analysis Result"),
147
+ # title="Data Logger Detection using Llama 3.2 Vision",
148
+ # description="Upload an image and customize the prompt to check if it contains a data logger.",
149
+ # examples=[
150
+ # ["bad.png", default_prompt]
151
+ # ]
152
+ # )
153
+
154
+ # iface.launch()