bcvilnrotter committed on
Commit
7cd0cdb
·
verified ·
1 Parent(s): e290c3b

Update utils/basic_functions.py

Browse files
Files changed (1) hide show
  1. utils/basic_functions.py +22 -3
utils/basic_functions.py CHANGED
@@ -76,8 +76,9 @@ def gemini_identify_id(url,system_prompt):
76
  # Huggingface repo usage
77
  def huggingface_detect_id_box(model_name,url):
78
  try:
79
- image = get_image(url)
80
-
 
81
  system_prompt = f"""
82
  You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
83
  This is usually identified in a location outside of the main content on the document, and usually on the bottom
@@ -88,7 +89,20 @@ def huggingface_detect_id_box(model_name,url):
88
  """
89
 
90
  processor,model=load_model(model_name)
91
- inputs = processor(images=[image],text=system_prompt,return_tensors="pt").to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  with torch.no_grad():
93
  output = model.generate(**inputs)
94
 
@@ -99,6 +113,11 @@ def huggingface_detect_id_box(model_name,url):
99
  except Exception as e:
100
  print(f"Error parsing bounding box response: {str(e)}")
101
  return None
 
 
 
 
 
102
 
103
  draw = ImageDraw.Draw(image)
104
  draw.rectangle(bbox,outline="red",width=5)
 
76
  # Huggingface repo usage
77
  def huggingface_detect_id_box(model_name,url):
78
  try:
79
+ #image = get_image(url)
80
+ image = Image.open(requests.get(url,stream=True).raw)
81
+
82
  system_prompt = f"""
83
  You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
84
  This is usually identified in a location outside of the main content on the document, and usually on the bottom
 
89
  """
90
 
91
  processor,model=load_model(model_name)
92
+
93
+ conversation = [
94
+ {
95
+ "role":"user",
96
+ "content":[
97
+ {"type":"text","text":system_prompt},
98
+ {"type":"image"},
99
+ ],
100
+ },
101
+ ]
102
+ prompt = processor.apply_chat_template(conversation,add_generation_prompt=True)
103
+ inputs = processor(images=image,text=prompt,return_tensors="pt").to(model.device)
104
+
105
+ """
106
  with torch.no_grad():
107
  output = model.generate(**inputs)
108
 
 
113
  except Exception as e:
114
  print(f"Error parsing bounding box response: {str(e)}")
115
  return None
116
+ """
117
+
118
+ output = model.generate(**inputs,max_new_tokens=200,do_sample=False)
119
+ print(processor.decode(output[0][2:],skip_special_tokens=True))
120
+
121
 
122
  draw = ImageDraw.Draw(image)
123
  draw.rectangle(bbox,outline="red",width=5)