Spaces:

bcvilnrotter
/

id-code-identifier

Sleeping

App Files Community

bcvilnrotter committed on Feb 27, 2025

Commit

b6f5cff

verified ·

1 Parent(s): 3c0f5c4

Update utils/basic_functions.py

Browse files

Files changed (1) hide show

utils/basic_functions.py +12 -11

utils/basic_functions.py CHANGED Viewed

@@ -48,7 +48,9 @@ def load_model(model_name):
         ).to(device)
     else:
         model = AutoModelForVision2Seq.from_pretrained(model_name).to(device)
     processor = AutoProcessor.from_pretrained(model_name,use_fast=True)
     return processor,model
 def gemini_identify_id(url,system_prompt):
@@ -78,7 +80,7 @@ def huggingface_detect_id_box(model_name,url):
     try:
         #image = get_image(url)
         image = Image.open(requests.get(url,stream=True).raw)
-        print(image)
         system_prompt = f"""
         You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
@@ -88,10 +90,9 @@ def huggingface_detect_id_box(model_name,url):
         as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
         values should fit into the image size which is {image.size}.
         """
-        print(system_prompt)
         processor,model=load_model(model_name)
-        print(processor,model)
         conversation = [
             {
@@ -102,13 +103,13 @@ def huggingface_detect_id_box(model_name,url):
                 ],
             },
         ]
-        print(conversation)
         prompt = processor.apply_chat_template(conversation,add_generation_prompt=True)
-        print(prompt)
         inputs = processor(images=image,text=prompt,return_tensors="pt").to(model.device)
-        print(inputs)
         """
         with torch.no_grad():
@@ -124,20 +125,20 @@ def huggingface_detect_id_box(model_name,url):
         """
         output = model.generate(**inputs,max_new_tokens=200,do_sample=False)
-        print(output)
         bbox = processor.decode(output[0][2:],skip_special_tokens=True)
-        print(bbox)
         draw = ImageDraw.Draw(image)
-        print(draw)
         draw.rectangle(bbox,outline="red",width=5)
-        print(image)
         #image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
-        return image,bbox
     except Exception as e:
         print(f"Error loading model or processing image: {str(e)}")
         return None

         ).to(device)
     else:
         model = AutoModelForVision2Seq.from_pretrained(model_name).to(device)
+    print(f"model: {model}")
     processor = AutoProcessor.from_pretrained(model_name,use_fast=True)
+    print(f"processor: {processor}")
     return processor,model
 def gemini_identify_id(url,system_prompt):
     try:
         #image = get_image(url)
         image = Image.open(requests.get(url,stream=True).raw)
+        print(f"image: {image}")
         system_prompt = f"""
         You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
         as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
         values should fit into the image size which is {image.size}.
         """
+        print(f"system_prompt: {system_prompt}")
         processor,model=load_model(model_name)
         conversation = [
             {
                 ],
             },
         ]
+        print(f"conversation: {conversation}")
         prompt = processor.apply_chat_template(conversation,add_generation_prompt=True)
+        print(f"prompt: {prompt}")
         inputs = processor(images=image,text=prompt,return_tensors="pt").to(model.device)
+        print(f"inputs: {inputs}")
         """
         with torch.no_grad():
         """
         output = model.generate(**inputs,max_new_tokens=200,do_sample=False)
+        print(f"output: {output}")
         bbox = processor.decode(output[0][2:],skip_special_tokens=True)
+        print(f"bbox: {bbox}")
         draw = ImageDraw.Draw(image)
+        print(f"draw: {draw}")
         draw.rectangle(bbox,outline="red",width=5)
+        print(f"image: {image}")
         #image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
+        return [image,bbox]
     except Exception as e:
         print(f"Error loading model or processing image: {str(e)}")
         return None