Spaces:

bcvilnrotter
/

id-code-identifier

Sleeping

App Files Community

bcvilnrotter committed on Feb 27, 2025

Commit

525217d

verified ·

1 Parent(s): cba8c8b

Update utils/basic_functions.py

Browse files

Files changed (1) hide show

utils/basic_functions.py +24 -17

utils/basic_functions.py CHANGED Viewed

@@ -53,7 +53,27 @@ def load_model(model_name):
     print(f"processor: {processor}")
     return processor,model
-def gemini_identify_id(url,system_prompt):
     # 2. Function to process image with Gemini Pro Vision
     try:
         image = get_image(url)
@@ -73,27 +93,14 @@ def gemini_identify_id(url,system_prompt):
     draw = ImageDraw.Draw(image)
     draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
-    image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
 # Huggingface repo usage
-def huggingface_detect_id_box(model_name,url):
     try:
         #image = get_image(url)
-        image = Image.open(requests.get(url,stream=True).raw)
-        print(f"image: {image}")
-        system_prompt = f"""
-        You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
-        This is usually identified in a location outside of the main content on the document, and usually on the bottom
-        right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
-        is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
-        as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
-        values should fit into the image size which is {image.size}.
-        """
-        print(f"system_prompt: {system_prompt}")
         processor,model=load_model(model_name)
         conversation = [
             {
                 "role":"user",

     print(f"processor: {processor}")
     return processor,model
+def request_manager(model_name,url):
+    image = get_image(url)
+    print(f"image: {image}")
+    system_prompt = f"""
+    You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
+    This is usually identified in a location outside of the main content on the document, and usually on the bottom
+    right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
+    is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
+    as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
+    values should fit into the image size which is {image.size}.
+    """
+    print(f"system_prompt: {system_prompt}")
+    if 'gemini' in model_name:
+        return_packet = gemini_identify_id(model_name,image,system_prompt)
+    elif 'llava' in model_name:
+        return_packet = huggingface_llava_15_7b_hf(model_name,image,system_prompt)
+    return return_packet
+def gemini_identify_id(model_name,image,system_prompt):
     # 2. Function to process image with Gemini Pro Vision
     try:
         image = get_image(url)
     draw = ImageDraw.Draw(image)
     draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
+    #image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
+    return [image,response_text]
 # Huggingface repo usage
+def huggingface_llava_15_7b_hf(model_name,image,system_prompt):
     try:
         #image = get_image(url)
         processor,model=load_model(model_name)
         conversation = [
             {
                 "role":"user",