bcvilnrotter committed on
Commit
525217d
·
verified ·
1 Parent(s): cba8c8b

Update utils/basic_functions.py

Browse files
Files changed (1) hide show
  1. utils/basic_functions.py +24 -17
utils/basic_functions.py CHANGED
@@ -53,7 +53,27 @@ def load_model(model_name):
53
  print(f"processor: {processor}")
54
  return processor,model
55
 
56
- def gemini_identify_id(url,system_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # 2. Function to process image with Gemini Pro Vision
58
  try:
59
  image = get_image(url)
@@ -73,27 +93,14 @@ def gemini_identify_id(url,system_prompt):
73
 
74
  draw = ImageDraw.Draw(image)
75
  draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
76
- image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
 
77
 
78
  # Huggingface repo usage
79
- def huggingface_detect_id_box(model_name,url):
80
  try:
81
  #image = get_image(url)
82
- image = Image.open(requests.get(url,stream=True).raw)
83
- print(f"image: {image}")
84
-
85
- system_prompt = f"""
86
- You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
87
- This is usually identified in a location outside of the main content on the document, and usually on the bottom
88
- right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
89
- is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
90
- as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
91
- values should fit into the image size which is {image.size}.
92
- """
93
- print(f"system_prompt: {system_prompt}")
94
-
95
  processor,model=load_model(model_name)
96
-
97
  conversation = [
98
  {
99
  "role":"user",
 
53
  print(f"processor: {processor}")
54
  return processor,model
55
 
56
def request_manager(model_name, url):
    """Load the image at *url* and dispatch it to the backend chosen by *model_name*.

    The backend is selected by substring match on ``model_name``:
    ``'gemini'`` is checked first and routes to ``gemini_identify_id``;
    otherwise ``'llava'`` routes to ``huggingface_llava_15_7b_hf``.

    Args:
        model_name: Model identifier; must contain ``'gemini'`` or ``'llava'``.
        url: Location of the image, passed unchanged to ``get_image``.

    Returns:
        Whatever the selected backend function returns.

    Raises:
        ValueError: If ``model_name`` matches no supported backend. Previously
            this case fell through to ``return return_packet`` with the name
            unbound and surfaced as an ``UnboundLocalError``.
    """
    image = get_image(url)
    print(f"image: {image}")

    # NOTE(review): the whitespace inside this literal is part of the prompt
    # sent to the model; the original indentation was not recoverable from the
    # diff view -- confirm against the file before merging.
    system_prompt = f"""
    You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
    This is usually identified in a location outside of the main content on the document, and usually on the bottom
    right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
    is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
    as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
    values should fit into the image size which is {image.size}.
    """
    print(f"system_prompt: {system_prompt}")

    # Return straight from each branch so there is no path on which the
    # result name could be left unbound.
    if 'gemini' in model_name:
        return gemini_identify_id(model_name, image, system_prompt)
    if 'llava' in model_name:
        return huggingface_llava_15_7b_hf(model_name, image, system_prompt)

    raise ValueError(
        f"Unsupported model_name {model_name!r}: expected a name containing "
        "'gemini' or 'llava'."
    )
75
+
76
+ def gemini_identify_id(model_name,image,system_prompt):
77
  # 2. Function to process image with Gemini Pro Vision
78
  try:
79
  image = get_image(url)
 
93
 
94
  draw = ImageDraw.Draw(image)
95
  draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
96
+ #image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
97
+ return [image,response_text]
98
 
99
  # Huggingface repo usage
100
+ def huggingface_llava_15_7b_hf(model_name,image,system_prompt):
101
  try:
102
  #image = get_image(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  processor,model=load_model(model_name)
 
104
  conversation = [
105
  {
106
  "role":"user",