Spaces:
Sleeping
Sleeping
Update utils/basic_functions.py
Browse files- utils/basic_functions.py +24 -17
utils/basic_functions.py
CHANGED
|
@@ -53,7 +53,27 @@ def load_model(model_name):
|
|
| 53 |
print(f"processor: {processor}")
|
| 54 |
return processor,model
|
| 55 |
|
| 56 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# 2. Function to process image with Gemini Pro Vision
|
| 58 |
try:
|
| 59 |
image = get_image(url)
|
|
@@ -73,27 +93,14 @@ def gemini_identify_id(url,system_prompt):
|
|
| 73 |
|
| 74 |
draw = ImageDraw.Draw(image)
|
| 75 |
draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
|
| 76 |
-
image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
|
|
|
|
| 77 |
|
| 78 |
# Huggingface repo usage
|
| 79 |
-
def
|
| 80 |
try:
|
| 81 |
#image = get_image(url)
|
| 82 |
-
image = Image.open(requests.get(url,stream=True).raw)
|
| 83 |
-
print(f"image: {image}")
|
| 84 |
-
|
| 85 |
-
system_prompt = f"""
|
| 86 |
-
You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
|
| 87 |
-
This is usually identified in a location outside of the main content on the document, and usually on the bottom
|
| 88 |
-
right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
|
| 89 |
-
is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
|
| 90 |
-
as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
|
| 91 |
-
values should fit into the image size which is {image.size}.
|
| 92 |
-
"""
|
| 93 |
-
print(f"system_prompt: {system_prompt}")
|
| 94 |
-
|
| 95 |
processor,model=load_model(model_name)
|
| 96 |
-
|
| 97 |
conversation = [
|
| 98 |
{
|
| 99 |
"role":"user",
|
|
|
|
| 53 |
print(f"processor: {processor}")
|
| 54 |
return processor,model
|
| 55 |
|
| 56 |
+
def request_manager(model_name, url):
    """Fetch the image at ``url`` and run ID-number detection with the chosen backend.

    The backend is selected by substring match on ``model_name``:
    names containing ``'gemini'`` go to ``gemini_identify_id`` and names
    containing ``'llava'`` go to ``huggingface_llava_15_7b_hf``.

    Args:
        model_name: Model identifier used both to pick the backend and as
            the first argument passed to it.
        url: Location of the document image, passed to ``get_image``.

    Returns:
        Whatever the selected backend returns, unchanged.

    Raises:
        ValueError: If ``model_name`` matches no supported backend.
    """
    # Resolve the backend before doing any work: previously an unrecognised
    # model name fell through both branches and failed with UnboundLocalError
    # on ``return_packet`` only after the image had already been fetched.
    if 'gemini' in model_name:
        identify = gemini_identify_id
    elif 'llava' in model_name:
        identify = huggingface_llava_15_7b_hf
    else:
        raise ValueError(
            f"Unsupported model_name {model_name!r}: expected a name "
            "containing 'gemini' or 'llava'."
        )

    image = get_image(url)
    print(f"image: {image}")

    # The prompt embeds image.size so the model can keep the returned box
    # inside the image bounds (assumes get_image returns an object exposing
    # ``.size``, e.g. a PIL image - TODO confirm against get_image).
    system_prompt = f"""
    You are an AI document processing assistant. Analyze the provided image. Identify the ID number in the document.
    This is usually identified in a location outside of the main content on the document, and usually on the bottom
    right or left of the document. The rotation of the number may differ based on images. Furthermore the ID number
    is usually a string of numbers, around 9 number characters in length. Could possibly have alphabetic characters
    as well but that looks to be rare. The output should only be a string in the format [x0,y0,x1,y1], and the
    values should fit into the image size which is {image.size}.
    """
    print(f"system_prompt: {system_prompt}")

    return identify(model_name, image, system_prompt)
|
| 75 |
+
|
| 76 |
+
def gemini_identify_id(model_name,image,system_prompt):
|
| 77 |
# 2. Function to process image with Gemini Pro Vision
|
| 78 |
try:
|
| 79 |
image = get_image(url)
|
|
|
|
| 93 |
|
| 94 |
draw = ImageDraw.Draw(image)
|
| 95 |
draw.rectangle(ast.literal_eval(response_text),outline='yellow',width=5)
|
| 96 |
+
#image.save(f'{os.path.dirname(os.path.dirname(os.path.abspath(__file__)))}\\download\\{dt.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.jpg')
|
| 97 |
+
return [image,response_text]
|
| 98 |
|
| 99 |
# Huggingface repo usage
|
| 100 |
+
def huggingface_llava_15_7b_hf(model_name,image,system_prompt):
|
| 101 |
try:
|
| 102 |
#image = get_image(url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
processor,model=load_model(model_name)
|
|
|
|
| 104 |
conversation = [
|
| 105 |
{
|
| 106 |
"role":"user",
|