Spaces:
Sleeping
Sleeping
import json

from dotenv import load_dotenv
# NOTE(review): BadRequestError appears unused in this chunk — confirm it is
# used elsewhere in the module before removing.
from openai import BadRequestError, OpenAI

from .constants import (
    EXTRACT_INFO_HUMAN_MESSAGE,
    EXTRACT_INFO_SYSTEM_MESSAGE,
    FOLLOW_SCHEMA_HUMAN_MESSAGE,
    FOLLOW_SCHEMA_SYSTEM_MESSAGE,
)
from .exceptions import RefusalError
from .utils import get_data_format, get_image_data

# Load environment variables (e.g. OPENAI_API_KEY), overriding any values
# already present in the process environment.
load_dotenv(override=True)

# Module-level OpenAI client shared by every API call in this module.
client = OpenAI()
def extract_info(img_paths, schema, known_data=None):
    """Extract structured information from one or more images via OpenAI.

    Args:
        img_paths: Iterable of image file paths to send as data URLs.
        schema: Pydantic model class used both as the structured-output
            ``response_format`` and to validate the returned JSON.
        known_data: Optional extra context appended to the user prompt.

    Returns:
        Tuple of ``(200, model_data)`` where ``model_data`` is an instance
        of ``schema`` validated from the model's JSON response.

    Raises:
        RefusalError: If the model refuses to answer the request.
    """
    print("Extracting info via OpenAI...")

    text_content = [
        {
            "type": "text",
            "text": EXTRACT_INFO_HUMAN_MESSAGE,
        },
    ]
    if known_data is not None:
        text_content.append(
            {
                "type": "text",
                "text": f'\nAlso exploit the known data: \n\n"{known_data}"',
            }
        )

    # BUG FIX: the data-URL MIME subtype previously used get_image_data()
    # (the base64 payload) instead of get_data_format(), yielding an invalid
    # URL like "data:image/<base64 blob>;base64,..." and reading each image
    # twice. get_data_format() supplies the format; get_image_data() the body.
    image_content = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/{get_data_format(img_path)};base64,{get_image_data(img_path)}",
            },
        }
        for img_path in img_paths
    ]

    response = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {
                "role": "system",
                "content": EXTRACT_INFO_SYSTEM_MESSAGE,
            },
            {
                "role": "user",
                "content": text_content + image_content,
            },
        ],
        max_tokens=1000,
        response_format=schema,
        logprobs=True,
        top_logprobs=2,
        temperature=0.0,  # deterministic-as-possible extraction
        # top_p=.0000000000000000000001
    )

    # A refusal arrives as a populated `refusal` field rather than content.
    if response.choices[0].message.refusal:
        raise RefusalError("OpenAI refused to respond to the request")

    # Parse the raw JSON text and re-validate against the caller's schema.
    content = response.choices[0].message.content
    parsed_data = json.loads(content)
    model_data = schema.model_validate(parsed_data)
    return 200, model_data
def follow_structure(json_info, schema, known_data=None):
    """Reshape ``json_info`` into the structure described by ``schema``.

    Args:
        json_info: Source JSON data interpolated into the user prompt.
        schema: Pydantic model class used both as the structured-output
            ``response_format`` and to validate the returned JSON.
        known_data: Optional extra context appended to the user prompt.

    Returns:
        Tuple of ``(200, model_data)`` where ``model_data`` is an instance
        of ``schema`` validated from the model's JSON response.

    Raises:
        RefusalError: If the model refuses to answer the request.
    """
    print("Following structure via OpenAI...")

    # Assemble the user message: the main prompt, plus optional known data.
    user_parts = [
        {
            "type": "text",
            "text": FOLLOW_SCHEMA_HUMAN_MESSAGE.format(json_info=json_info),
        },
    ]
    if known_data is not None:
        user_parts.append(
            {
                "type": "text",
                "text": f'\nAlso Exploit the known data: \n\n"{known_data}"',
            }
        )

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {"role": "system", "content": FOLLOW_SCHEMA_SYSTEM_MESSAGE},
            {"role": "user", "content": user_parts},
        ],
        max_tokens=1000,
        response_format=schema,
        logprobs=True,
        top_logprobs=2,
        temperature=0.0,
        # top_p=.0000000000000000000001
    )

    # A refusal arrives as a populated `refusal` field rather than content.
    message = completion.choices[0].message
    if message.refusal:
        raise RefusalError("OpenAI refused to respond to the request")

    # Parse the raw JSON text and re-validate against the caller's schema.
    return 200, schema.model_validate(json.loads(message.content))