"""Helpers that ask an OpenAI chat model for schema-constrained output.

Two entry points are exposed:

* ``extract_info`` sends one or more images plus an instruction prompt.
* ``follow_structure`` sends already-extracted JSON-like info to be
  reshaped to a schema.

Both return ``(200, model)`` where ``model`` is the result of
``schema.model_validate`` applied to the decoded JSON reply.
"""

import json

from dotenv import load_dotenv
from openai import BadRequestError, OpenAI

from .constants import (
    EXTRACT_INFO_HUMAN_MESSAGE,
    EXTRACT_INFO_SYSTEM_MESSAGE,
    FOLLOW_SCHEMA_HUMAN_MESSAGE,
    FOLLOW_SCHEMA_SYSTEM_MESSAGE,
)
from .exceptions import RefusalError
from .utils import get_data_format, get_image_data

# Load the .env file (overriding variables that are already set) before the
# client is constructed.
load_dotenv(override=True)

client = OpenAI()

# Request settings shared by every call made from this module.
_MODEL = "gpt-4o-2024-08-06"
_MAX_TOKENS = 1000


def _image_part(img_path):
    """Build the ``image_url`` content part for a single image path."""
    # NOTE(review): assumes get_data_format(img_path) returns the image
    # subtype used in the media type (e.g. "png") and get_image_data(img_path)
    # returns the base64 payload -- confirm against .utils. Previously the
    # payload was interpolated into both slots, producing an invalid media
    # type and encoding every image twice.
    data_format = get_data_format(img_path)
    encoded = get_image_data(img_path)
    return {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/{data_format};base64,{encoded}",
        },
    }


def _request_structured(system_message, user_content, schema):
    """Send one system + user exchange and validate the reply against *schema*.

    Args:
        system_message: Text used as the system message content.
        user_content: List of content parts used as the user message content.
        schema: Passed as ``response_format``; must also provide
            ``model_validate`` (e.g. a Pydantic model class).

    Returns:
        The tuple ``(200, model)`` where ``model`` is
        ``schema.model_validate`` applied to the decoded JSON reply.

    Raises:
        RefusalError: If the first choice carries a refusal.
    """
    response = client.beta.chat.completions.parse(
        model=_MODEL,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_content},
        ],
        max_tokens=_MAX_TOKENS,
        response_format=schema,
        logprobs=True,
        top_logprobs=2,
        temperature=0.0,
    )

    message = response.choices[0].message
    if message.refusal:
        raise RefusalError("OpenAI refused to respond to the request")

    parsed_data = json.loads(message.content)
    return 200, schema.model_validate(parsed_data)


def extract_info(img_paths, schema, known_data=None):
    """Extract schema-shaped information from images via OpenAI.

    Args:
        img_paths: Iterable of image paths; each becomes one ``image_url``
            content part appended after the text parts.
        schema: Response schema (see ``_request_structured``).
        known_data: Optional extra context; when not ``None`` it is added
            to the prompt as an additional text part.

    Returns:
        ``(200, model)`` with the validated model instance.

    Raises:
        RefusalError: If the model refuses the request.
    """
    print("Extracting info via OpenAI...")

    text_content = [
        {
            "type": "text",
            "text": EXTRACT_INFO_HUMAN_MESSAGE,
        },
    ]
    if known_data is not None:
        text_content.append(
            {
                "type": "text",
                "text": f'\nAlso exploit the known data: \n\n"{known_data}"',
            }
        )

    image_content = [_image_part(img_path) for img_path in img_paths]

    return _request_structured(
        EXTRACT_INFO_SYSTEM_MESSAGE,
        text_content + image_content,
        schema,
    )


def follow_structure(json_info, schema, known_data=None):
    """Reshape already-extracted information to *schema* via OpenAI.

    Args:
        json_info: Value substituted into ``FOLLOW_SCHEMA_HUMAN_MESSAGE``
            through its ``json_info`` placeholder.
        schema: Response schema (see ``_request_structured``).
        known_data: Optional extra context; when not ``None`` it is added
            to the prompt as an additional text part.

    Returns:
        ``(200, model)`` with the validated model instance.

    Raises:
        RefusalError: If the model refuses the request.
    """
    print("Following structure via OpenAI...")

    text_content = [
        {
            "type": "text",
            "text": FOLLOW_SCHEMA_HUMAN_MESSAGE.format(json_info=json_info),
        },
    ]
    if known_data is not None:
        text_content.append(
            {
                "type": "text",
                "text": f'\nAlso Exploit the known data: \n\n"{known_data}"',
            }
        )

    return _request_structured(
        FOLLOW_SCHEMA_SYSTEM_MESSAGE,
        text_content,
        schema,
    )