Spaces:
Sleeping
Sleeping
File size: 3,150 Bytes
7a38b33 8c5ab45 44e85d0 8c5ab45 7a38b33 8c5ab45 7a38b33 44e85d0 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 28f26e5 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 8c5ab45 7a38b33 28f26e5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import json
from dotenv import load_dotenv
from openai import BadRequestError, OpenAI
from .constants import (
EXTRACT_INFO_HUMAN_MESSAGE,
EXTRACT_INFO_SYSTEM_MESSAGE,
FOLLOW_SCHEMA_HUMAN_MESSAGE,
FOLLOW_SCHEMA_SYSTEM_MESSAGE,
)
from .exceptions import RefusalError
from .utils import get_data_format, get_image_data
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file,
# overriding any values already set in the process environment.
load_dotenv(override=True)
# Module-level OpenAI client shared by both helpers below; credentials are
# picked up from the environment populated above.
client = OpenAI()
def extract_info(img_paths, schema, known_data=None):
    """Extract structured information from one or more images via OpenAI.

    Sends the images (base64-encoded data URLs) together with the standard
    extraction prompt to the structured-output endpoint and validates the
    reply against *schema*.

    Args:
        img_paths: Iterable of image file paths to encode and send.
        schema: Pydantic model class used both as ``response_format`` and to
            validate the parsed JSON reply.
        known_data: Optional already-known data appended to the prompt as an
            extra hint for the model.

    Returns:
        Tuple of ``(200, model_data)`` where ``model_data`` is a validated
        instance of *schema*.

    Raises:
        RefusalError: If the model refuses to answer.
    """
    print("Extracting info via OpenAI...")
    text_content = [
        {
            "type": "text",
            "text": EXTRACT_INFO_HUMAN_MESSAGE,
        },
    ]
    if known_data is not None:
        text_content.append(
            {
                "type": "text",
                "text": f'\nAlso exploit the known data: \n\n"{known_data}"',
            }
        )
    # BUG FIX: the MIME subtype of the data URL must come from
    # get_data_format() — the original interpolated get_image_data() (the
    # base64 payload) into the "data:image/..." slot, producing a malformed
    # URL. get_data_format was imported but never used, confirming intent.
    image_content = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/{get_data_format(img_path)};base64,{get_image_data(img_path)}",
            },
        }
        for img_path in img_paths
    ]
    response = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[
            {
                "role": "system",
                "content": EXTRACT_INFO_SYSTEM_MESSAGE,
            },
            {
                "role": "user",
                # Text parts first, then one image part per input path.
                "content": text_content + image_content,
            },
        ],
        max_tokens=1000,
        response_format=schema,
        logprobs=True,
        top_logprobs=2,
        temperature=0.0,
        # top_p=.0000000000000000000001
    )
    if response.choices[0].message.refusal:
        raise RefusalError("OpenAI refused to respond to the request")
    content = response.choices[0].message.content
    parsed_data = json.loads(content)
    # Re-validate through the schema so callers get a typed model instance.
    model_data = schema.model_validate(parsed_data)
    return 200, model_data
def follow_structure(json_info, schema, known_data=None):
    """Coerce already-extracted JSON info into the target *schema* via OpenAI.

    Formats *json_info* into the standard schema-following prompt, calls the
    structured-output endpoint, and validates the reply against *schema*.

    Args:
        json_info: Previously extracted JSON data to be restructured.
        schema: Pydantic model class used both as ``response_format`` and to
            validate the parsed JSON reply.
        known_data: Optional already-known data appended to the prompt as an
            extra hint for the model.

    Returns:
        Tuple of ``(200, model_data)`` where ``model_data`` is a validated
        instance of *schema*.

    Raises:
        RefusalError: If the model refuses to answer.
    """
    print("Following structure via OpenAI...")
    # Assemble the user message: the formatted prompt, optionally followed by
    # a hint carrying the caller-supplied known data.
    user_parts = [
        {
            "type": "text",
            "text": FOLLOW_SCHEMA_HUMAN_MESSAGE.format(json_info=json_info),
        },
    ]
    if known_data is not None:
        hint = {
            "type": "text",
            "text": f'\nAlso Exploit the known data: \n\n"{known_data}"',
        }
        user_parts.append(hint)
    messages = [
        {"role": "system", "content": FOLLOW_SCHEMA_SYSTEM_MESSAGE},
        {"role": "user", "content": user_parts},
    ]
    response = client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=messages,
        max_tokens=1000,
        response_format=schema,
        logprobs=True,
        top_logprobs=2,
        temperature=0.0,
        # top_p=.0000000000000000000001
    )
    choice = response.choices[0]
    if choice.message.refusal:
        raise RefusalError("OpenAI refused to respond to the request")
    # Parse the raw JSON reply and re-validate it into a typed model instance.
    parsed = json.loads(choice.message.content)
    return 200, schema.model_validate(parsed)
|