LoRDxdd's picture
Add gpt4free API for Hugging Face
a4b70d9
from typing import BinaryIO, Union, Awaitable
import base64
import mimetypes
import asyncio
from markitdown._stream_info import StreamInfo
def llm_caption(
file_stream: BinaryIO, stream_info: StreamInfo, *, client, model, prompt=None
) -> Union[None, str, Awaitable[str]]:
if prompt is None or prompt.strip() == "":
prompt = "Write a detailed caption for this image."
# Get the content type
content_type = stream_info.mimetype
if not content_type:
content_type, _ = mimetypes.guess_type("_dummy" + (stream_info.extension or ""))
if not content_type:
content_type = "application/octet-stream"
# Convert to base64
cur_pos = file_stream.tell()
try:
base64_image = base64.b64encode(file_stream.read()).decode("utf-8")
except Exception as e:
return None
finally:
file_stream.seek(cur_pos)
# Prepare the data-uri
data_uri = f"data:{content_type};base64,{base64_image}"
# Prepare the OpenAI API request
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": data_uri,
},
},
],
}
]
# Call the OpenAI API
response = client.chat.completions.create(model=model, messages=messages)
if asyncio.iscoroutine(response):
async def read_content(response):
response = await response
return response.choices[0].message.content
return read_content(response)
return response.choices[0].message.content