# irc_daa / app.py
# Author: Grady McPeak
# Last change (a303627): Reduce verbosity and reasoning_effort for
# latency-related reasons.
import os
import io
import base64
import gradio as gr
from pathlib import Path
from typing import List, Optional
from PIL import ImageOps
from openai import OpenAI
from PIL.Image import Image as PILImage
# OpenAI client; the API key is read from the OPENAI_TOKEN environment
# variable (None is passed through if the variable is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_TOKEN"))

# Text assets: the system prompt and the two donation value guides.
PROMPT_PATH = "./assets/system_prompt.txt"
DOC1_PATH = "./assets/Goodwill-Donation-Value-Guide.txt"
DOC2_PATH = "./assets/Salvation-Army-Donation-Value-Guide.txt"

# Load the system prompt once at import time. A missing file is reported on
# stdout and the app continues with an empty prompt.
system_prompt = ""
try:
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(PROMPT_PATH, "r", encoding="utf-8") as file:
        system_prompt = file.read()
except FileNotFoundError:
    print(f"Error: The file '{PROMPT_PATH}' was not found.")
def _open_existing_files(paths: List[str]) -> str:
    """
    Read every existing file in ``paths`` and return their text concatenated.

    Each file's content is followed by a single newline, in the order the
    paths are given. Paths that do not point to a regular file, cannot be
    read, or are not valid path values are silently skipped (best effort),
    so the result is an empty string when nothing could be read.

    Files are decoded as UTF-8; undecodable bytes are replaced rather than
    causing the whole document to be dropped.
    """
    texts = []
    for p in paths:
        try:
            path = Path(p)
            if not path.is_file():
                continue
            texts.append(path.read_text(encoding="utf-8", errors="replace") + "\n")
        except (OSError, ValueError, TypeError):
            # Deliberate best effort: an unreadable or malformed path must
            # not prevent the remaining documents from being loaded.
            continue
    return "".join(texts)
def _to_mm_content(text: Optional[str], image: Optional[PILImage | str]):
    """
    Build the multimodal content list for the Responses API.

    ``text``, when non-empty, becomes an ``input_text`` item. ``image`` may
    be a string starting with ``data:image`` (passed through unchanged) or a
    PIL image (encoded as PNG and embedded as a base64 data URL); either
    becomes an ``input_image`` item. Any other ``image`` value is ignored.

    Returns the list of content items, which may be empty. Image encoding is
    best effort: if the PIL image cannot be encoded, the failure is printed
    and the image item is omitted instead of raising.
    """
    content = []
    if text:
        content.append({"type": "input_text", "text": text})

    if isinstance(image, str):
        # Only strings that already look like an image data URL are forwarded.
        if image.startswith("data:image"):
            content.append({"type": "input_image", "image_url": image})
    elif isinstance(image, PILImage):
        try:
            buf = io.BytesIO()
            image.save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
        except Exception as exc:
            # Stay best effort, but report the problem: otherwise the request
            # is silently sent without the photo it is supposed to describe.
            print(f"Warning: could not encode image as PNG, omitting it: {exc}")
        else:
            content.append(
                {"type": "input_image", "image_url": f"data:image/png;base64,{b64}"}
            )
    return content
def process(a, b):
    """
    Send a prompt and an optional image to the model and return its reply.

    ``a`` and ``b`` are handed to ``_to_mm_content`` as the text and image
    respectively. When that yields content items they are sent as a single
    user message; otherwise ``a`` itself is used as the request input.

    Returns the response's ``output_text`` when it is present and non-empty,
    else ``str(response)``. Any exception is caught and returned as an
    ``"Error calling OpenAI: ..."`` string instead of being raised.
    """
    try:
        parts = _to_mm_content(a, b)
        if parts:
            request_input = [{"role": "user", "content": parts}]
        else:
            request_input = a

        print("Generating output...")
        response = client.responses.create(
            model="gpt-5",
            reasoning={"effort": "low"},
            text={"verbosity": "low"},
            tools=[{"type": "web_search"}],
            input=request_input,
        )
        print("Done!")

        reply = getattr(response, "output_text", None)
        if reply:
            return reply
        return str(response)
    except Exception as err:
        return f"Error calling OpenAI: {err}"
def respond(image):
    """
    Gradio callback: produce the model's output for an uploaded or captured photo.

    Returns a short instruction message when no image is supplied. Otherwise
    the image is passed through ``ImageOps.exif_transpose`` and sent to
    ``process`` together with a prompt made of ``system_prompt`` followed by
    whatever ``_open_existing_files`` returns for the two guide paths.
    """
    if image is not None:
        upright = ImageOps.exif_transpose(image)
        guide_text = _open_existing_files([DOC1_PATH, DOC2_PATH])
        return process(system_prompt + guide_text, upright)
    return "Please upload or take a photo first."
# Page CSS: limits the element with id "narrow" to 960px wide and gives it
# automatic left/right margins.
CSS = """
#narrow {
max-width: 960px;
margin-left: auto;
margin-right: auto;
}
"""

# UI definition. Components are created in display order inside the Blocks
# context.
# NOTE(review): the nesting of the two columns below was reconstructed from
# source with its indentation stripped - confirm against the deployed layout.
with gr.Blocks(title="GW+IRC DAA", css=CSS) as demo:
    # Outer column carries the "narrow" id targeted by CSS above.
    with gr.Column(elem_id="narrow"):
        with gr.Column():
            gr.Markdown("# GW+IRC Donation Audit Assistant\n Use your camera to take a picture or upload one.")
            # type="pil" so the callback receives a PIL image.
            img = gr.Image(
                label="Take or upload a photo",
                sources=["webcam", "upload"],
                type="pil",
                height=460,
                width=1080
            )
        gr.Markdown("### Output:")
        out = gr.Markdown(min_height=100)
    # Run `respond` whenever the image value changes and render its return
    # value in `out`.
    img.change(respond, inputs=img, outputs=out)
def main():
    """Launch the Gradio app defined by ``demo``."""
    demo.launch()


if __name__ == "__main__":
    # Go through main() so the script entry point and any caller that uses
    # main() share one launch path.
    main()