"""Gradio front end for the GW+IRC Donation Audit Assistant.

A photo taken or uploaded in the UI is sent, together with the system prompt
and the donation value guides found under ./assets, to the OpenAI Responses
API; the model's text output is rendered as Markdown.
"""

import base64
import io
import os
from pathlib import Path
from typing import List, Optional

import gradio as gr
from openai import OpenAI
from PIL import ImageOps
from PIL.Image import Image as PILImage

client = OpenAI(api_key=os.environ.get("OPENAI_TOKEN"))

PROMPT_PATH = "./assets/system_prompt.txt"
DOC1_PATH = "./assets/Goodwill-Donation-Value-Guide.txt"
DOC2_PATH = "./assets/Salvation-Army-Donation-Value-Guide.txt"

# Loaded once at import time; stays empty when the prompt file is missing so
# the app can still start.
system_prompt = ""
try:
    with open(PROMPT_PATH, "r", encoding="utf-8") as file:
        system_prompt = file.read()
except FileNotFoundError:
    print(f"Error: The file '{PROMPT_PATH}' was not found.")


def _open_existing_files(paths: List[str]) -> str:
    """Return the concatenated text of every file in *paths* that exists.

    Each file's content is followed by a newline. Paths that do not point to
    a regular file are skipped silently; a file that exists but cannot be
    read is skipped with a printed warning.

    Args:
        paths: File system paths of the text documents to load.

    Returns:
        The combined text, or an empty string when nothing could be read.
    """
    chunks = []
    for p in paths:
        path = Path(p)
        try:
            if not path.is_file():
                continue
            # The original called ``p.read()`` on the path string itself,
            # which always raised and was swallowed, so no document text was
            # ever returned. Read the file contents instead.
            chunks.append(path.read_text(encoding="utf-8", errors="replace") + "\n")
        except OSError as exc:
            print(f"Warning: could not read '{p}': {exc}")
    return "".join(chunks)


def _to_mm_content(text: Optional[str], image: Optional[PILImage | str]) -> List[dict]:
    """Build the multimodal content list for the Responses API.

    Args:
        text: Optional text; added as an ``input_text`` part when non-empty.
        image: Optional image, either a PIL image (encoded here as a PNG data
            URL) or a string that already starts with ``data:image``. Any
            other value is ignored.

    Returns:
        A list of content-part dicts; empty when neither input is usable.
    """
    content = []
    if text:
        content.append({"type": "input_text", "text": text})

    if isinstance(image, str) and image.startswith("data:image"):
        content.append({"type": "input_image", "image_url": image})
    elif isinstance(image, PILImage):
        try:
            buf = io.BytesIO()
            image.save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
            content.append(
                {"type": "input_image", "image_url": f"data:image/png;base64,{b64}"}
            )
        except Exception as exc:
            # Best effort: the request still goes out with the text part, but
            # say why the image was dropped instead of hiding the failure.
            print(f"Warning: could not encode image as PNG: {exc}")
    return content


def process(a, b):
    """Send text *a* and image *b* to the model and return its reply.

    Args:
        a: Prompt text (may be ``None`` or empty).
        b: PIL image or ``data:image`` URL string (may be ``None``).

    Returns:
        The response's ``output_text`` when present and non-empty, otherwise
        ``str(response)``. If anything raises, a string starting with
        ``"Error calling OpenAI: "`` is returned instead.
    """
    try:
        content = _to_mm_content(a, b)
        tools = [{"type": "web_search"}]
        print("Generating output...")
        response = client.responses.create(
            model="gpt-5",
            reasoning={"effort": "low"},
            text={"verbosity": "low"},
            tools=tools,
            # Fall back to the raw text when no content parts could be built.
            input=[{"role": "user", "content": content}] if content else a,
        )
        print("Done!")
        return getattr(response, "output_text", None) or str(response)
    except Exception as e:
        # UI boundary: report the failure in the output pane rather than
        # raising into Gradio.
        return f"Error calling OpenAI: {e}"


def respond(image):
    """Gradio callback: run the model on the uploaded or captured photo.

    Args:
        image: PIL image supplied by the ``gr.Image`` component, or ``None``
            when the component has been cleared.

    Returns:
        Text to display in the output pane.
    """
    if image is None:
        return "Please upload or take a photo first."
    # Apply the EXIF orientation so the model sees the photo upright.
    image = ImageOps.exif_transpose(image)
    guides = _open_existing_files([DOC1_PATH, DOC2_PATH])
    # Keep the prompt and the guide text on separate lines; skip empty parts.
    prompt = "\n".join(part for part in (system_prompt, guides) if part)
    return process(prompt, image)


CSS = """
#narrow {
    max-width: 960px;
    margin-left: auto;
    margin-right: auto;
}
"""

with gr.Blocks(title="GW+IRC DAA", css=CSS) as demo:
    with gr.Column(elem_id="narrow"):
        with gr.Column():
            gr.Markdown("# GW+IRC Donation Audit Assistant\n Use your camera to take a picture or upload one.")
            img = gr.Image(
                label="Take or upload a photo",
                sources=["webcam", "upload"],
                type="pil",
                height=460,
                width=1080,
            )
            gr.Markdown("### Output:")
            out = gr.Markdown(min_height=100)
            # Re-run the model whenever the image changes (including clears).
            img.change(respond, inputs=img, outputs=out)


def main():
    """Launch the Gradio app."""
    demo.launch()


if __name__ == "__main__":
    main()