Spaces:

gradymcpeak
/

irc_daa

Sleeping

Grady McPeak

Reduce verbosity and reasoning_effort for latency-related reasons.

a303627 10 months ago

3.48 kB

	import os
	import io
	import base64
	import gradio as gr
	from pathlib import Path
	from typing import List, Optional
	from PIL import ImageOps
	from openai import OpenAI
	from PIL.Image import Image as PILImage

	# OpenAI client shared by every request; the key is taken from the
	# OPENAI_TOKEN environment variable.
	client = OpenAI(api_key=os.environ.get("OPENAI_TOKEN"))

	# Text assets shipped alongside the app.
	PROMPT_PATH = "./assets/system_prompt.txt"
	DOC1_PATH = "./assets/Goodwill-Donation-Value-Guide.txt"
	DOC2_PATH = "./assets/Salvation-Army-Donation-Value-Guide.txt"

	# Load the system prompt once at import time. A missing file is reported
	# on stdout and the prompt stays empty.
	system_prompt = ""
	try:
	    system_prompt = Path(PROMPT_PATH).read_text()
	except FileNotFoundError:
	    print(f"Error: The file '{PROMPT_PATH}' was not found.")

	def _open_existing_files(paths: List[str]) -> List[io.BufferedReader]:
	"""
	Open any files that exist at the given paths in 'rb' mode.
	Returns open file handles; caller must close them.
	Silently skips any path that doesn't exist/read.
	"""
	streams = ""
	for p in paths:
	try:
	if Path(p).is_file():
	streams = streams + p.read() + "\n"
	except Exception:
	pass
	return streams

	def _to_mm_content(text: Optional[str], image: Optional[PILImage \| str]):
	"""
	Build multimodal content list for the Responses API.
	Accepts text and either a PIL Image or a data-URL string.
	"""
	content = []
	if text:
	content.append({"type": "input_text", "text": text})

	if isinstance(image, str) and image.startswith("data:image"):
	content.append({"type": "input_image", "image_url": image})
	elif isinstance(image, PILImage):
	try:
	buf = io.BytesIO()
	image.save(buf, format="PNG")
	b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
	content.append(
	{"type": "input_image", "image_url": f"data:image/png;base64,{b64}"}
	)
	except Exception:
	pass

	return content

	def process(a, b):
	    """
	    Send text ``a`` and image ``b`` to the model and return its answer.

	    The two arguments are packed into one user message via
	    ``_to_mm_content``; when that yields nothing, ``a`` itself is sent as
	    the input. Returns the response's ``output_text`` when it is present
	    and non-empty, otherwise ``str(response)``. Any exception is turned
	    into an error string rather than raised.
	    """
	    try:
	        parts = _to_mm_content(a, b)
	        if parts:
	            payload = [{"role": "user", "content": parts}]
	        else:
	            payload = a

	        print("Generating output...")
	        reply = client.responses.create(
	            model="gpt-5",
	            reasoning={"effort": "low"},
	            text={"verbosity": "low"},
	            tools=[{"type": "web_search"}],
	            input=payload,
	        )
	        print("Done!")

	        answer = getattr(reply, "output_text", None)
	        if answer:
	            return answer
	        return str(reply)
	    except Exception as e:
	        return f"Error calling OpenAI: {e}"

	def respond(image):
	    """
	    Handle a change of the image input and return the text to display.

	    With no image, returns a reminder to provide one. Otherwise the image
	    is passed through ``ImageOps.exif_transpose`` and sent to ``process``
	    together with the system prompt followed by the text returned by
	    ``_open_existing_files`` for the two guide paths.
	    """
	    if image is not None:
	        upright = ImageOps.exif_transpose(image)
	        full_prompt = system_prompt + _open_existing_files([DOC1_PATH, DOC2_PATH])
	        return process(full_prompt, upright)

	    return "Please upload or take a photo first."


	# Stylesheet passed to gr.Blocks: caps the element with id "narrow" at
	# 960px wide and centers it horizontally via auto left/right margins.
	CSS = """
	#narrow {
	max-width: 960px;
	margin-left: auto;
	margin-right: auto;
	}
	"""

	# UI definition: a photo input and a Markdown output area.
	# NOTE(review): the nesting of the components inside the two columns is
	# reconstructed (the original indentation was not available) - confirm.
	with gr.Blocks(title="GW+IRC DAA", css=CSS) as demo:
	    # Outer column carries the "narrow" id targeted by CSS.
	    with gr.Column(elem_id="narrow"):
	        with gr.Column():
	            gr.Markdown("# GW+IRC Donation Audit Assistant\n Use your camera to take a picture or upload one.")
	            # type="pil" means the callback receives a PIL image.
	            img = gr.Image(
	                label="Take or upload a photo",
	                sources=["webcam", "upload"],
	                type="pil",
	                height=460,
	                width=1080
	            )
	            gr.Markdown("### Output:")
	            out = gr.Markdown(min_height=100)

	    # Call respond() whenever the image value changes and show its return
	    # value in the output Markdown.
	    img.change(respond, inputs=img, outputs=out)


	def main():
	    """Launch the Gradio app held in the module-level ``demo``."""
	    demo.launch()


	if __name__ == "__main__":
	    # Go through main() so running the script and calling main() from
	    # elsewhere share one entry point (main() was previously never called).
	    main()