# Source: app.py, uploaded by decodingdatascience (commit 437a1c7, verified, 12.5 kB).
# Hosting-page controls captured with the file: Raw / History / Blame.
import os
import traceback
from typing import Any
import gradio as gr
from openai import OpenAI
# Choices for the "Model" dropdown on the Generation Controls tab.
GENERATION_MODELS = ["gpt-4.1-mini", "gpt-4.1", "gpt-4o-mini", "gpt-5.5"]

# Choices for the "Model" dropdown on the Reasoning Controls tab.
REASONING_MODELS = ["gpt-5.5", "o4-mini", "o3-mini"]
def get_client() -> OpenAI | None:
    """
    Create an OpenAI client from the API key stored in the environment.

    Hugging Face Spaces exposes Secrets as environment variables, so the key
    is expected under OPENAI_API_KEY (set it in the Space Settings). The
    lowercase name is checked second, purely as a convenience for local
    testing.

    Returns:
        An ``OpenAI`` client, or ``None`` when neither variable holds a
        non-empty value.
    """
    for env_name in ("OPENAI_API_KEY", "openai_api_key"):
        candidate = os.getenv(env_name)
        if candidate:
            return OpenAI(api_key=candidate)
    return None
def _read_field(obj: Any, name: str) -> Any:
    """Return ``obj.name`` if set, else ``obj[name]`` for dicts, else ``None``."""
    value = getattr(obj, name, None)
    if value is None and isinstance(obj, dict):
        value = obj.get(name)
    return value


def extract_output_text(response: Any) -> str:
    """
    Robustly extract text from an OpenAI Responses API response.

    The response, its output items, and their content parts may each be
    either SDK objects (attribute access) or plain dicts (key access); all
    three levels are read the same way.

    Args:
        response: The value returned by ``client.responses.create`` or an
            equivalent dict.

    Returns:
        The stripped ``output_text`` when it is present and non-empty.
        Otherwise the ``text`` / ``output_text`` of every content part found
        under ``output``, joined with newlines and stripped. If no text is
        found at all, ``str(response)`` is returned so the caller still has
        something to display.
    """
    output_text = _read_field(response, "output_text")
    if output_text:
        # str() guards against a non-string value; a real str is unchanged.
        return str(output_text).strip()

    chunks: list[str] = []
    for item in _read_field(response, "output") or []:
        for part in _read_field(item, "content") or []:
            text = _read_field(part, "text") or _read_field(part, "output_text")
            if text:
                chunks.append(str(text))

    return "\n".join(chunks).strip() if chunks else str(response)
def is_gpt5_family(model: str) -> bool:
    """
    Tell whether a model name belongs to the GPT-5 family.

    GPT-5 family models may reject custom sampling controls such as
    temperature, which surfaces as a 400 error. Callers use this check to
    leave those controls out of the request for GPT-5.x models.

    The comparison ignores surrounding whitespace and letter case.
    """
    prefix = "gpt-5"
    normalized = model.strip().lower()
    return normalized[: len(prefix)] == prefix
def format_settings(title: str, settings: dict[str, Any]) -> str:
    """
    Render a settings mapping as a small text banner.

    The banner is a ``--- title ---`` header, one ``key: value`` line per
    entry (in the mapping's iteration order), and a dashed footer followed by
    a blank line so that model output can be appended directly after it.
    """
    header = f"--- {title} ---"
    entries = [f"{name}: {setting}" for name, setting in settings.items()]
    footer = "------------------------\n"
    return "\n".join([header, *entries, footer])
def run_generation(
    prompt: str,
    model: str,
    system_message: str,
    temperature: float,
    top_p: float,
    max_output_tokens: int,
    frequency_penalty: float,
    presence_penalty: float,
    show_settings: bool,
) -> str:
    """
    Send one generation request to the OpenAI Responses API.

    Args:
        prompt: User prompt; a blank prompt is rejected with a message.
        model: Model name to call.
        system_message: Sent as ``instructions``; a generic default is used
            when it is empty.
        temperature: Sampling temperature (not sent for GPT-5 family models).
        top_p: Nucleus sampling value (not sent for GPT-5 family models).
        max_output_tokens: Upper bound on generated tokens.
        frequency_penalty: Accepted for UI compatibility but never sent: the
            Responses API has no such parameter (it belongs to Chat
            Completions), and passing it makes the request fail.
        presence_penalty: Same as ``frequency_penalty``.
        show_settings: When true, prefix the output with a settings banner.

    Returns:
        The text to display: the model output (optionally preceded by notes
        and a settings banner), or a human-readable message when the API key
        is missing, the prompt is empty, or the API call fails. API errors
        are caught and reported, not raised.
    """
    client = get_client()
    if client is None:
        return (
            "Missing API key.\n\n"
            "In Hugging Face Spaces, go to Settings → Secrets and add:\n"
            "Name: OPENAI_API_KEY\n"
            "Value: your OpenAI API key"
        )
    if not prompt or not prompt.strip():
        return "Please enter a prompt."

    gpt5 = is_gpt5_family(model)
    params: dict[str, Any] = {
        "model": model,
        "instructions": system_message or "You are a helpful assistant.",
        "input": prompt,
        "max_output_tokens": int(max_output_tokens),
    }

    notes: list[str] = []
    if gpt5:
        notes.append(
            "Note: GPT-5 family models can reject custom sampling controls. "
            "Temperature, top_p, frequency_penalty, and presence_penalty were not sent."
        )
    else:
        # Only temperature and top_p are Responses API parameters.
        params["temperature"] = float(temperature)
        params["top_p"] = float(top_p)
        # Tell the user when a non-default penalty they chose had no effect.
        if float(frequency_penalty) != 0.0 or float(presence_penalty) != 0.0:
            notes.append(
                "Note: The Responses API has no frequency_penalty or presence_penalty "
                "parameters, so those two values were not sent."
            )
    settings_note = "".join(f"{note}\n\n" for note in notes)

    try:
        response = client.responses.create(**params)
        text = extract_output_text(response)
        if not show_settings:
            return settings_note + text

        settings: dict[str, Any] = {
            "model": model,
            "system_message": system_message,
            "max_output_tokens": max_output_tokens,
        }
        if gpt5:
            settings["sampling_controls"] = "not sent for GPT-5 family model"
        else:
            unsupported = "not sent; unsupported by the Responses API"
            settings.update(
                {
                    "temperature": temperature,
                    "top_p": top_p,
                    "frequency_penalty": f"{frequency_penalty} ({unsupported})",
                    "presence_penalty": f"{presence_penalty} ({unsupported})",
                }
            )
        return settings_note + format_settings("Generation Settings", settings) + text
    except Exception as exc:
        # Top-level UI boundary: show the failure in the output box instead
        # of letting the Gradio event handler crash.
        return (
            "OpenAI API error:\n"
            f"{exc}\n\n"
            "Tip: If you selected a GPT-5 family model, try keeping generation controls at default "
            "or use the Reasoning Controls tab.\n\n"
            f"Technical details:\n{traceback.format_exc()}"
        )
def run_reasoning(
    prompt: str,
    model: str,
    reasoning_effort: str,
    max_output_tokens: int,
    show_settings: bool,
) -> str:
    """
    Send one reasoning request to the OpenAI Responses API.

    Args:
        prompt: User prompt; a blank prompt is rejected with a message.
        model: Model name to call.
        reasoning_effort: Passed as ``reasoning={"effort": ...}``.
        max_output_tokens: Upper bound on generated tokens.
        show_settings: When true, prefix the output with a settings banner.

    Returns:
        The text to display: the model output (optionally preceded by a
        settings banner), or a human-readable message when the API key is
        missing, the prompt is empty, the model ran out of tokens before
        answering, or the API call fails. API errors are caught and reported,
        not raised.
    """
    client = get_client()
    if client is None:
        return (
            "Missing API key.\n\n"
            "In Hugging Face Spaces, go to Settings → Secrets and add:\n"
            "Name: OPENAI_API_KEY\n"
            "Value: your OpenAI API key"
        )
    if not prompt or not prompt.strip():
        return "Please enter a prompt."

    params: dict[str, Any] = {
        "model": model,
        "input": prompt,
        "reasoning": {"effort": reasoning_effort},
        "max_output_tokens": int(max_output_tokens),
    }

    try:
        response = client.responses.create(**params)

        # Reasoning tokens are charged against max_output_tokens, so the
        # budget can be exhausted before any visible text is produced. In
        # that case explain what happened instead of falling through to a
        # dump of the raw response object.
        incomplete = getattr(response, "status", None) == "incomplete"
        if incomplete and not getattr(response, "output_text", None):
            details = getattr(response, "incomplete_details", None)
            reason = getattr(details, "reason", None) or "unknown"
            text = (
                "The model stopped before producing any visible answer "
                f"(reason: {reason}).\n"
                "Reasoning tokens count toward Max Output Tokens; try raising that limit "
                "or lowering Reasoning Effort."
            )
        else:
            text = extract_output_text(response)

        if not show_settings:
            return text

        settings = {
            "model": model,
            "reasoning_effort": reasoning_effort,
            "max_output_tokens": max_output_tokens,
            "api": "OpenAI Responses API",
        }
        return format_settings("Reasoning Settings", settings) + text
    except Exception as exc:
        # Top-level UI boundary: show the failure in the output box instead
        # of letting the Gradio event handler crash.
        return (
            "OpenAI API error:\n"
            f"{exc}\n\n"
            "Tip: Make sure your account has access to the selected model, or try another model "
            "from the dropdown.\n\n"
            f"Technical details:\n{traceback.format_exc()}"
        )
# Stylesheet passed to gr.Blocks(css=...): limits and centres the page width,
# centres the element with id "main-title", and uses a monospace font for
# text areas inside elements with the "output-box" class.
custom_css = "\n".join(
    [
        "",
        ".gradio-container {",
        "max-width: 1180px !important;",
        "margin: auto !important;",
        "}",
        "#main-title {",
        "text-align: center;",
        "}",
        ".output-box textarea {",
        'font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;',
        "}",
        "",
    ]
)
# Build the Gradio interface. The resulting Blocks app is bound to `demo`,
# which the __main__ guard at the bottom of the file queues and launches.
with gr.Blocks(
title="OpenAI LLM Controls",
theme=gr.themes.Soft(),
css=custom_css,
) as demo:
# Page heading; elem_id ties it to the "#main-title" rule in custom_css.
gr.Markdown(
"""
# OpenAI LLM Controls
Experiment with generation settings and reasoning effort using the OpenAI Responses API.
Add your key in Hugging Face Spaces as the secret `OPENAI_API_KEY`.
""",
elem_id="main-title",
)
# ---- Tab 1: generation controls, handled by run_generation ----
with gr.Tab("Generation Controls"):
gr.Markdown(
"""
Use this tab to test practical writing and completion tasks.
For GPT-5 family models, the app avoids sending custom sampling controls to prevent unsupported-parameter errors.
"""
)
with gr.Row():
with gr.Column(scale=1):
gen_prompt = gr.Textbox(
lines=7,
label="Prompt",
value="Write a short LinkedIn post explaining why business leaders should learn AI. Maximum 120 words.",
)
gen_model = gr.Dropdown(
GENERATION_MODELS,
label="Model",
value="gpt-4.1-mini",
)
system_message = gr.Textbox(
lines=3,
label="System Message",
value="You are a helpful AI instructor. Keep answers clear and practical.",
)
# Sliders whose values are forwarded to run_generation on click.
with gr.Accordion("Advanced Generation Settings", open=True):
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
step=0.01,
value=0.7,
label="Temperature",
)
top_p = gr.Slider(
minimum=0.0,
maximum=1.0,
step=0.01,
value=1.0,
label="Top P",
)
max_output_tokens_gen = gr.Slider(
minimum=50,
maximum=4000,
step=10,
value=300,
label="Max Output Tokens",
)
frequency_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
step=0.01,
value=0.0,
label="Frequency Penalty",
)
presence_penalty = gr.Slider(
minimum=-2.0,
maximum=2.0,
step=0.01,
value=0.0,
label="Presence Penalty",
)
show_settings_gen = gr.Checkbox(value=True, label="Show Settings")
gen_button = gr.Button("Generate", variant="primary")
with gr.Column(scale=1):
# "output-box" class picks up the monospace rule from custom_css.
gen_output = gr.Textbox(
lines=22,
label="Output",
elem_classes=["output-box"],
show_copy_button=True,
)
# The inputs are listed in the same order as run_generation's parameters.
gen_button.click(
fn=run_generation,
inputs=[
gen_prompt,
gen_model,
system_message,
temperature,
top_p,
max_output_tokens_gen,
frequency_penalty,
presence_penalty,
show_settings_gen,
],
outputs=gen_output,
)
# ---- Tab 2: reasoning controls, handled by run_reasoning ----
with gr.Tab("Reasoning Controls"):
gr.Markdown(
"""
Use this tab for analysis, recommendations, technical trade-offs, planning, and decision-making tasks.
"""
)
with gr.Row():
with gr.Column(scale=1):
reason_prompt = gr.Textbox(
lines=9,
label="Prompt",
value=(
"A telecom company wants to build an AI customer support assistant. "
"They have 50,000 past support tickets, a FAQ website, billing policies, "
"and a small developer team. Should they start with: "
"1. Simple prompt-based chatbot 2. RAG chatbot 3. Fine-tuning "
"4. Agent with tools. Give a practical recommendation with trade-offs."
),
)
reason_model = gr.Dropdown(
REASONING_MODELS,
label="Model",
value="gpt-5.5",
)
reasoning_effort = gr.Radio(
["low", "medium", "high"],
label="Reasoning Effort",
value="medium",
)
max_output_tokens_reason = gr.Slider(
minimum=100,
maximum=8000,
step=50,
value=900,
label="Max Output Tokens",
)
show_settings_reason = gr.Checkbox(value=True, label="Show Settings")
reason_button = gr.Button("Reason", variant="primary")
with gr.Column(scale=1):
reason_output = gr.Textbox(
lines=22,
label="Output",
elem_classes=["output-box"],
show_copy_button=True,
)
# The inputs are listed in the same order as run_reasoning's parameters.
reason_button.click(
fn=run_reasoning,
inputs=[
reason_prompt,
reason_model,
reasoning_effort,
max_output_tokens_reason,
show_settings_reason,
],
outputs=reason_output,
)
if __name__ == "__main__":
    # Script entry point: turn on Gradio's queue, then serve the app on all
    # network interfaces. The port is read from the PORT environment
    # variable and falls back to 7860 when it is not set.
    demo.queue()
    listen_port = int(os.environ.get("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port)