# DSPYSimpleData / app.py
# Author: eaglelandsonce
# Commit 80c4f68 (verified): "Create app.py"
import json
import gradio as gr
import pandas as pd
import dspy
# -----------------------------
# DSPy Signature
# -----------------------------
class GenerateQA(dspy.Signature):
    """Generate a simple synthetic question-answer example."""

    # NOTE: DSPy builds the prompt from the class docstring above and from each
    # field's ``desc`` text, so those strings are runtime behavior, not just
    # documentation. Change them deliberately.

    # Inputs supplied by the caller.
    topic: str = dspy.InputField(desc="topic for the synthetic example")
    difficulty: str = dspy.InputField(desc="easy, medium, or hard")

    # Outputs produced by the language model.
    question: str = dspy.OutputField(desc="a clear question about the topic")
    answer: str = dspy.OutputField(desc="a short correct answer")
# -----------------------------
# Core generator
# -----------------------------
def _error_result(table_message, json_message=None):
    """Build the (DataFrame, JSON string) pair used to report a failure.

    ``table_message`` goes into a one-row DataFrame with a single ``error``
    column; ``json_message`` goes into the JSON text and defaults to the same
    message when omitted.
    """
    if json_message is None:
        json_message = table_message
    return (
        pd.DataFrame([{"error": table_message}]),
        json.dumps({"error": json_message}, indent=2),
    )


def generate_synthetic_data(
    openai_api_key: str,
    topic: str,
    difficulty: str,
    num_examples: int
):
    """Generate synthetic question/answer rows for a topic with DSPy.

    Args:
        openai_api_key: OpenAI API key; surrounding whitespace is ignored.
        topic: Subject of the generated examples; surrounding whitespace is
            ignored.
        difficulty: Difficulty label passed through to the model unchanged.
        num_examples: Number of examples to generate. Coerced with ``int()``
            so an integral float (e.g. ``5.0``) is accepted.

    Returns:
        A ``(DataFrame, str)`` pair. On success the DataFrame has one row per
        example with ``topic``, ``difficulty``, ``question`` and ``answer``
        columns, and the string is the same rows as indented JSON. On failure
        (missing key, missing topic, or any exception raised while generating)
        the DataFrame has a single ``error`` column and the string is a JSON
        object with an ``error`` key. The function does not raise for these
        cases because both values are rendered directly by the UI.
    """
    cleaned_key = (openai_api_key or "").strip()
    if not cleaned_key:
        return _error_result(
            "Please enter your OpenAI API key.",
            "Missing OpenAI API key.",
        )

    cleaned_topic = (topic or "").strip()
    if not cleaned_topic:
        return _error_result("Please enter a topic.", "Missing topic.")

    try:
        # range() rejects floats, and a UI slider may deliver one.
        example_count = int(num_examples)

        # Configure DSPy with an OpenAI-compatible LM
        lm = dspy.LM(
            model="openai/gpt-4o-mini",
            api_key=cleaned_key,
        )

        rows = []
        # Scope the LM to this call instead of dspy.configure(): the handler
        # runs once per request, possibly on different worker threads, and a
        # process-global setting would both trip DSPy's "only the configuring
        # thread may change settings" check and leave one caller's API key
        # installed as the default for every other caller.
        with dspy.context(lm=lm):
            generator = dspy.Predict(GenerateQA)
            for index in range(example_count):
                pred = generator(
                    topic=cleaned_topic,
                    difficulty=difficulty,
                    # Every iteration sends the same inputs; a distinct
                    # rollout_id (with non-zero temperature) keeps the calls
                    # from being treated as the same cached request.
                    config={"temperature": 1.0, "rollout_id": index + 1},
                )
                rows.append({
                    "topic": cleaned_topic,
                    "difficulty": difficulty,
                    "question": pred.question,
                    "answer": pred.answer,
                })

        return pd.DataFrame(rows), json.dumps(rows, indent=2)
    except Exception as e:
        # UI boundary: surface any failure (bad key, network, parsing, ...)
        # in the outputs rather than letting the handler crash.
        return _error_result(str(e))
# -----------------------------
# Example loader
# -----------------------------
def load_example(example_topic):
    """Return the given example topic unchanged.

    Serves as a pass-through click handler: whatever value it receives is
    handed back so it can be written to the handler's output component.
    """
    selected_topic = example_topic
    return selected_topic
# -----------------------------
# Gradio UI
# -----------------------------
# Topics offered as one-click starting inputs; one button is created per entry.
EXAMPLE_TOPICS = [
    "machine learning",
    "prompt engineering",
    "financial literacy",
    "cybersecurity basics",
    "project management",
]
# Build the UI: inputs and the generate button in one column, example-topic
# buttons in a second column, then the table and JSON outputs.
# NOTE(review): the nesting below was reconstructed from statement order; in
# particular, the results section is assumed to sit below the row rather than
# inside the second column. Confirm against the intended layout.
with gr.Blocks(title="DSPy Synthetic Data Creator") as demo:
    gr.Markdown(
        """
# DSPy Synthetic Data Creator
Generate simple synthetic Q&A examples using DSPy + OpenAI.
"""
    )

    with gr.Row():
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                label="OpenAI API Key",
                placeholder="Paste your OpenAI API key here",
                type="password",  # mask the key while it is typed
            )
            topic = gr.Textbox(
                label="Topic",
                placeholder="Example: machine learning",
            )
            difficulty = gr.Dropdown(
                choices=["easy", "medium", "hard"],
                value="easy",
                label="Difficulty",
            )
            num_examples = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Examples",
            )
            generate_btn = gr.Button("Generate Synthetic Data", variant="primary")

        with gr.Column(scale=1):
            gr.Markdown("### Example starting inputs")
            # One button per example topic; clicking it copies that topic into
            # the Topic textbox. A fresh gr.State per button carries the
            # button's own topic value into the handler.
            for item in EXAMPLE_TOPICS:
                example_btn = gr.Button(item)
                example_btn.click(
                    fn=load_example,
                    inputs=gr.State(item),
                    outputs=topic,
                )

    gr.Markdown("### Generated Table")
    output_table = gr.Dataframe(
        headers=["topic", "difficulty", "question", "answer"],
        datatype=["str", "str", "str", "str"],
        interactive=False,
    )

    gr.Markdown("### JSON Output")
    output_json = gr.Code(label="JSON", language="json")

    # The handler returns a (DataFrame, JSON string) pair, matching the two
    # output components in order.
    generate_btn.click(
        fn=generate_synthetic_data,
        inputs=[api_key, topic, difficulty, num_examples],
        outputs=[output_table, output_json],
    )

# Start the server only when run as a script, so importing this module
# (e.g. to reuse `demo` or the generator function) does not block on launch().
if __name__ == "__main__":
    demo.launch()