"""Gradio app that generates synthetic question-answer pairs with DSPy + OpenAI."""

import json

import dspy
import gradio as gr
import pandas as pd


# -----------------------------
# DSPy Signature
# -----------------------------
class GenerateQA(dspy.Signature):
    """Generate a simple synthetic question-answer example."""

    topic: str = dspy.InputField(desc="topic for the synthetic example")
    difficulty: str = dspy.InputField(desc="easy, medium, or hard")
    question: str = dspy.OutputField(desc="a clear question about the topic")
    answer: str = dspy.OutputField(desc="a short correct answer")


# -----------------------------
# Core generator
# -----------------------------
def _error_result(table_message: str, json_message: str) -> tuple[pd.DataFrame, str]:
    """Build the (table, JSON text) pair shown in the UI when a request fails."""
    return (
        pd.DataFrame([{"error": table_message}]),
        json.dumps({"error": json_message}, indent=2),
    )


def generate_synthetic_data(
    openai_api_key: str,
    topic: str,
    difficulty: str,
    num_examples: int,
) -> tuple[pd.DataFrame, str]:
    """Generate ``num_examples`` synthetic Q&A rows about ``topic``.

    Args:
        openai_api_key: OpenAI API key used for this request only.
        topic: Subject the questions should be about.
        difficulty: Difficulty label forwarded to the model
            (the UI offers "easy", "medium", "hard").
        num_examples: How many examples to generate; coerced with ``int()``.

    Returns:
        A ``(DataFrame, JSON string)`` pair. On success both hold one record
        per example with the keys ``topic``, ``difficulty``, ``question`` and
        ``answer``. On any failure both hold a single ``{"error": ...}``
        record instead; this function does not raise for ordinary errors.
    """
    if not openai_api_key or not openai_api_key.strip():
        return _error_result(
            "Please enter your OpenAI API key.",
            "Missing OpenAI API key.",
        )
    if not topic or not topic.strip():
        return _error_result("Please enter a topic.", "Missing topic.")

    clean_key = openai_api_key.strip()
    clean_topic = topic.strip()

    try:
        # The slider may deliver a float (e.g. 5.0), which range() rejects.
        count = int(num_examples)

        # Configure DSPy with an OpenAI-compatible LM
        lm = dspy.LM(model="openai/gpt-4o-mini", api_key=clean_key)
        generator = dspy.Predict(GenerateQA)

        rows = []
        # Scope the LM to this request instead of calling dspy.configure():
        # global configuration would share one user's key with every other
        # request, and DSPy rejects re-configuration from a thread other than
        # the one that configured it first (Gradio may run handlers on
        # worker threads).
        with dspy.context(lm=lm):
            for i in range(count):
                pred = generator(
                    topic=clean_topic,
                    difficulty=difficulty,
                    # A distinct rollout_id per iteration asks DSPy for a
                    # fresh completion rather than a cached one.
                    config={"temperature": 1.0, "rollout_id": i + 1},
                )
                rows.append(
                    {
                        "topic": clean_topic,
                        "difficulty": difficulty,
                        "question": pred.question,
                        "answer": pred.answer,
                    }
                )
    except Exception as e:
        # UI boundary: surface the failure in the output widgets rather than
        # letting the handler crash.
        message = str(e)
        return _error_result(message, message)

    return pd.DataFrame(rows), json.dumps(rows, indent=2)


# -----------------------------
# Example loader
# -----------------------------
def load_example(example_topic):
    """Return ``example_topic`` unchanged so a button click can fill the topic box."""
    return example_topic


# -----------------------------
# Gradio UI
# -----------------------------
EXAMPLE_TOPICS = [
    "machine learning",
    "prompt engineering",
    "financial literacy",
    "cybersecurity basics",
    "project management",
]

with gr.Blocks(title="DSPy Synthetic Data Creator") as demo:
    gr.Markdown(
        """
        # DSPy Synthetic Data Creator
        Generate simple synthetic Q&A examples using DSPy + OpenAI.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                label="OpenAI API Key",
                placeholder="Paste your OpenAI API key here",
                type="password",
            )
            topic = gr.Textbox(
                label="Topic",
                placeholder="Example: machine learning",
            )
            difficulty = gr.Dropdown(
                choices=["easy", "medium", "hard"],
                value="easy",
                label="Difficulty",
            )
            num_examples = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Examples",
            )
            generate_btn = gr.Button("Generate Synthetic Data", variant="primary")

        with gr.Column(scale=1):
            gr.Markdown("### Example starting inputs")
            for item in EXAMPLE_TOPICS:
                example_btn = gr.Button(item)
                # Each button carries its own topic in a State component, so
                # the value is bound per button rather than read from the
                # loop variable at click time.
                example_btn.click(
                    fn=load_example,
                    inputs=gr.State(item),
                    outputs=topic,
                )

    gr.Markdown("### Generated Table")
    output_table = gr.Dataframe(
        headers=["topic", "difficulty", "question", "answer"],
        datatype=["str", "str", "str", "str"],
        interactive=False,
    )

    gr.Markdown("### JSON Output")
    output_json = gr.Code(label="JSON", language="json")

    generate_btn.click(
        fn=generate_synthetic_data,
        inputs=[api_key, topic, difficulty, num_examples],
        outputs=[output_table, output_json],
    )

demo.launch()