# Hugging Face Space (status: Sleeping)
| import json | |
| import gradio as gr | |
| import pandas as pd | |
| import dspy | |
| # ----------------------------- | |
| # DSPy Signature | |
| # ----------------------------- | |
class GenerateQA(dspy.Signature):
    """Generate a simple synthetic question-answer example."""

    # NOTE: DSPy uses a Signature's docstring as the task instructions, so the
    # docstring above is runtime text -- edit it only to change the prompt.

    # Inputs supplied by the caller.
    topic: str = dspy.InputField(
        desc="topic for the synthetic example",
    )
    difficulty: str = dspy.InputField(
        desc="easy, medium, or hard",
    )

    # Outputs produced by the language model.
    question: str = dspy.OutputField(
        desc="a clear question about the topic",
    )
    answer: str = dspy.OutputField(
        desc="a short correct answer",
    )
| # ----------------------------- | |
| # Core generator | |
| # ----------------------------- | |
def _error_result(table_message, json_message=None):
    """Build the (DataFrame, JSON string) pair used to report a failure."""
    if json_message is None:
        json_message = table_message
    return (
        pd.DataFrame([{"error": table_message}]),
        json.dumps({"error": json_message}, indent=2),
    )


def generate_synthetic_data(
    openai_api_key: str,
    topic: str,
    difficulty: str,
    num_examples: int
):
    """Generate synthetic question/answer rows for a topic.

    Args:
        openai_api_key: OpenAI API key; surrounding whitespace is ignored.
        topic: Subject of the generated examples; surrounding whitespace is
            ignored.
        difficulty: Difficulty label passed through to the DSPy signature.
        num_examples: How many examples to generate. Values that ``int()``
            accepts (e.g. ``5.0`` from a slider) are coerced; zero or a
            negative number yields an empty result.

    Returns:
        A ``(DataFrame, str)`` pair. On success the frame has the columns
        ``topic``, ``difficulty``, ``question`` and ``answer`` and the string
        is the same rows as indented JSON. On any failure the frame has a
        single ``error`` column and the string is a JSON object with an
        ``error`` key. This function does not raise for bad input or for
        errors during generation.
    """
    api_key = (openai_api_key or "").strip()
    if not api_key:
        return _error_result(
            "Please enter your OpenAI API key.",
            "Missing OpenAI API key.",
        )

    clean_topic = (topic or "").strip()
    if not clean_topic:
        return _error_result("Please enter a topic.", "Missing topic.")

    # UI widgets may deliver the count as a float (or nothing at all);
    # range() needs a real int, so normalise before doing any LM work.
    try:
        count = int(num_examples)
    except (TypeError, ValueError, OverflowError):
        return _error_result("Number of examples must be a whole number.")

    if count <= 0:
        # Nothing to generate: same empty result as an empty loop, without
        # constructing an LM.
        return pd.DataFrame([]), json.dumps([], indent=2)

    # Top-level UI boundary: any failure is reported to the user as an error
    # payload instead of propagating.
    try:
        lm = dspy.LM(
            model="openai/gpt-4o-mini",
            api_key=api_key,
        )
        generator = dspy.Predict(GenerateQA)

        rows = []
        # dspy.configure() would change process-wide settings, letting
        # concurrent requests overwrite each other's LM (and API key), and
        # DSPy rejects reconfiguration from other threads. dspy.context()
        # scopes the LM to this call only.
        with dspy.context(lm=lm):
            for rollout_id in range(1, count + 1):
                pred = generator(
                    topic=clean_topic,
                    difficulty=difficulty,
                    # A distinct rollout_id per example, at temperature 1.0.
                    config={"temperature": 1.0, "rollout_id": rollout_id},
                )
                rows.append({
                    "topic": clean_topic,
                    "difficulty": difficulty,
                    "question": pred.question,
                    "answer": pred.answer,
                })

        return pd.DataFrame(rows), json.dumps(rows, indent=2)
    except Exception as e:
        return _error_result(str(e))
| # ----------------------------- | |
| # Example loader | |
| # ----------------------------- | |
def load_example(example_topic):
    """Return the selected example topic unchanged.

    Used as the click handler of the example buttons: the value handed in
    is passed straight through to the handler's output component.
    """
    selected_topic = example_topic
    return selected_topic
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
# Preset topics offered as one-click shortcuts in the UI below.
EXAMPLE_TOPICS = [
    "machine learning", "prompt engineering",
    "financial literacy", "cybersecurity basics",
    "project management",
]
# Page layout: a row with the input form (left) and example shortcuts
# (right), followed by the generated table and its JSON form.
with gr.Blocks(title="DSPy Synthetic Data Creator") as demo:
    gr.Markdown(
        """
        # DSPy Synthetic Data Creator
        Generate simple synthetic Q&A examples using DSPy + OpenAI.
        """
    )

    with gr.Row():
        # Left column: everything the generator takes as input.
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                type="password",  # masks the key while typing
                label="OpenAI API Key",
                placeholder="Paste your OpenAI API key here",
            )
            topic = gr.Textbox(label="Topic", placeholder="Example: machine learning")
            difficulty = gr.Dropdown(
                label="Difficulty",
                choices=["easy", "medium", "hard"],
                value="easy",
            )
            num_examples = gr.Slider(
                label="Number of Examples",
                minimum=1, maximum=20, step=1,
                value=5,
            )
            generate_btn = gr.Button("Generate Synthetic Data", variant="primary")

        # Right column: one button per preset topic; clicking a button
        # writes that topic into the Topic textbox.
        with gr.Column(scale=1):
            gr.Markdown("### Example starting inputs")
            for item in EXAMPLE_TOPICS:
                example_btn = gr.Button(item)
                # Each button gets its own State holding its topic, which
                # load_example passes through to the textbox.
                example_btn.click(fn=load_example, inputs=gr.State(item), outputs=topic)

    gr.Markdown("### Generated Table")
    output_table = gr.Dataframe(
        headers=["topic", "difficulty", "question", "answer"],
        datatype=["str", "str", "str", "str"],
        interactive=False,
    )

    gr.Markdown("### JSON Output")
    output_json = gr.Code(label="JSON", language="json")

    # Run the generator with the form values and show both renderings.
    generate_btn.click(
        fn=generate_synthetic_data,
        inputs=[api_key, topic, difficulty, num_examples],
        outputs=[output_table, output_json],
    )

# Start the Gradio server.
demo.launch()