Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| DSPy Director Bake-Off: A Beginner's Guide to DSPy Programming | |
| This script demonstrates core DSPy concepts through a fun example: | |
| comparing how different movie directors would approach filming the same video idea. | |
| Key DSPy Concepts Demonstrated: | |
| 1. Signatures - Define input/output interfaces for LLM tasks | |
| 2. Modules - Combine multiple signatures into complex workflows | |
| 3. Structured Output - Use Pydantic models for reliable data extraction | |
| 4. Async Processing - Handle multiple LLM calls efficiently | |
| 5. Chain of Thought - Enable reasoning for complex decisions | |
| Author: DSPy Learning Example | |
| """ | |
| # Standard library imports | |
| import os | |
| import sys | |
| import asyncio | |
| from typing import List | |
| # Third-party imports | |
| import dspy # The main DSPy framework for LLM programming | |
| from dotenv import load_dotenv # For loading environment variables from .env file | |
| from pydantic import BaseModel, Field # For structured data validation | |
| # Load environment variables from .env file (contains API keys) | |
| load_dotenv() | |
| # Global variable to store our configured language model | |
| # This will be set up once and reused throughout the application | |
| lm = None | |
| # ========================================================================== | |
| # SECTION 1: LLM SETUP AND CONFIGURATION | |
| # ========================================================================== | |
| def setup_dspy_provider(): | |
| """ | |
| Configure DSPy with an available LLM provider. | |
| DSPy supports many providers (OpenAI, Anthropic, OpenRouter, etc.) | |
| This function tries to connect to OpenRouter, which provides access | |
| to many different models through a single API. | |
| Returns: | |
| str: The name of the provider that was successfully configured | |
| """ | |
| global lm # We'll modify the global lm variable | |
| # Check if we have an OpenRouter API key in our environment variables | |
| if os.getenv('OPENROUTER_API_KEY'): | |
| print("β Configuring DSPy with OpenRouter...") | |
| # Create a DSPy Language Model object | |
| # Format: "provider/model_name" | |
| # Here we use a free model from Moonshot AI via OpenRouter | |
| lm = dspy.LM( | |
| model="openrouter/moonshotai/kimi-k2:free", | |
| api_key=os.getenv('OPENROUTER_API_KEY') | |
| ) | |
| # Configure DSPy to use this language model globally | |
| dspy.configure(lm=lm) | |
| return "openrouter" | |
| else: | |
| print("β No OpenRouter API key found in environment variables.") | |
| print("Please add OPENROUTER_API_KEY to your .env file") | |
| sys.exit(1) | |
| # ========================================================================== | |
| # SECTION 2: DATA STRUCTURES (PYDANTIC MODELS) | |
| # ========================================================================== | |
| class DirectorCut(BaseModel): | |
| """ | |
| π¬ PYDANTIC MODEL: Structured Data for Cinematic Prompts | |
| This is a Pydantic model that defines the structure of our data. | |
| Pydantic ensures that the LLM returns data in exactly the format we expect. | |
| Think of this as a "template" that the LLM must fill out completely. | |
| Each field has a description that helps the LLM understand what to generate. | |
| Why use Pydantic with DSPy? | |
| - Guarantees consistent output format | |
| - Automatic validation of LLM responses | |
| - Type safety for your Python code | |
| - Clear documentation of expected data structure | |
| """ | |
| # Basic information (echoed from input) | |
| director: str = Field( | |
| ..., | |
| description="The director sent as part of input, echoed in the output." | |
| ) | |
| video_idea: str = Field( | |
| ..., | |
| description="The video idea sent as part of input, echoed in the output." | |
| ) | |
| # The seven components of a cinematic prompt | |
| # Each field uses Field(...) where ... means "required" | |
| subject_description: str = Field( | |
| ..., | |
| description="A detailed description of the main subject or character." | |
| ) | |
| action_description: str = Field( | |
| ..., | |
| description="A description of the specific action the subject is performing." | |
| ) | |
| setting_description: str = Field( | |
| ..., | |
| description="A rich description of the environment, location, and time of day." | |
| ) | |
| cinematic_style: str = Field( | |
| ..., | |
| description="The overall visual style or medium (e.g., 'Photorealistic, 8K')." | |
| ) | |
| shot_and_framing: str = Field( | |
| ..., | |
| description="The specific camera shot type and framing (e.g., 'Medium shot')." | |
| ) | |
| camera_movement: str = Field( | |
| ..., | |
| description="The movement of the camera during the shot (e.g., 'Slow dolly shot')." | |
| ) | |
| lighting_and_color: str = Field( | |
| ..., | |
| description="The lighting style and color palette that sets the mood." | |
| ) | |
| def assemble_prompt(self) -> str: | |
| """ | |
| Combines all cinematic components into a single, formatted prompt. | |
| This method takes all the individual pieces and creates a complete | |
| prompt that could be used with video generation AI models. | |
| Returns: | |
| str: A complete, formatted cinematic prompt | |
| """ | |
| # Collect all the cinematic components (excluding director and video_idea) | |
| components = [ | |
| self.subject_description, | |
| self.action_description, | |
| self.setting_description, | |
| self.cinematic_style, | |
| self.shot_and_framing, | |
| self.camera_movement, | |
| self.lighting_and_color, | |
| ] | |
| # Join components with commas, filtering out empty strings | |
| prompt_string = ", ".join(filter(None, [c.strip() for c in components if c])) | |
| # Return empty string if no components | |
| if not prompt_string: | |
| return "" | |
| # Capitalize first letter and add period | |
| return prompt_string[0].upper() + prompt_string[1:] + "." | |
| def pretty_print(self): | |
| """ | |
| Displays the director's interpretation in a nice format. | |
| This is a helper method to make the output more readable | |
| when we're testing or debugging our code. | |
| """ | |
| print(f"--- Director {self.director} ---") | |
| print(f"{self.assemble_prompt()}") | |
| print("--------------------------------") | |
| class ResultClass: | |
| """ | |
| π¦ SIMPLE DATA CONTAINER: Holds All Results | |
| This is a simple class to package up all our results. | |
| We could use a Pydantic model here too, but since this is just | |
| for internal use (not LLM output), a simple class works fine. | |
| """ | |
| def __init__(self, additional_director, director_ideas, director_ranks): | |
| self.additional_director = additional_director # The AI-suggested director | |
| self.director_ideas = director_ideas # List of all DirectorCut objects | |
| self.director_ranks = director_ranks # Ranking results from the judge | |
| # ========================================================================== | |
| # SECTION 3: DSPY SIGNATURES (THE HEART OF DSPY) | |
| # ========================================================================== | |
| """ | |
| π₯ WHAT ARE DSPY SIGNATURES? | |
| DSPy Signatures are like "function signatures" but for LLM tasks. | |
| They define: | |
| 1. What inputs the LLM should expect | |
| 2. What outputs the LLM should produce | |
| 3. The task description (in the docstring) | |
| Think of them as contracts between your code and the LLM. | |
| The LLM will try to fulfill this contract every time. | |
| Key Components: | |
| - InputField: Data going INTO the LLM | |
| - OutputField: Data coming OUT of the LLM | |
| - Docstring: Instructions for the LLM about what to do | |
| """ | |
| class FindDirector(dspy.Signature): | |
| """ | |
| π― SIGNATURE 1: Find Additional Director | |
| This signature asks the LLM to suggest one additional director | |
| who would be perfect for the given video idea, but isn't already | |
| in the user's list. | |
| This demonstrates how DSPy can handle creative, open-ended tasks | |
| where there's no single "correct" answer. | |
| """ | |
| # === INPUTS === | |
| video_idea = dspy.InputField( | |
| desc="A simple, high-level user idea or concept for a video." | |
| ) | |
| director_list: List[str] = dspy.InputField( | |
| desc="The names of directors the user wants to use." | |
| ) | |
| # === OUTPUTS === | |
| additonal_director: str = dspy.OutputField( | |
| desc="The best possible director based on the wanted video idea, that is not already in the provided director list." | |
| ) | |
| class GenerateDirectorCut(dspy.Signature): | |
| """ | |
| π¬ SIGNATURE 2: Generate Cinematic Interpretation | |
| This is the core signature that transforms a simple video idea | |
| into a detailed cinematic prompt in the style of a specific director. | |
| Notice how the output is a Pydantic model (DirectorCut). | |
| DSPy will automatically ensure the LLM returns data in exactly | |
| that structure, with all required fields filled out. | |
| This demonstrates DSPy's structured output capabilities. | |
| """ | |
| # === INPUTS === | |
| video_idea = dspy.InputField( | |
| desc="A simple, high-level user idea or concept for a video." | |
| ) | |
| director = dspy.InputField( | |
| desc="The name of the director to generate a cinematic prompt for (optional).", | |
| default=None, | |
| optional=True | |
| ) | |
| # === OUTPUTS === | |
| director_cut: DirectorCut = dspy.OutputField( | |
| desc="A structured object containing all seven deconstructed cinematic aspects." | |
| ) | |
| class DirectorJudge(dspy.Signature): | |
| """ | |
| βοΈ SIGNATURE 3: Judge and Rank Director Ideas | |
| This signature handles the complex task of comparing multiple | |
| creative interpretations and ranking them objectively. | |
| Notice this takes a List[DirectorCut] as input and returns | |
| both rankings AND an explanation. This shows how DSPy can | |
| handle complex, multi-part outputs. | |
| This demonstrates DSPy's ability to handle reasoning tasks. | |
| """ | |
| # === INPUTS === | |
| director_ideas: List[DirectorCut] = dspy.InputField( | |
| desc="A list of director interpretations to be ranked" | |
| ) | |
| # === OUTPUTS === | |
| director_rankings: List[int] = dspy.OutputField( | |
| description="Rank between 1, 2, 3 ... N where 1 is best" | |
| ) | |
| explanation: str = dspy.OutputField( | |
| description="Explain why the ranking was given and the winner selected. Format your response with clear sections for each director using HTML formatting: use <h4> tags for director names with their rank, <p> tags for paragraphs, and <br> tags for line breaks. Make it well-structured and easy to read." | |
| ) | |
| # ========================================================================== | |
| # SECTION 4: DSPY MODULE (COMBINING SIGNATURES INTO WORKFLOWS) | |
| # ========================================================================== | |
| class DirectorBakeOff(dspy.Module): | |
| """ | |
| ποΈ DSPY MODULE: The Complete Workflow | |
| A DSPy Module combines multiple Signatures into a complete workflow. | |
| Think of it like a class that orchestrates several LLM calls to solve | |
| a complex problem. | |
| This module demonstrates: | |
| 1. How to combine multiple signatures | |
| 2. Different types of DSPy predictors (Predict vs ChainOfThought) | |
| 3. Async processing for efficiency | |
| 4. Complex workflow orchestration | |
| The workflow: | |
| 1. Find an additional director suggestion | |
| 2. Generate cinematic interpretations for all directors (in parallel) | |
| 3. Judge and rank all interpretations | |
| 4. Return the best result with explanations | |
| """ | |
| def __init__(self): | |
| """ | |
| Initialize the module with three different DSPy predictors. | |
| Notice the different types: | |
| - dspy.Predict: Basic prediction (fast, direct) | |
| - dspy.ChainOfThought: Reasoning-enabled prediction (slower, more thoughtful) | |
| """ | |
| # Basic predictor for finding additional director | |
| self.findDirector = dspy.Predict(FindDirector) | |
| # Basic predictor for generating director cuts | |
| self.genDirectorCut = dspy.Predict(GenerateDirectorCut) | |
| # Chain-of-thought predictor for complex ranking decisions | |
| # This will make the LLM "think step by step" before ranking | |
| self.directorJudge = dspy.ChainOfThought(DirectorJudge) | |
| async def aforward(self, video_idea: str, directors: List[str] = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"]): | |
| """ | |
| π ASYNC FORWARD: The Main Workflow | |
| This is where the magic happens! This method orchestrates the entire | |
| director bake-off process using multiple LLM calls. | |
| Key DSPy concepts demonstrated: | |
| 1. Sequential LLM calls (find director first) | |
| 2. Parallel LLM calls (generate all director cuts simultaneously) | |
| 3. Complex data flow between signatures | |
| 4. Async processing for efficiency | |
| Args: | |
| video_idea: The user's video concept | |
| directors: List of director names to compare | |
| Returns: | |
| ResultClass: Complete results including rankings and explanations | |
| """ | |
| # === STEP 1: Display user input === | |
| print("\n㪠User Wanted Directors:") | |
| for director in directors: | |
| print(f" - {director}") | |
| # === STEP 2: Find additional director suggestion === | |
| print("\nπ€ Finding AI-suggested director...") | |
| additional_director_result = self.findDirector( | |
| video_idea=video_idea, | |
| director_list=directors | |
| ) | |
| additional_director = additional_director_result.additonal_director | |
| print(f" β¨ DSPy Suggested Director: {additional_director}") | |
| # === STEP 3: Generate director interpretations (IN PARALLEL!) === | |
| print("\nβ‘ Generating director interpretations in parallel...") | |
| # Combine user directors + AI suggestion | |
| all_directors = directors + [additional_director] | |
| # Use asyncio.gather to run multiple LLM calls simultaneously | |
| # This is much faster than calling them one by one! | |
| director_ideas = await asyncio.gather( | |
| *[self.genDirectorCut.acall(video_idea=video_idea, director=director) | |
| for director in all_directors] | |
| ) | |
| # Display all generated ideas | |
| print("\nπ Generated Director Ideas:") | |
| for idea in director_ideas: | |
| idea.director_cut.pretty_print() | |
| # === STEP 4: Judge and rank all interpretations === | |
| print("\nβοΈ Judging and ranking director ideas...") | |
| # Extract just the DirectorCut objects for judging | |
| director_cuts = [idea.director_cut for idea in director_ideas] | |
| # Use Chain-of-Thought for complex ranking decision | |
| director_ranks = self.directorJudge(director_ideas=director_cuts) | |
| # === STEP 5: Display rankings === | |
| print("\nπ Director Rankings:") | |
| for rank, idea in zip(director_ranks.director_rankings, director_ideas): | |
| print(f" Rank {rank}: {idea.director_cut.director}") | |
| # === STEP 6: Find and display the winner === | |
| best_rank = min(director_ranks.director_rankings) | |
| best_index = director_ranks.director_rankings.index(best_rank) | |
| best_idea = director_ideas[best_index] | |
| print("\nπ₯ WINNER - Best Ranked Director Idea:") | |
| best_idea.director_cut.pretty_print() | |
| print(f"\nπ Judge's Reasoning:") | |
| print(f" {director_ranks.explanation}") | |
| print("=" * 50) | |
| # === STEP 7: Return complete results === | |
| return ResultClass( | |
| additional_director=additional_director, | |
| director_ideas=director_ideas, | |
| director_ranks=director_ranks | |
| ) | |
| # ========================================================================== | |
| # SECTION 5: MAIN FUNCTIONS AND ENTRY POINT | |
| # ========================================================================== | |
| def run_bake_off(video_idea: str, directors: str = None) -> ResultClass: | |
| """ | |
| π― MAIN FUNCTION: Easy-to-use interface for the Director Bake-Off | |
| This function provides a simple interface that handles: | |
| 1. LLM setup and configuration | |
| 2. Input validation and parsing | |
| 3. Running the complete workflow | |
| 4. Error handling | |
| This is the function that external code (like our Gradio interface) | |
| calls to use our DSPy system. | |
| Args: | |
| video_idea: A description of the video concept | |
| directors: Comma-separated string of director names (optional) | |
| Returns: | |
| ResultClass: Complete results from the bake-off | |
| """ | |
| print("π Running Director Bake-Off...") | |
| # === STEP 1: Ensure LLM is configured === | |
| global lm | |
| if not lm: | |
| provider = setup_dspy_provider() | |
| print(f" β DSPy configured with {provider} provider.") | |
| # === STEP 2: Parse and validate director input === | |
| if not (isinstance(directors, str) and directors.strip()): | |
| # Use default directors if none provided | |
| directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"] | |
| print(" π Using default directors") | |
| else: | |
| # Parse comma-separated string into list | |
| directors = [d.strip() for d in directors.split(",") if d.strip()] | |
| if not directors: | |
| # Fallback to defaults if parsing failed | |
| directors = ["Quentin Tarantino", "Alfred Hitchcock", "Richard Curtis"] | |
| print(" π Parsing failed, using default directors") | |
| # === STEP 3: Create and run the bake-off === | |
| bake_off = DirectorBakeOff() | |
| result_class = asyncio.run(bake_off.aforward(video_idea=video_idea, directors=directors)) | |
| return result_class | |
| # ========================================================================== | |
| # SECTION 6: SCRIPT ENTRY POINT | |
| # ========================================================================== | |
| if __name__ == "__main__": | |
| """ | |
| π¬ DEMO: Run the script directly to see it in action! | |
| This section only runs when you execute this file directly | |
| (not when it's imported as a module). | |
| Try running: python director_bake_off.py | |
| """ | |
| print("π DSPy Director Bake-Off Demo") | |
| print("=" * 40) | |
| # Run with a sample video idea | |
| sample_idea = "A futuristic cityscape with flying cars and neon lights." | |
| print(f"π Sample Video Idea: {sample_idea}") | |
| result = run_bake_off(sample_idea) | |
| print("\nπ Demo completed! Check the output above to see how each director") | |
| print(" would approach filming this futuristic cityscape.") | |