---
# Flow definition for generating and quality-filtering synthetic examples of
# FCA-classified financial communications.
#
# Pipeline shape (see `blocks` below):
#   1-4  build a generation prompt, call the model, extract the response and
#        parse the text between [EXAMPLE] ... [/EXAMPLE] into `generated_text`.
#   5-8  build a judge prompt, call the model again, extract the response and
#        parse the text between [VERDICT] ... [/VERDICT] into `verdict`.
#   9    filter rows on `verdict` using the value "PASS".
#
# NOTE(review): the block structure was rebuilt from a whitespace-collapsed
# copy. `license`, `recommended_models`, `tags` and `dataset_requirements` are
# nested under `metadata` based on key order - confirm against the flow
# loader's schema.
metadata:
  name: "FCA Financial Classification Data Generator"
  id: "fca-classification-sdg"
  description: >-
    Generates synthetic training data for classifying financial
    communications into three regulatory categories: Guidance,
    Targeted Support, and Regulated Advice, based on FCA PERG 8
    and CP23/24 frameworks. Includes LLM-as-judge quality filtering.
  # Quoted so the version stays a string rather than being type-inferred.
  version: "1.0.0"
  author: "djordjebatic"
  license: "Apache-2.0"
  recommended_models:
    default: "openai/gpt-4o"
    compatible:
      - "meta-llama/Llama-3.3-70B-Instruct"
      - "Qwen/Qwen2.5-7B-Instruct"
  tags:
    - "financial-regulation"
    - "classification"
    - "synthetic-data"
    - "fca"
    - "perg8"
  # Columns the input dataset must provide; the first block consumes all five.
  dataset_requirements:
    required_columns:
      - "target_label"
      - "domain"
      - "channel"
      - "persona"
      - "seed_text"

blocks:
  # --- Stage 1: example generation ---------------------------------------

  # Builds `generation_prompt` from the five seed columns using the template
  # at prompts/generate_examples.yaml.
  - block_type: "PromptBuilderBlock"
    block_config:
      block_name: "build_generation_prompt"
      input_cols:
        - "target_label"
        - "domain"
        - "channel"
        - "persona"
        - "seed_text"
      output_cols: "generation_prompt"
      prompt_config_path: "prompts/generate_examples.yaml"

  # Sends `generation_prompt` to the model; the reply lands in
  # `raw_generation`. Uses a higher temperature (0.8) than the judge call.
  - block_type: "LLMChatBlock"
    block_config:
      block_name: "generate_examples"
      input_cols: "generation_prompt"
      output_cols: "raw_generation"
      temperature: 0.8
      max_tokens: 3000
      async_mode: true

  # Extracts content from `raw_generation`. No output column is declared
  # here; the next block reads `extract_generation_content`, presumably the
  # block-name-prefixed content column this block emits - TODO confirm.
  - block_type: "LLMResponseExtractorBlock"
    block_config:
      block_name: "extract_generation"
      input_cols: "raw_generation"
      extract_content: true
      expand_lists: true

  # Parses the text between [EXAMPLE] and [/EXAMPLE] into `generated_text`.
  - block_type: "TagParserBlock"
    block_config:
      block_name: "parse_examples"
      input_cols: "extract_generation_content"
      output_cols: "generated_text"
      start_tags: ["[EXAMPLE]"]
      end_tags: ["[/EXAMPLE]"]

  # --- Stage 2: LLM-as-judge quality filter ------------------------------

  # Builds `judge_prompt` using the template at prompts/quality_judge.yaml.
  # NOTE(review): `input_cols` is a mapping here, unlike the list form above.
  # No visible block or required column produces `claimed_label`; if the
  # mapping keys are dataset column names, this entry should probably read
  # `target_label: "claimed_label"` - confirm the direction against
  # PromptBuilderBlock and the variables used in quality_judge.yaml.
  - block_type: "PromptBuilderBlock"
    block_config:
      block_name: "build_judge_prompt"
      input_cols:
        claimed_label: "target_label"
        generated_text: "generated_text"
      output_cols: "judge_prompt"
      prompt_config_path: "prompts/quality_judge.yaml"

  # Sends `judge_prompt` to the model; the reply lands in `raw_judgment`.
  # Temperature is 0.0 and the token budget (500) is smaller than generation.
  - block_type: "LLMChatBlock"
    block_config:
      block_name: "judge_quality"
      input_cols: "judge_prompt"
      output_cols: "raw_judgment"
      temperature: 0.0
      max_tokens: 500
      async_mode: true

  # Extracts content from `raw_judgment`; the next block reads
  # `extract_judgment_content` (same naming assumption as in stage 1).
  - block_type: "LLMResponseExtractorBlock"
    block_config:
      block_name: "extract_judgment"
      input_cols: "raw_judgment"
      extract_content: true
      expand_lists: true

  # Parses the text between [VERDICT] and [/VERDICT] into `verdict`.
  - block_type: "TagParserBlock"
    block_config:
      block_name: "parse_verdict"
      input_cols: "extract_judgment_content"
      output_cols: "verdict"
      start_tags: ["[VERDICT]"]
      end_tags: ["[/VERDICT]"]

  # Filters on `verdict` with operation "contains" and value "PASS".
  # NOTE(review): presumably keeps rows whose verdict text includes "PASS";
  # a substring match would also accept longer strings containing it -
  # confirm this is intended rather than an exact match.
  - block_type: "ColumnValueFilterBlock"
    block_config:
      block_name: "filter_quality"
      input_cols: ["verdict"]
      filter_value: "PASS"
      operation: "contains"