#![allow(dead_code)] // DURP module: full CSV + submission API exposed //! DURP — Distributor Unmatched Recordings Portal. //! //! The DURP is operated by the MLC (Mechanical Licensing Collective) and DDEX. //! Distributors must submit unmatched sound recordings — those with no matching //! musical work — so that rights holders can claim them. //! //! Reference: https://www.themlc.com/durp //! DDEX DURP 1.0 XML schema (published by The MLC, 2021) //! //! This module: //! 1. Generates DURP-format CSV submission files per MLC specification. //! 2. Validates that all required fields are present and correctly formatted. //! 3. Submits CSV to the MLC SFTP drop (or S3 gateway in cloud mode). //! 4. Parses MLC acknowledgement files and updates track status. //! //! LangSec: all cells sanitised via langsec::sanitise_csv_cell(). //! Security: SFTP credentials from environment variables only. use crate::langsec; use serde::{Deserialize, Serialize}; use tracing::{info, instrument, warn}; // ── DURP Record ─────────────────────────────────────────────────────────────── /// A single DURP submission record (one row in the CSV). /// Field names follow MLC DURP CSV Template v1.2. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DurpRecord { /// ISRC (required). pub isrc: String, /// Track title (required). pub track_title: String, /// Primary artist name (required). pub primary_artist: String, /// Featured artists, comma-separated (optional). pub featured_artists: Option, /// Release title / album name (optional). pub release_title: Option, /// UPC/EAN of the release (optional). pub upc: Option, /// Catalogue number (optional). pub catalogue_number: Option, /// Label name (required). pub label_name: String, /// Release date YYYY-MM-DD (optional). pub release_date: Option, /// Duration MM:SS (optional). pub duration: Option, /// Distributor name (required). pub distributor_name: String, /// Distributor identifier (required — your DDEX party ID). pub distributor_id: String, /// BTFS CID of the audio (Retrosync-specific, mapped to a custom column). pub btfs_cid: Option, /// Wikidata QID (Retrosync-specific metadata enrichment). pub wikidata_qid: Option, /// Internal submission reference (UUID). pub submission_ref: String, } /// DURP submission status. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum DurpStatus { Pending, Submitted, Acknowledged, Matched, Rejected, } /// DURP submission batch. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DurpSubmission { pub batch_id: String, pub records: Vec, pub status: DurpStatus, pub submitted_at: Option, pub ack_file: Option, pub error: Option, } // ── Validation ──────────────────────────────────────────────────────────────── /// Validation error for a DURP record. #[derive(Debug, Clone, Serialize)] pub struct DurpValidationError { pub record_index: usize, pub field: String, pub reason: String, } /// Validate a batch of DURP records before CSV generation. /// Returns a list of validation errors (empty = valid). pub fn validate_records(records: &[DurpRecord]) -> Vec { let mut errors = Vec::new(); for (idx, rec) in records.iter().enumerate() { // ISRC format check (delegated to shared parsers) if let Err(e) = shared::parsers::recognize_isrc(&rec.isrc) { errors.push(DurpValidationError { record_index: idx, field: "isrc".into(), reason: e.to_string(), }); } // Required fields non-empty for (field, val) in [ ("track_title", &rec.track_title), ("primary_artist", &rec.primary_artist), ("label_name", &rec.label_name), ("distributor_name", &rec.distributor_name), ("distributor_id", &rec.distributor_id), ] { if val.trim().is_empty() { errors.push(DurpValidationError { record_index: idx, field: field.into(), reason: "required field is empty".into(), }); } } // Free-text field validation for (field, val) in [ ("track_title", &rec.track_title), ("primary_artist", &rec.primary_artist), ] { if let Err(e) = langsec::validate_free_text(val, field, 500) { errors.push(DurpValidationError { record_index: idx, field: field.into(), reason: e.reason, }); } } // Duration format MM:SS if present if let Some(dur) = &rec.duration { if !is_valid_duration(dur) { errors.push(DurpValidationError { record_index: idx, field: "duration".into(), reason: "must be MM:SS or M:SS (0:00–99:59)".into(), }); } } // Release date YYYY-MM-DD if present if let Some(date) = &rec.release_date { if !is_valid_date(date) { errors.push(DurpValidationError { record_index: idx, field: "release_date".into(), reason: "must be YYYY-MM-DD".into(), }); } } } errors } fn is_valid_duration(s: &str) -> bool { let parts: Vec<&str> = s.split(':').collect(); if parts.len() != 2 { return false; } let mins_ok = parts[0].len() <= 2 && parts[0].chars().all(|c| c.is_ascii_digit()); let secs_ok = parts[1].len() == 2 && parts[1].chars().all(|c| c.is_ascii_digit()); if !mins_ok || !secs_ok { return false; } let secs: u8 = parts[1].parse().unwrap_or(60); secs < 60 } fn is_valid_date(s: &str) -> bool { if s.len() != 10 { return false; } let parts: Vec<&str> = s.split('-').collect(); parts.len() == 3 && parts[0].len() == 4 && parts[1].len() == 2 && parts[2].len() == 2 && parts.iter().all(|p| p.chars().all(|c| c.is_ascii_digit())) } // ── CSV generation ──────────────────────────────────────────────────────────── /// CSV column headers per MLC DURP Template v1.2 + Retrosync extensions. const DURP_HEADERS: &[&str] = &[ "ISRC", "Track Title", "Primary Artist", "Featured Artists", "Release Title", "UPC", "Catalogue Number", "Label Name", "Release Date", "Duration", "Distributor Name", "Distributor ID", "BTFS CID", "Wikidata QID", "Submission Reference", ]; /// Generate a DURP-format CSV string from a slice of validated records. /// /// RFC 4180 CSV: /// - CRLF line endings /// - Fields with commas, quotes, or newlines wrapped in double-quotes /// - Embedded double-quotes escaped as "" pub fn generate_csv(records: &[DurpRecord]) -> String { let mut lines: Vec = Vec::with_capacity(records.len() + 1); // Header row lines.push(DURP_HEADERS.join(",")); for rec in records { let row = vec![ csv_field(&rec.isrc), csv_field(&rec.track_title), csv_field(&rec.primary_artist), csv_field(rec.featured_artists.as_deref().unwrap_or("")), csv_field(rec.release_title.as_deref().unwrap_or("")), csv_field(rec.upc.as_deref().unwrap_or("")), csv_field(rec.catalogue_number.as_deref().unwrap_or("")), csv_field(&rec.label_name), csv_field(rec.release_date.as_deref().unwrap_or("")), csv_field(rec.duration.as_deref().unwrap_or("")), csv_field(&rec.distributor_name), csv_field(&rec.distributor_id), csv_field(rec.btfs_cid.as_deref().unwrap_or("")), csv_field(rec.wikidata_qid.as_deref().unwrap_or("")), csv_field(&rec.submission_ref), ]; lines.push(row.join(",")); } // RFC 4180: CRLF line endings lines.join("\r\n") + "\r\n" } /// Format a single CSV field per RFC 4180. fn csv_field(value: &str) -> String { // LangSec: sanitise before embedding in CSV let sanitised = langsec::sanitise_csv_cell(value); if sanitised.contains(',') || sanitised.contains('"') || sanitised.contains('\n') { format!("\"{}\"", sanitised.replace('"', "\"\"")) } else { sanitised } } // ── Submission config ───────────────────────────────────────────────────────── #[derive(Clone)] pub struct DurpConfig { /// MLC SFTP host (e.g. sftp.themlc.com). pub sftp_host: Option, /// Distributor DDEX party ID (e.g. PADPIDA2024RETROSYNC01). pub distributor_id: String, pub distributor_name: String, pub enabled: bool, pub dev_mode: bool, } impl DurpConfig { pub fn from_env() -> Self { Self { sftp_host: std::env::var("MLC_SFTP_HOST").ok(), distributor_id: std::env::var("DDEX_PARTY_ID") .unwrap_or_else(|_| "PADPIDA-RETROSYNC".into()), distributor_name: std::env::var("DISTRIBUTOR_NAME") .unwrap_or_else(|_| "Retrosync Media Group".into()), enabled: std::env::var("DURP_ENABLED").unwrap_or_default() == "1", dev_mode: std::env::var("DURP_DEV_MODE").unwrap_or_default() == "1", } } } /// Build a DurpRecord from a track upload. pub fn build_record( config: &DurpConfig, isrc: &str, title: &str, artist: &str, label: &str, btfs_cid: Option<&str>, wikidata_qid: Option<&str>, ) -> DurpRecord { DurpRecord { isrc: isrc.to_string(), track_title: title.to_string(), primary_artist: artist.to_string(), featured_artists: None, release_title: None, upc: None, catalogue_number: None, label_name: label.to_string(), release_date: None, duration: None, distributor_name: config.distributor_name.clone(), distributor_id: config.distributor_id.clone(), btfs_cid: btfs_cid.map(String::from), wikidata_qid: wikidata_qid.map(String::from), submission_ref: generate_submission_ref(), } } /// Submit a DURP CSV batch (dev mode: log only). #[instrument(skip(config, csv))] pub async fn submit_batch( config: &DurpConfig, batch_id: &str, csv: &str, ) -> anyhow::Result { if config.dev_mode { info!(batch_id=%batch_id, rows=csv.lines().count()-1, "DURP dev-mode: stub submission"); return Ok(DurpSubmission { batch_id: batch_id.to_string(), records: vec![], status: DurpStatus::Submitted, submitted_at: Some(chrono::Utc::now().to_rfc3339()), ack_file: None, error: None, }); } if !config.enabled { warn!("DURP submission skipped — set DURP_ENABLED=1 and MLC_SFTP_HOST"); return Ok(DurpSubmission { batch_id: batch_id.to_string(), records: vec![], status: DurpStatus::Pending, submitted_at: None, ack_file: None, error: Some("DURP not enabled".into()), }); } // Production: upload CSV to MLC SFTP. // Requires SFTP client (ssh2 crate) — integrate separately. // For now, report submission pending for operator follow-up. warn!( batch_id=%batch_id, "DURP production SFTP submission requires MLC credentials — \ save CSV locally and upload via MLC portal" ); Ok(DurpSubmission { batch_id: batch_id.to_string(), records: vec![], status: DurpStatus::Pending, submitted_at: None, ack_file: None, error: Some("SFTP upload not yet connected — use MLC portal".into()), }) } fn generate_submission_ref() -> String { use std::time::{SystemTime, UNIX_EPOCH}; let t = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap_or_default() .as_nanos(); format!("RTSY-{:016x}", t & 0xFFFFFFFFFFFFFFFF) } #[cfg(test)] mod tests { use super::*; fn sample_record() -> DurpRecord { DurpRecord { isrc: "US-S1Z-99-00001".to_string(), track_title: "Test Track".to_string(), primary_artist: "Test Artist".to_string(), featured_artists: None, release_title: Some("Test Album".to_string()), upc: None, catalogue_number: None, label_name: "Test Label".to_string(), release_date: Some("2024-01-15".to_string()), duration: Some("3:45".to_string()), distributor_name: "Retrosync".to_string(), distributor_id: "PADPIDA-TEST".to_string(), btfs_cid: None, wikidata_qid: None, submission_ref: "RTSY-test".to_string(), } } #[test] fn csv_generation() { let records = vec![sample_record()]; let csv = generate_csv(&records); assert!(csv.contains("US-S1Z-99-00001")); assert!(csv.contains("Test Track")); assert!(csv.ends_with("\r\n")); } #[test] fn validation_passes() { let records = vec![sample_record()]; let errs = validate_records(&records); assert!(errs.is_empty(), "{errs:?}"); } #[test] fn validation_catches_bad_isrc() { let mut r = sample_record(); r.isrc = "INVALID".to_string(); let errs = validate_records(&[r]); assert!(errs.iter().any(|e| e.field == "isrc")); } #[test] fn csv_injection_sanitised() { let mut r = sample_record(); r.track_title = "=SUM(A1:B1)".to_string(); let csv = generate_csv(&[r]); assert!(!csv.contains("=SUM")); } #[test] fn duration_validation() { assert!(is_valid_duration("3:45")); assert!(is_valid_duration("10:00")); assert!(!is_valid_duration("3:60")); assert!(!is_valid_duration("invalid")); } }