Spaces:
Build error
Build error
| // DURP module: full CSV + submission API exposed | |
| //! DURP β Distributor Unmatched Recordings Portal. | |
| //! | |
| //! The DURP is operated by the MLC (Mechanical Licensing Collective) and DDEX. | |
| //! Distributors must submit unmatched sound recordings β those with no matching | |
| //! musical work β so that rights holders can claim them. | |
| //! | |
| //! Reference: https://www.themlc.com/durp | |
| //! DDEX DURP 1.0 XML schema (published by The MLC, 2021) | |
| //! | |
| //! This module: | |
| //! 1. Generates DURP-format CSV submission files per MLC specification. | |
| //! 2. Validates that all required fields are present and correctly formatted. | |
| //! 3. Submits CSV to the MLC SFTP drop (or S3 gateway in cloud mode). | |
| //! 4. Parses MLC acknowledgement files and updates track status. | |
| //! | |
| //! LangSec: all cells sanitised via langsec::sanitise_csv_cell(). | |
| //! Security: SFTP credentials from environment variables only. | |
| use crate::langsec; | |
| use serde::{Deserialize, Serialize}; | |
| use tracing::{info, instrument, warn}; | |
| // ββ DURP Record βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| /// A single DURP submission record (one row in the CSV). | |
| /// Field names follow MLC DURP CSV Template v1.2. | |
| pub struct DurpRecord { | |
| /// ISRC (required). | |
| pub isrc: String, | |
| /// Track title (required). | |
| pub track_title: String, | |
| /// Primary artist name (required). | |
| pub primary_artist: String, | |
| /// Featured artists, comma-separated (optional). | |
| pub featured_artists: Option<String>, | |
| /// Release title / album name (optional). | |
| pub release_title: Option<String>, | |
| /// UPC/EAN of the release (optional). | |
| pub upc: Option<String>, | |
| /// Catalogue number (optional). | |
| pub catalogue_number: Option<String>, | |
| /// Label name (required). | |
| pub label_name: String, | |
| /// Release date YYYY-MM-DD (optional). | |
| pub release_date: Option<String>, | |
| /// Duration MM:SS (optional). | |
| pub duration: Option<String>, | |
| /// Distributor name (required). | |
| pub distributor_name: String, | |
| /// Distributor identifier (required β your DDEX party ID). | |
| pub distributor_id: String, | |
| /// BTFS CID of the audio (Retrosync-specific, mapped to a custom column). | |
| pub btfs_cid: Option<String>, | |
| /// Wikidata QID (Retrosync-specific metadata enrichment). | |
| pub wikidata_qid: Option<String>, | |
| /// Internal submission reference (UUID). | |
| pub submission_ref: String, | |
| } | |
| /// DURP submission status. | |
| pub enum DurpStatus { | |
| Pending, | |
| Submitted, | |
| Acknowledged, | |
| Matched, | |
| Rejected, | |
| } | |
| /// DURP submission batch. | |
| pub struct DurpSubmission { | |
| pub batch_id: String, | |
| pub records: Vec<DurpRecord>, | |
| pub status: DurpStatus, | |
| pub submitted_at: Option<String>, | |
| pub ack_file: Option<String>, | |
| pub error: Option<String>, | |
| } | |
| // ββ Validation ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| /// Validation error for a DURP record. | |
| pub struct DurpValidationError { | |
| pub record_index: usize, | |
| pub field: String, | |
| pub reason: String, | |
| } | |
| /// Validate a batch of DURP records before CSV generation. | |
| /// Returns a list of validation errors (empty = valid). | |
| pub fn validate_records(records: &[DurpRecord]) -> Vec<DurpValidationError> { | |
| let mut errors = Vec::new(); | |
| for (idx, rec) in records.iter().enumerate() { | |
| // ISRC format check (delegated to shared parsers) | |
| if let Err(e) = shared::parsers::recognize_isrc(&rec.isrc) { | |
| errors.push(DurpValidationError { | |
| record_index: idx, | |
| field: "isrc".into(), | |
| reason: e.to_string(), | |
| }); | |
| } | |
| // Required fields non-empty | |
| for (field, val) in [ | |
| ("track_title", &rec.track_title), | |
| ("primary_artist", &rec.primary_artist), | |
| ("label_name", &rec.label_name), | |
| ("distributor_name", &rec.distributor_name), | |
| ("distributor_id", &rec.distributor_id), | |
| ] { | |
| if val.trim().is_empty() { | |
| errors.push(DurpValidationError { | |
| record_index: idx, | |
| field: field.into(), | |
| reason: "required field is empty".into(), | |
| }); | |
| } | |
| } | |
| // Free-text field validation | |
| for (field, val) in [ | |
| ("track_title", &rec.track_title), | |
| ("primary_artist", &rec.primary_artist), | |
| ] { | |
| if let Err(e) = langsec::validate_free_text(val, field, 500) { | |
| errors.push(DurpValidationError { | |
| record_index: idx, | |
| field: field.into(), | |
| reason: e.reason, | |
| }); | |
| } | |
| } | |
| // Duration format MM:SS if present | |
| if let Some(dur) = &rec.duration { | |
| if !is_valid_duration(dur) { | |
| errors.push(DurpValidationError { | |
| record_index: idx, | |
| field: "duration".into(), | |
| reason: "must be MM:SS or M:SS (0:00β99:59)".into(), | |
| }); | |
| } | |
| } | |
| // Release date YYYY-MM-DD if present | |
| if let Some(date) = &rec.release_date { | |
| if !is_valid_date(date) { | |
| errors.push(DurpValidationError { | |
| record_index: idx, | |
| field: "release_date".into(), | |
| reason: "must be YYYY-MM-DD".into(), | |
| }); | |
| } | |
| } | |
| } | |
| errors | |
| } | |
| fn is_valid_duration(s: &str) -> bool { | |
| let parts: Vec<&str> = s.split(':').collect(); | |
| if parts.len() != 2 { | |
| return false; | |
| } | |
| let mins_ok = parts[0].len() <= 2 && parts[0].chars().all(|c| c.is_ascii_digit()); | |
| let secs_ok = parts[1].len() == 2 && parts[1].chars().all(|c| c.is_ascii_digit()); | |
| if !mins_ok || !secs_ok { | |
| return false; | |
| } | |
| let secs: u8 = parts[1].parse().unwrap_or(60); | |
| secs < 60 | |
| } | |
| fn is_valid_date(s: &str) -> bool { | |
| if s.len() != 10 { | |
| return false; | |
| } | |
| let parts: Vec<&str> = s.split('-').collect(); | |
| parts.len() == 3 | |
| && parts[0].len() == 4 | |
| && parts[1].len() == 2 | |
| && parts[2].len() == 2 | |
| && parts.iter().all(|p| p.chars().all(|c| c.is_ascii_digit())) | |
| } | |
| // ββ CSV generation ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| /// CSV column headers per MLC DURP Template v1.2 + Retrosync extensions. | |
| const DURP_HEADERS: &[&str] = &[ | |
| "ISRC", | |
| "Track Title", | |
| "Primary Artist", | |
| "Featured Artists", | |
| "Release Title", | |
| "UPC", | |
| "Catalogue Number", | |
| "Label Name", | |
| "Release Date", | |
| "Duration", | |
| "Distributor Name", | |
| "Distributor ID", | |
| "BTFS CID", | |
| "Wikidata QID", | |
| "Submission Reference", | |
| ]; | |
| /// Generate a DURP-format CSV string from a slice of validated records. | |
| /// | |
| /// RFC 4180 CSV: | |
| /// - CRLF line endings | |
| /// - Fields with commas, quotes, or newlines wrapped in double-quotes | |
| /// - Embedded double-quotes escaped as "" | |
| pub fn generate_csv(records: &[DurpRecord]) -> String { | |
| let mut lines: Vec<String> = Vec::with_capacity(records.len() + 1); | |
| // Header row | |
| lines.push(DURP_HEADERS.join(",")); | |
| for rec in records { | |
| let row = vec![ | |
| csv_field(&rec.isrc), | |
| csv_field(&rec.track_title), | |
| csv_field(&rec.primary_artist), | |
| csv_field(rec.featured_artists.as_deref().unwrap_or("")), | |
| csv_field(rec.release_title.as_deref().unwrap_or("")), | |
| csv_field(rec.upc.as_deref().unwrap_or("")), | |
| csv_field(rec.catalogue_number.as_deref().unwrap_or("")), | |
| csv_field(&rec.label_name), | |
| csv_field(rec.release_date.as_deref().unwrap_or("")), | |
| csv_field(rec.duration.as_deref().unwrap_or("")), | |
| csv_field(&rec.distributor_name), | |
| csv_field(&rec.distributor_id), | |
| csv_field(rec.btfs_cid.as_deref().unwrap_or("")), | |
| csv_field(rec.wikidata_qid.as_deref().unwrap_or("")), | |
| csv_field(&rec.submission_ref), | |
| ]; | |
| lines.push(row.join(",")); | |
| } | |
| // RFC 4180: CRLF line endings | |
| lines.join("\r\n") + "\r\n" | |
| } | |
| /// Format a single CSV field per RFC 4180. | |
| fn csv_field(value: &str) -> String { | |
| // LangSec: sanitise before embedding in CSV | |
| let sanitised = langsec::sanitise_csv_cell(value); | |
| if sanitised.contains(',') || sanitised.contains('"') || sanitised.contains('\n') { | |
| format!("\"{}\"", sanitised.replace('"', "\"\"")) | |
| } else { | |
| sanitised | |
| } | |
| } | |
| // ββ Submission config βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| pub struct DurpConfig { | |
| /// MLC SFTP host (e.g. sftp.themlc.com). | |
| pub sftp_host: Option<String>, | |
| /// Distributor DDEX party ID (e.g. PADPIDA2024RETROSYNC01). | |
| pub distributor_id: String, | |
| pub distributor_name: String, | |
| pub enabled: bool, | |
| pub dev_mode: bool, | |
| } | |
| impl DurpConfig { | |
| pub fn from_env() -> Self { | |
| Self { | |
| sftp_host: std::env::var("MLC_SFTP_HOST").ok(), | |
| distributor_id: std::env::var("DDEX_PARTY_ID") | |
| .unwrap_or_else(|_| "PADPIDA-RETROSYNC".into()), | |
| distributor_name: std::env::var("DISTRIBUTOR_NAME") | |
| .unwrap_or_else(|_| "Retrosync Media Group".into()), | |
| enabled: std::env::var("DURP_ENABLED").unwrap_or_default() == "1", | |
| dev_mode: std::env::var("DURP_DEV_MODE").unwrap_or_default() == "1", | |
| } | |
| } | |
| } | |
| /// Build a DurpRecord from a track upload. | |
| pub fn build_record( | |
| config: &DurpConfig, | |
| isrc: &str, | |
| title: &str, | |
| artist: &str, | |
| label: &str, | |
| btfs_cid: Option<&str>, | |
| wikidata_qid: Option<&str>, | |
| ) -> DurpRecord { | |
| DurpRecord { | |
| isrc: isrc.to_string(), | |
| track_title: title.to_string(), | |
| primary_artist: artist.to_string(), | |
| featured_artists: None, | |
| release_title: None, | |
| upc: None, | |
| catalogue_number: None, | |
| label_name: label.to_string(), | |
| release_date: None, | |
| duration: None, | |
| distributor_name: config.distributor_name.clone(), | |
| distributor_id: config.distributor_id.clone(), | |
| btfs_cid: btfs_cid.map(String::from), | |
| wikidata_qid: wikidata_qid.map(String::from), | |
| submission_ref: generate_submission_ref(), | |
| } | |
| } | |
| /// Submit a DURP CSV batch (dev mode: log only). | |
| pub async fn submit_batch( | |
| config: &DurpConfig, | |
| batch_id: &str, | |
| csv: &str, | |
| ) -> anyhow::Result<DurpSubmission> { | |
| if config.dev_mode { | |
| info!(batch_id=%batch_id, rows=csv.lines().count()-1, "DURP dev-mode: stub submission"); | |
| return Ok(DurpSubmission { | |
| batch_id: batch_id.to_string(), | |
| records: vec![], | |
| status: DurpStatus::Submitted, | |
| submitted_at: Some(chrono::Utc::now().to_rfc3339()), | |
| ack_file: None, | |
| error: None, | |
| }); | |
| } | |
| if !config.enabled { | |
| warn!("DURP submission skipped β set DURP_ENABLED=1 and MLC_SFTP_HOST"); | |
| return Ok(DurpSubmission { | |
| batch_id: batch_id.to_string(), | |
| records: vec![], | |
| status: DurpStatus::Pending, | |
| submitted_at: None, | |
| ack_file: None, | |
| error: Some("DURP not enabled".into()), | |
| }); | |
| } | |
| // Production: upload CSV to MLC SFTP. | |
| // Requires SFTP client (ssh2 crate) β integrate separately. | |
| // For now, report submission pending for operator follow-up. | |
| warn!( | |
| batch_id=%batch_id, | |
| "DURP production SFTP submission requires MLC credentials β \ | |
| save CSV locally and upload via MLC portal" | |
| ); | |
| Ok(DurpSubmission { | |
| batch_id: batch_id.to_string(), | |
| records: vec![], | |
| status: DurpStatus::Pending, | |
| submitted_at: None, | |
| ack_file: None, | |
| error: Some("SFTP upload not yet connected β use MLC portal".into()), | |
| }) | |
| } | |
| fn generate_submission_ref() -> String { | |
| use std::time::{SystemTime, UNIX_EPOCH}; | |
| let t = SystemTime::now() | |
| .duration_since(UNIX_EPOCH) | |
| .unwrap_or_default() | |
| .as_nanos(); | |
| format!("RTSY-{:016x}", t & 0xFFFFFFFFFFFFFFFF) | |
| } | |
| mod tests { | |
| use super::*; | |
| fn sample_record() -> DurpRecord { | |
| DurpRecord { | |
| isrc: "US-S1Z-99-00001".to_string(), | |
| track_title: "Test Track".to_string(), | |
| primary_artist: "Test Artist".to_string(), | |
| featured_artists: None, | |
| release_title: Some("Test Album".to_string()), | |
| upc: None, | |
| catalogue_number: None, | |
| label_name: "Test Label".to_string(), | |
| release_date: Some("2024-01-15".to_string()), | |
| duration: Some("3:45".to_string()), | |
| distributor_name: "Retrosync".to_string(), | |
| distributor_id: "PADPIDA-TEST".to_string(), | |
| btfs_cid: None, | |
| wikidata_qid: None, | |
| submission_ref: "RTSY-test".to_string(), | |
| } | |
| } | |
| fn csv_generation() { | |
| let records = vec![sample_record()]; | |
| let csv = generate_csv(&records); | |
| assert!(csv.contains("US-S1Z-99-00001")); | |
| assert!(csv.contains("Test Track")); | |
| assert!(csv.ends_with("\r\n")); | |
| } | |
| fn validation_passes() { | |
| let records = vec![sample_record()]; | |
| let errs = validate_records(&records); | |
| assert!(errs.is_empty(), "{errs:?}"); | |
| } | |
| fn validation_catches_bad_isrc() { | |
| let mut r = sample_record(); | |
| r.isrc = "INVALID".to_string(); | |
| let errs = validate_records(&[r]); | |
| assert!(errs.iter().any(|e| e.field == "isrc")); | |
| } | |
| fn csv_injection_sanitised() { | |
| let mut r = sample_record(); | |
| r.track_title = "=SUM(A1:B1)".to_string(); | |
| let csv = generate_csv(&[r]); | |
| assert!(!csv.contains("=SUM")); | |
| } | |
| fn duration_validation() { | |
| assert!(is_valid_duration("3:45")); | |
| assert!(is_valid_duration("10:00")); | |
| assert!(!is_valid_duration("3:60")); | |
| assert!(!is_valid_duration("invalid")); | |
| } | |
| } | |