mike dupont
init: retro-sync API server + viewer + 71 Bach tiles + catalog
1295969
#![allow(dead_code)] // DURP module: full CSV + submission API exposed
//! DURP β€” Distributor Unmatched Recordings Portal.
//!
//! The DURP is operated by the MLC (Mechanical Licensing Collective) and DDEX.
//! Distributors must submit unmatched sound recordings β€” those with no matching
//! musical work β€” so that rights holders can claim them.
//!
//! Reference: https://www.themlc.com/durp
//! DDEX DURP 1.0 XML schema (published by The MLC, 2021)
//!
//! This module:
//! 1. Generates DURP-format CSV submission files per MLC specification.
//! 2. Validates that all required fields are present and correctly formatted.
//! 3. Submits CSV to the MLC SFTP drop (or S3 gateway in cloud mode).
//! 4. Parses MLC acknowledgement files and updates track status.
//!
//! LangSec: all cells sanitised via langsec::sanitise_csv_cell().
//! Security: SFTP credentials from environment variables only.
use crate::langsec;
use serde::{Deserialize, Serialize};
use tracing::{info, instrument, warn};
// ── DURP Record ───────────────────────────────────────────────────────────────
/// A single DURP submission record (one row in the CSV).
/// Field names follow MLC DURP CSV Template v1.2.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DurpRecord {
/// ISRC (required).
pub isrc: String,
/// Track title (required).
pub track_title: String,
/// Primary artist name (required).
pub primary_artist: String,
/// Featured artists, comma-separated (optional).
pub featured_artists: Option<String>,
/// Release title / album name (optional).
pub release_title: Option<String>,
/// UPC/EAN of the release (optional).
pub upc: Option<String>,
/// Catalogue number (optional).
pub catalogue_number: Option<String>,
/// Label name (required).
pub label_name: String,
/// Release date YYYY-MM-DD (optional).
pub release_date: Option<String>,
/// Duration MM:SS (optional).
pub duration: Option<String>,
/// Distributor name (required).
pub distributor_name: String,
/// Distributor identifier (required β€” your DDEX party ID).
pub distributor_id: String,
/// BTFS CID of the audio (Retrosync-specific, mapped to a custom column).
pub btfs_cid: Option<String>,
/// Wikidata QID (Retrosync-specific metadata enrichment).
pub wikidata_qid: Option<String>,
/// Internal submission reference (UUID).
pub submission_ref: String,
}
/// DURP submission status.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum DurpStatus {
Pending,
Submitted,
Acknowledged,
Matched,
Rejected,
}
/// DURP submission batch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DurpSubmission {
pub batch_id: String,
pub records: Vec<DurpRecord>,
pub status: DurpStatus,
pub submitted_at: Option<String>,
pub ack_file: Option<String>,
pub error: Option<String>,
}
// ── Validation ────────────────────────────────────────────────────────────────
/// Validation error for a DURP record.
#[derive(Debug, Clone, Serialize)]
pub struct DurpValidationError {
pub record_index: usize,
pub field: String,
pub reason: String,
}
/// Validate a batch of DURP records before CSV generation.
/// Returns a list of validation errors (empty = valid).
pub fn validate_records(records: &[DurpRecord]) -> Vec<DurpValidationError> {
let mut errors = Vec::new();
for (idx, rec) in records.iter().enumerate() {
// ISRC format check (delegated to shared parsers)
if let Err(e) = shared::parsers::recognize_isrc(&rec.isrc) {
errors.push(DurpValidationError {
record_index: idx,
field: "isrc".into(),
reason: e.to_string(),
});
}
// Required fields non-empty
for (field, val) in [
("track_title", &rec.track_title),
("primary_artist", &rec.primary_artist),
("label_name", &rec.label_name),
("distributor_name", &rec.distributor_name),
("distributor_id", &rec.distributor_id),
] {
if val.trim().is_empty() {
errors.push(DurpValidationError {
record_index: idx,
field: field.into(),
reason: "required field is empty".into(),
});
}
}
// Free-text field validation
for (field, val) in [
("track_title", &rec.track_title),
("primary_artist", &rec.primary_artist),
] {
if let Err(e) = langsec::validate_free_text(val, field, 500) {
errors.push(DurpValidationError {
record_index: idx,
field: field.into(),
reason: e.reason,
});
}
}
// Duration format MM:SS if present
if let Some(dur) = &rec.duration {
if !is_valid_duration(dur) {
errors.push(DurpValidationError {
record_index: idx,
field: "duration".into(),
reason: "must be MM:SS or M:SS (0:00–99:59)".into(),
});
}
}
// Release date YYYY-MM-DD if present
if let Some(date) = &rec.release_date {
if !is_valid_date(date) {
errors.push(DurpValidationError {
record_index: idx,
field: "release_date".into(),
reason: "must be YYYY-MM-DD".into(),
});
}
}
}
errors
}
fn is_valid_duration(s: &str) -> bool {
let parts: Vec<&str> = s.split(':').collect();
if parts.len() != 2 {
return false;
}
let mins_ok = parts[0].len() <= 2 && parts[0].chars().all(|c| c.is_ascii_digit());
let secs_ok = parts[1].len() == 2 && parts[1].chars().all(|c| c.is_ascii_digit());
if !mins_ok || !secs_ok {
return false;
}
let secs: u8 = parts[1].parse().unwrap_or(60);
secs < 60
}
fn is_valid_date(s: &str) -> bool {
if s.len() != 10 {
return false;
}
let parts: Vec<&str> = s.split('-').collect();
parts.len() == 3
&& parts[0].len() == 4
&& parts[1].len() == 2
&& parts[2].len() == 2
&& parts.iter().all(|p| p.chars().all(|c| c.is_ascii_digit()))
}
// ── CSV generation ────────────────────────────────────────────────────────────
/// CSV column headers per MLC DURP Template v1.2 + Retrosync extensions.
const DURP_HEADERS: &[&str] = &[
"ISRC",
"Track Title",
"Primary Artist",
"Featured Artists",
"Release Title",
"UPC",
"Catalogue Number",
"Label Name",
"Release Date",
"Duration",
"Distributor Name",
"Distributor ID",
"BTFS CID",
"Wikidata QID",
"Submission Reference",
];
/// Generate a DURP-format CSV string from a slice of validated records.
///
/// RFC 4180 CSV:
/// - CRLF line endings
/// - Fields with commas, quotes, or newlines wrapped in double-quotes
/// - Embedded double-quotes escaped as ""
pub fn generate_csv(records: &[DurpRecord]) -> String {
let mut lines: Vec<String> = Vec::with_capacity(records.len() + 1);
// Header row
lines.push(DURP_HEADERS.join(","));
for rec in records {
let row = vec![
csv_field(&rec.isrc),
csv_field(&rec.track_title),
csv_field(&rec.primary_artist),
csv_field(rec.featured_artists.as_deref().unwrap_or("")),
csv_field(rec.release_title.as_deref().unwrap_or("")),
csv_field(rec.upc.as_deref().unwrap_or("")),
csv_field(rec.catalogue_number.as_deref().unwrap_or("")),
csv_field(&rec.label_name),
csv_field(rec.release_date.as_deref().unwrap_or("")),
csv_field(rec.duration.as_deref().unwrap_or("")),
csv_field(&rec.distributor_name),
csv_field(&rec.distributor_id),
csv_field(rec.btfs_cid.as_deref().unwrap_or("")),
csv_field(rec.wikidata_qid.as_deref().unwrap_or("")),
csv_field(&rec.submission_ref),
];
lines.push(row.join(","));
}
// RFC 4180: CRLF line endings
lines.join("\r\n") + "\r\n"
}
/// Format a single CSV field per RFC 4180.
fn csv_field(value: &str) -> String {
// LangSec: sanitise before embedding in CSV
let sanitised = langsec::sanitise_csv_cell(value);
if sanitised.contains(',') || sanitised.contains('"') || sanitised.contains('\n') {
format!("\"{}\"", sanitised.replace('"', "\"\""))
} else {
sanitised
}
}
// ── Submission config ─────────────────────────────────────────────────────────
#[derive(Clone)]
pub struct DurpConfig {
/// MLC SFTP host (e.g. sftp.themlc.com).
pub sftp_host: Option<String>,
/// Distributor DDEX party ID (e.g. PADPIDA2024RETROSYNC01).
pub distributor_id: String,
pub distributor_name: String,
pub enabled: bool,
pub dev_mode: bool,
}
impl DurpConfig {
pub fn from_env() -> Self {
Self {
sftp_host: std::env::var("MLC_SFTP_HOST").ok(),
distributor_id: std::env::var("DDEX_PARTY_ID")
.unwrap_or_else(|_| "PADPIDA-RETROSYNC".into()),
distributor_name: std::env::var("DISTRIBUTOR_NAME")
.unwrap_or_else(|_| "Retrosync Media Group".into()),
enabled: std::env::var("DURP_ENABLED").unwrap_or_default() == "1",
dev_mode: std::env::var("DURP_DEV_MODE").unwrap_or_default() == "1",
}
}
}
/// Build a DurpRecord from a track upload.
pub fn build_record(
config: &DurpConfig,
isrc: &str,
title: &str,
artist: &str,
label: &str,
btfs_cid: Option<&str>,
wikidata_qid: Option<&str>,
) -> DurpRecord {
DurpRecord {
isrc: isrc.to_string(),
track_title: title.to_string(),
primary_artist: artist.to_string(),
featured_artists: None,
release_title: None,
upc: None,
catalogue_number: None,
label_name: label.to_string(),
release_date: None,
duration: None,
distributor_name: config.distributor_name.clone(),
distributor_id: config.distributor_id.clone(),
btfs_cid: btfs_cid.map(String::from),
wikidata_qid: wikidata_qid.map(String::from),
submission_ref: generate_submission_ref(),
}
}
/// Submit a DURP CSV batch (dev mode: log only).
#[instrument(skip(config, csv))]
pub async fn submit_batch(
config: &DurpConfig,
batch_id: &str,
csv: &str,
) -> anyhow::Result<DurpSubmission> {
if config.dev_mode {
info!(batch_id=%batch_id, rows=csv.lines().count()-1, "DURP dev-mode: stub submission");
return Ok(DurpSubmission {
batch_id: batch_id.to_string(),
records: vec![],
status: DurpStatus::Submitted,
submitted_at: Some(chrono::Utc::now().to_rfc3339()),
ack_file: None,
error: None,
});
}
if !config.enabled {
warn!("DURP submission skipped β€” set DURP_ENABLED=1 and MLC_SFTP_HOST");
return Ok(DurpSubmission {
batch_id: batch_id.to_string(),
records: vec![],
status: DurpStatus::Pending,
submitted_at: None,
ack_file: None,
error: Some("DURP not enabled".into()),
});
}
// Production: upload CSV to MLC SFTP.
// Requires SFTP client (ssh2 crate) β€” integrate separately.
// For now, report submission pending for operator follow-up.
warn!(
batch_id=%batch_id,
"DURP production SFTP submission requires MLC credentials β€” \
save CSV locally and upload via MLC portal"
);
Ok(DurpSubmission {
batch_id: batch_id.to_string(),
records: vec![],
status: DurpStatus::Pending,
submitted_at: None,
ack_file: None,
error: Some("SFTP upload not yet connected β€” use MLC portal".into()),
})
}
fn generate_submission_ref() -> String {
use std::time::{SystemTime, UNIX_EPOCH};
let t = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_nanos();
format!("RTSY-{:016x}", t & 0xFFFFFFFFFFFFFFFF)
}
#[cfg(test)]
mod tests {
use super::*;
fn sample_record() -> DurpRecord {
DurpRecord {
isrc: "US-S1Z-99-00001".to_string(),
track_title: "Test Track".to_string(),
primary_artist: "Test Artist".to_string(),
featured_artists: None,
release_title: Some("Test Album".to_string()),
upc: None,
catalogue_number: None,
label_name: "Test Label".to_string(),
release_date: Some("2024-01-15".to_string()),
duration: Some("3:45".to_string()),
distributor_name: "Retrosync".to_string(),
distributor_id: "PADPIDA-TEST".to_string(),
btfs_cid: None,
wikidata_qid: None,
submission_ref: "RTSY-test".to_string(),
}
}
#[test]
fn csv_generation() {
let records = vec![sample_record()];
let csv = generate_csv(&records);
assert!(csv.contains("US-S1Z-99-00001"));
assert!(csv.contains("Test Track"));
assert!(csv.ends_with("\r\n"));
}
#[test]
fn validation_passes() {
let records = vec![sample_record()];
let errs = validate_records(&records);
assert!(errs.is_empty(), "{errs:?}");
}
#[test]
fn validation_catches_bad_isrc() {
let mut r = sample_record();
r.isrc = "INVALID".to_string();
let errs = validate_records(&[r]);
assert!(errs.iter().any(|e| e.field == "isrc"));
}
#[test]
fn csv_injection_sanitised() {
let mut r = sample_record();
r.track_title = "=SUM(A1:B1)".to_string();
let csv = generate_csv(&[r]);
assert!(!csv.contains("=SUM"));
}
#[test]
fn duration_validation() {
assert!(is_valid_duration("3:45"));
assert!(is_valid_duration("10:00"));
assert!(!is_valid_duration("3:60"));
assert!(!is_valid_duration("invalid"));
}
}