File size: 2,788 Bytes
d7efa84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""Skip tracking persistence layer."""

import os
import csv
import tempfile
import logging
from datetime import datetime
from pathlib import Path

from filelock import FileLock, Timeout

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Column names (and column order) of the skip CSV; passed as the field names
# to csv.DictWriter whenever the file is rewritten.
SKIP_CSV_COLUMNS = ["labeler", "source", "reason", "timestamp"]


def save_skip(labeler: str, source: str, reason: str, csv_path: str) -> None:
    """Save a skip record to the shared skipped_audios.csv.

    The whole read-modify-write cycle runs while holding a file lock on
    ``<csv_path>.lock``. The new contents are written to a temporary file in
    the CSV's directory, which then replaces the CSV via ``os.replace``.

    If a record with the same ``labeler`` and ``source`` already exists it is
    overwritten; otherwise the new record is appended. Existing rows are
    rewritten using only the columns in ``SKIP_CSV_COLUMNS``.

    Args:
        labeler: Username of the labeler who skipped.
        source: Audio filename.
        reason: Reason for skipping.
        csv_path: Path to the shared skipped_audios.csv (``str`` or
            ``os.PathLike``).

    Raises:
        IOError: If the lock cannot be acquired within 10 seconds or the
            write operation fails.
    """
    # Accept pathlib.Path as well as str; the lock path is built by string
    # concatenation below.
    csv_path = os.fspath(csv_path)
    dir_name = os.path.dirname(csv_path) or "."
    lock = FileLock(csv_path + ".lock", timeout=10)

    try:
        # The lock file lives next to the CSV, so the directory has to exist
        # before the lock is acquired, not after.
        os.makedirs(dir_name, exist_ok=True)

        with lock:
            rows: list[dict] = []
            existing_mode = None
            path = Path(csv_path)
            if path.exists():
                # Remember the permission bits: mkstemp() creates its file
                # readable/writable by the owner only, and os.replace() would
                # otherwise carry that restrictive mode onto the shared CSV.
                existing_mode = path.stat().st_mode & 0o7777
                with open(path, "r", encoding="utf-8", newline="") as f:
                    rows = list(csv.DictReader(f))

            new_row = {
                "labeler": labeler,
                "source": source,
                "reason": reason,
                "timestamp": datetime.now().isoformat(),
            }

            # Upsert: overwrite if same labeler+source already skipped.
            # .get() keeps a file with a missing column from raising KeyError.
            for i, row in enumerate(rows):
                if row.get("labeler") == labeler and row.get("source") == source:
                    rows[i] = new_row
                    break
            else:
                rows.append(new_row)

            # Atomic write: temp file in the same directory, then replace.
            fd, tmp_path = tempfile.mkstemp(dir=dir_name, suffix=".tmp")
            try:
                with os.fdopen(fd, "w", encoding="utf-8", newline="") as f:
                    # extrasaction="ignore": rows read from a file with extra
                    # columns/fields must not make every later save fail.
                    writer = csv.DictWriter(
                        f, fieldnames=SKIP_CSV_COLUMNS, extrasaction="ignore"
                    )
                    writer.writeheader()
                    writer.writerows(rows)
                if existing_mode is not None:
                    os.chmod(tmp_path, existing_mode)
                os.replace(tmp_path, csv_path)
            except BaseException:
                # Do not leave a stray temp file behind, even on interrupt.
                if os.path.exists(tmp_path):
                    os.unlink(tmp_path)
                raise

            logger.info("Saved skip for '%s' by '%s': %s", source, labeler, reason)

    except Timeout as e:
        logger.error("Lock timeout for skip CSV: %s", csv_path)
        raise IOError("Failed to save skip record. Please try again.") from e
    except IOError as e:
        # Already the exception type callers expect; log and pass it through.
        logger.error("Failed to save skip for %s: %s", source, e)
        raise
    except Exception as e:
        logger.error("Failed to save skip for %s: %s", source, e)
        raise IOError("Failed to save skip record. Please try again.") from e