Clearwave48 committed on
Commit
c42513e
·
verified ·
1 Parent(s): 5b73e2d

Create denoiser.py

Browse files
Files changed (1) hide show
  1. denoiser.py +143 -0
denoiser.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClearWave — Denoiser (Cleanvoice SDK edition)
3
+ ================================================
4
+ ✅ Pre-converts any format (.opus, .aac, .m4a, .mp3, .wav) to standard WAV via ffmpeg
5
+ ✅ Sends to Cleanvoice API (noise, fillers, stutters, silences, breaths, mouth sounds)
6
+ ✅ Downloads and returns the enhanced audio
7
+ ✅ clean_transcript_fillers() — removes filler words from transcript text
8
+
9
+ API key is read from HF Space secret: CLEANVOICE_API_KEY
10
+ """
11
+
12
+ import os
13
+ import re
14
+ import logging
15
+ import subprocess
16
+ import requests
17
+
18
# Module-level logger; handlers/levels are left to the application.
logger = logging.getLogger(__name__)

# Filler words (English + Telugu + Hindi) — used for transcript text cleaning.
# Every entry is lowercase ASCII because transcript tokens are lowercased and
# stripped of non-letters before lookup. Two-word entries ("you know",
# "i mean") are matched against pairs of adjacent tokens joined by one space.
FILLER_WORDS = {
    # English
    "um", "umm", "ummm", "uh", "uhh", "uhhh",
    "hmm", "hm", "hmmm",
    "er", "err", "errr",
    "eh", "ahh", "ah",
    "like", "basically", "literally",
    "you know", "i mean", "so",
    "right", "okay", "ok",
    # Telugu
    "ante", "ane", "mane", "arey", "enti",
    # Hindi
    "matlab", "yani", "bas", "acha",
}
34
+
35
+
36
class Denoiser:
    """Audio enhancer backed by the Cleanvoice API, plus a transcript cleaner.

    The API key is read once, at construction time, from the
    ``CLEANVOICE_API_KEY`` environment variable. A missing key is only
    warned about here; ``process()`` is what raises.
    """

    # Removes every character that is not a lowercase ASCII letter or
    # whitespace. Compiled once because it is applied to every token.
    _NON_ALPHA = re.compile(r'[^a-z\s]')

    def __init__(self):
        self.api_key = os.environ.get("CLEANVOICE_API_KEY", "")
        if not self.api_key:
            logger.warning("[Denoiser] ⚠️ CLEANVOICE_API_KEY not set — enhancement will fail")
        else:
            print("[Denoiser] ☁️ Cleanvoice SDK ready (WhatsApp/any-format support active)")

    # ══════════════════════════════════════════════════════════════════
    # MAIN ENTRY POINT
    # ══════════════════════════════════════════════════════════════════
    def process(self, audio_path: str, out_dir: str,
                fillers: bool = True,
                stutters: bool = True,
                long_silences: bool = True,
                breaths: bool = True,
                mouth_sounds: bool = True,
                **kwargs) -> dict:
        """
        Full Cleanvoice enhancement pipeline.

        Steps:
          A. Pre-convert input to 16kHz mono WAV (handles .opus, .aac, .m4a, etc.)
          B. Send to Cleanvoice API via SDK
          C. Download processed audio
          D. Return {'audio_path': str}

        Args:
            audio_path: Path of the audio file to enhance.
            out_dir: Directory for the intermediate and final files; created
                if it does not exist.
            fillers: Forwarded as ``remove_filler_words``.
            stutters: Forwarded as ``remove_stutters``.
            long_silences: Forwarded as ``remove_silence``.
            breaths: Forwarded as ``remove_breathing``.
            mouth_sounds: Forwarded as ``remove_mouth_sounds``.
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            ``{"audio_path": <path to clean_output.wav inside out_dir>}``.

        Raises:
            RuntimeError: If the API key is missing, the output directory
                cannot be created, or any SDK / download / write step fails
                (the underlying exception is chained as ``__cause__``).
        """
        if not self.api_key:
            raise RuntimeError("CLEANVOICE_API_KEY is not set in HF Space secrets.")

        # Make sure there is somewhere to write before spending an API call.
        # An empty out_dir means "current directory" for os.path.join, so
        # there is nothing to create in that case.
        if out_dir:
            try:
                os.makedirs(out_dir, exist_ok=True)
            except OSError as e:
                logger.error(f"[Denoiser] Cannot create output directory: {e}")
                raise RuntimeError(f"Cleanvoice enhancement failed: {e}") from e

        # ── Step A: Pre-convert to standard WAV ─────────────────────────────
        standard_input = self._preconvert_to_wav(audio_path, out_dir)

        # ── Step B: Process via Cleanvoice SDK ──────────────────────────────
        try:
            # Imported lazily so the module can be imported without the SDK.
            from cleanvoice import Cleanvoice
            cv = Cleanvoice({"api_key": self.api_key})

            logger.info("[Denoiser] Submitting to Cleanvoice API...")
            cv_result = cv.process(
                standard_input,
                remove_noise=True,
                studio_sound=True,
                remove_filler_words=fillers,
                remove_stutters=stutters,
                remove_silence=long_silences,
                remove_breathing=breaths,
                remove_mouth_sounds=mouth_sounds,
            )

            # ── Step C: Download processed audio ────────────────────────────
            download_url = cv_result.audio.url
            logger.info(f"[Denoiser] Downloading result from {download_url[:60]}...")
            response = requests.get(download_url, timeout=120)
            # Without this check a 4xx/5xx error body would be written to disk
            # and returned to the caller as if it were audio.
            response.raise_for_status()

            final_wav = os.path.join(out_dir, "clean_output.wav")
            with open(final_wav, "wb") as f:
                f.write(response.content)

            logger.info(f"[Denoiser] ✅ Enhanced audio saved → {final_wav}")
            return {"audio_path": final_wav}

        except Exception as e:
            # Single boundary: every failure surfaces as RuntimeError.
            logger.error(f"[Denoiser] Cleanvoice SDK error: {e}")
            raise RuntimeError(f"Cleanvoice enhancement failed: {e}") from e

    @staticmethod
    def _preconvert_to_wav(audio_path: str, out_dir: str) -> str:
        """Best-effort ffmpeg conversion of *audio_path* to 16 kHz mono WAV.

        Returns the path of ``input_converted.wav`` inside *out_dir* on
        success. If ffmpeg cannot be started or exits non-zero, a warning is
        logged and the original *audio_path* is returned instead.
        """
        converted = os.path.join(out_dir, "input_converted.wav")
        try:
            result = subprocess.run([
                "ffmpeg", "-y", "-i", audio_path,
                "-ar", "16000", "-ac", "1", converted
            ], capture_output=True)
        except OSError as e:
            # ffmpeg binary missing or not executable: same fallback as a
            # failed conversion rather than an uncaught crash.
            logger.warning(f"[Denoiser] ffmpeg pre-convert failed, using original file: {e}")
            return audio_path

        if result.returncode != 0:
            # ffmpeg failed — try passing original path directly
            logger.warning(f"[Denoiser] ffmpeg pre-convert failed, using original file: "
                           f"{result.stderr.decode(errors='replace')[-200:]}")
            return audio_path
        return converted

    # ══════════════════════════════════════════════════════════════════
    # TRANSCRIPT FILLER CLEANER
    # ══════════════════════════════════════════════════════════════════
    def clean_transcript_fillers(self, transcript: str, filler_words=None) -> str:
        """
        Remove filler words from transcript TEXT to match the cleaned audio.
        Handles both single-word fillers ("um", "like") and
        two-word fillers ("you know", "i mean").

        Tokens are whitespace-separated. Each is lowercased and stripped of
        everything but a-z before lookup, so "Um," matches "um". Kept tokens
        are emitted unchanged and joined with single spaces.

        Args:
            transcript: Text to clean. ``None`` or an empty string yields "".
            filler_words: Optional collection of lowercase fillers to remove;
                two-word entries use a single space. Defaults to the
                module-level ``FILLER_WORDS``.

        Returns:
            The transcript with filler tokens removed.
        """
        if not transcript:
            return ""
        if filler_words is None:
            filler_words = FILLER_WORDS

        words = transcript.split()
        # Normalise each token exactly once instead of re-running the regex
        # for the look-ahead token and again when it becomes the current one.
        normalized = [self._NON_ALPHA.sub('', word.lower()).strip() for word in words]

        result = []
        i = 0
        total = len(words)
        while i < total:
            # Check two-word filler first so "you know" beats a lone "you".
            if i + 1 < total and normalized[i] + " " + normalized[i + 1] in filler_words:
                i += 2
                continue

            if normalized[i] in filler_words:
                i += 1
                continue

            result.append(words[i])
            i += 1

        return " ".join(result)