Spaces:
Sleeping
Sleeping
colab-user committed on
Commit ·
97bec3e
1
Parent(s): 513d906
fix processor & UI
Browse files
- app/services/processor.py +22 -19
app/services/processor.py
CHANGED
|
@@ -194,24 +194,29 @@ class Processor:
|
|
| 194 |
}
|
| 195 |
|
| 196 |
speakers = list(speaker_map.values())
|
| 197 |
-
speaker_count = len(speakers)
|
| 198 |
|
| 199 |
|
| 200 |
# 6. Infer role ONLY if diarization did not provide
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
speaker_duration[seg.speaker] += seg.end - seg.start
|
| 205 |
-
|
| 206 |
-
agent_raw = max(speaker_duration, key=speaker_duration.get)
|
| 207 |
|
|
|
|
| 208 |
roles = {
|
| 209 |
-
speaker_map
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
for spk in speaker_duration
|
| 211 |
}
|
| 212 |
|
| 213 |
-
for
|
| 214 |
-
roles.setdefault(
|
|
|
|
| 215 |
|
| 216 |
|
| 217 |
# 7: Transcribe
|
|
@@ -255,14 +260,14 @@ class Processor:
|
|
| 255 |
if not text or not text.strip():
|
| 256 |
continue
|
| 257 |
|
| 258 |
-
|
| 259 |
|
| 260 |
processed_segments.append(
|
| 261 |
TranscriptSegment(
|
| 262 |
start=seg.start,
|
| 263 |
end=seg.end,
|
| 264 |
-
speaker=
|
| 265 |
-
role=roles.get(
|
| 266 |
text=text.strip(),
|
| 267 |
)
|
| 268 |
)
|
|
@@ -273,21 +278,19 @@ class Processor:
|
|
| 273 |
start=0.0,
|
| 274 |
end=duration,
|
| 275 |
speaker=speakers[0],
|
| 276 |
-
role=roles
|
| 277 |
text="(No speech detected)"
|
| 278 |
)
|
| 279 |
]
|
| 280 |
-
speakers = ["Speaker 1"]
|
| 281 |
-
roles = {"Speaker 1": "UNKNOWN"}
|
| 282 |
-
speaker_count = 1
|
| 283 |
|
| 284 |
processing_time = time.time() - t0
|
| 285 |
-
|
|
|
|
| 286 |
txt_content = cls._generate_txt(
|
| 287 |
processed_segments,
|
| 288 |
speaker_count,
|
| 289 |
-
duration,
|
| 290 |
processing_time,
|
|
|
|
| 291 |
roles
|
| 292 |
)
|
| 293 |
|
|
|
|
| 194 |
}
|
| 195 |
|
| 196 |
speakers = list(speaker_map.values())
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
# 6. Infer role ONLY if diarization did not provide
|
| 200 |
+
speaker_duration = defaultdict(float)
|
| 201 |
+
for seg in refined_segments:
|
| 202 |
+
speaker_duration[speaker_map[seg.speaker]] += seg.end - seg.start
|
|
|
|
|
|
|
|
|
|
| 203 |
|
| 204 |
+
if roles:
|
| 205 |
roles = {
|
| 206 |
+
speaker_map.get(raw, raw): role
|
| 207 |
+
for raw, role in roles.items()
|
| 208 |
+
if raw in speaker_map
|
| 209 |
+
}
|
| 210 |
+
else:
|
| 211 |
+
agent = max(speaker_duration, key=speaker_duration.get)
|
| 212 |
+
roles = {
|
| 213 |
+
spk: ("NV" if spk == agent else "KH")
|
| 214 |
for spk in speaker_duration
|
| 215 |
}
|
| 216 |
|
| 217 |
+
for spk in speakers:
|
| 218 |
+
roles.setdefault(spk, "KH")
|
| 219 |
+
|
| 220 |
|
| 221 |
|
| 222 |
# 7: Transcribe
|
|
|
|
| 260 |
if not text or not text.strip():
|
| 261 |
continue
|
| 262 |
|
| 263 |
+
label = speaker_map[seg.speaker]
|
| 264 |
|
| 265 |
processed_segments.append(
|
| 266 |
TranscriptSegment(
|
| 267 |
start=seg.start,
|
| 268 |
end=seg.end,
|
| 269 |
+
speaker=label,
|
| 270 |
+
role=roles.get(label),
|
| 271 |
text=text.strip(),
|
| 272 |
)
|
| 273 |
)
|
|
|
|
| 278 |
start=0.0,
|
| 279 |
end=duration,
|
| 280 |
speaker=speakers[0],
|
| 281 |
+
role=roles.get(speakers[0], "UNKNOWN"),
|
| 282 |
text="(No speech detected)"
|
| 283 |
)
|
| 284 |
]
|
|
|
|
|
|
|
|
|
|
| 285 |
|
| 286 |
processing_time = time.time() - t0
|
| 287 |
+
speaker_count=len(speakers)
|
| 288 |
+
|
| 289 |
txt_content = cls._generate_txt(
|
| 290 |
processed_segments,
|
| 291 |
speaker_count,
|
|
|
|
| 292 |
processing_time,
|
| 293 |
+
duration,
|
| 294 |
roles
|
| 295 |
)
|
| 296 |
|