Spaces:
Sleeping
Sleeping
colab-user committed on
Commit ·
7658895
1
Parent(s): 27f5fbd
Fix format download & transcribe segments
Browse files
- app/api/routes.py +24 -10
- app/schemas/models.py +1 -1
- app/services/processor.py +4 -4
app/api/routes.py
CHANGED
|
@@ -4,6 +4,7 @@ API routes for the transcription service.
|
|
| 4 |
import logging
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
|
|
|
| 7 |
|
| 8 |
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Form
|
| 9 |
from fastapi.responses import FileResponse
|
|
@@ -44,7 +45,7 @@ async def get_models():
|
|
| 44 |
async def transcribe_audio(
|
| 45 |
background_tasks: BackgroundTasks,
|
| 46 |
file: UploadFile = File(..., description="Audio file to transcribe"),
|
| 47 |
-
model: str = Form(default="
|
| 48 |
language: str = Form(default="vi", description="Language code"),
|
| 49 |
merge_segments: bool = Form(default=True, description="Merge consecutive speaker segments"),
|
| 50 |
):
|
|
@@ -81,14 +82,27 @@ async def transcribe_audio(
|
|
| 81 |
)
|
| 82 |
|
| 83 |
# Save output files
|
| 84 |
-
txt_filename = f"{upload_path.stem}_transcript.txt"
|
| 85 |
-
srt_filename = f"{upload_path.stem}_transcript.srt"
|
| 86 |
-
|
| 87 |
txt_path = settings.processed_dir / txt_filename
|
| 88 |
-
srt_path = settings.processed_dir / srt_filename
|
| 89 |
-
|
| 90 |
txt_path.write_text(result.txt_content, encoding="utf-8")
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# Schedule cleanup
|
| 94 |
background_tasks.add_task(cleanup_files, upload_path)
|
|
@@ -109,7 +123,7 @@ async def transcribe_audio(
|
|
| 109 |
duration=result.duration,
|
| 110 |
processing_time=result.processing_time,
|
| 111 |
download_txt=f"/api/download/{txt_filename}",
|
| 112 |
-
|
| 113 |
)
|
| 114 |
|
| 115 |
except HTTPException:
|
|
@@ -128,7 +142,7 @@ async def download_file(filename: str):
|
|
| 128 |
Supports: .txt, .srt files
|
| 129 |
"""
|
| 130 |
# Security: only allow specific extensions and no path traversal
|
| 131 |
-
if not filename.endswith(('.txt', '.
|
| 132 |
raise HTTPException(status_code=400, detail="Invalid filename")
|
| 133 |
|
| 134 |
filepath = settings.processed_dir / filename
|
|
@@ -137,7 +151,7 @@ async def download_file(filename: str):
|
|
| 137 |
raise HTTPException(status_code=404, detail="File not found")
|
| 138 |
|
| 139 |
# Determine media type
|
| 140 |
-
media_type = "text/
|
| 141 |
|
| 142 |
return FileResponse(
|
| 143 |
path=filepath,
|
|
|
|
| 4 |
import logging
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
| 7 |
+
import csv
|
| 8 |
|
| 9 |
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Form
|
| 10 |
from fastapi.responses import FileResponse
|
|
|
|
| 45 |
async def transcribe_audio(
|
| 46 |
background_tasks: BackgroundTasks,
|
| 47 |
file: UploadFile = File(..., description="Audio file to transcribe"),
|
| 48 |
+
model: str = Form(default="PhoWhisper Large", description="Whisper model to use"),
|
| 49 |
language: str = Form(default="vi", description="Language code"),
|
| 50 |
merge_segments: bool = Form(default=True, description="Merge consecutive speaker segments"),
|
| 51 |
):
|
|
|
|
| 82 |
)
|
| 83 |
|
| 84 |
# Save output files
|
| 85 |
+
txt_filename = f"{upload_path.stem}_transcript.txt"
|
|
|
|
|
|
|
| 86 |
txt_path = settings.processed_dir / txt_filename
|
|
|
|
|
|
|
| 87 |
txt_path.write_text(result.txt_content, encoding="utf-8")
|
| 88 |
+
|
| 89 |
+
csv_filename = f"{upload_path.stem}_transcript.csv"
|
| 90 |
+
csv_path = settings.processed_dir / csv_filename
|
| 91 |
+
|
| 92 |
+
with csv_path.open("w", newline="", encoding="utf-8") as f:
|
| 93 |
+
writer = csv.DictWriter(
|
| 94 |
+
f,
|
| 95 |
+
fieldnames=["start", "end", "speaker", "text"]
|
| 96 |
+
)
|
| 97 |
+
writer.writeheader()
|
| 98 |
+
for seg in result.segments:
|
| 99 |
+
writer.writerow({
|
| 100 |
+
"start": round(seg.start, 2),
|
| 101 |
+
"end": round(seg.end, 2),
|
| 102 |
+
"speaker": seg.speaker,
|
| 103 |
+
"text": seg.text
|
| 104 |
+
})
|
| 105 |
+
|
| 106 |
|
| 107 |
# Schedule cleanup
|
| 108 |
background_tasks.add_task(cleanup_files, upload_path)
|
|
|
|
| 123 |
duration=result.duration,
|
| 124 |
processing_time=result.processing_time,
|
| 125 |
download_txt=f"/api/download/{txt_filename}",
|
| 126 |
+
download_csv=f"/api/download/{csv_filename}",
|
| 127 |
)
|
| 128 |
|
| 129 |
except HTTPException:
|
|
|
|
| 142 |
Supports: .txt, .srt files
|
| 143 |
"""
|
| 144 |
# Security: only allow specific extensions and no path traversal
|
| 145 |
+
if not filename.endswith(('.txt', '.csv')) or '/' in filename or '..' in filename:
|
| 146 |
raise HTTPException(status_code=400, detail="Invalid filename")
|
| 147 |
|
| 148 |
filepath = settings.processed_dir / filename
|
|
|
|
| 151 |
raise HTTPException(status_code=404, detail="File not found")
|
| 152 |
|
| 153 |
# Determine media type
|
| 154 |
+
media_type = "text/csv" if filename.endswith('.txt') else "application/x-subrip"
|
| 155 |
|
| 156 |
return FileResponse(
|
| 157 |
path=filepath,
|
app/schemas/models.py
CHANGED
|
@@ -56,7 +56,7 @@ class TranscriptionResponse(BaseModel):
|
|
| 56 |
speaker_count: int = Field(default=0, description="Number of detected speakers")
|
| 57 |
processing_time: float = Field(default=0.0, description="Processing time in seconds")
|
| 58 |
download_txt: Optional[str] = Field(default=None, description="Download URL for TXT file")
|
| 59 |
-
|
| 60 |
|
| 61 |
|
| 62 |
class ErrorResponse(BaseModel):
|
|
|
|
| 56 |
speaker_count: int = Field(default=0, description="Number of detected speakers")
|
| 57 |
processing_time: float = Field(default=0.0, description="Processing time in seconds")
|
| 58 |
download_txt: Optional[str] = Field(default=None, description="Download URL for TXT file")
|
| 59 |
+
download_csv: Optional[str] = Field(default=None, description="Download URL for CSV file")
|
| 60 |
|
| 61 |
|
| 62 |
class ErrorResponse(BaseModel):
|
app/services/processor.py
CHANGED
|
@@ -39,7 +39,7 @@ class ProcessingResult:
|
|
| 39 |
|
| 40 |
# Output files
|
| 41 |
txt_content: str = ""
|
| 42 |
-
|
| 43 |
|
| 44 |
|
| 45 |
def convert_audio_to_wav(audio_path: Path) -> Path:
|
|
@@ -150,7 +150,7 @@ class Processor:
|
|
| 150 |
async def process_audio(
|
| 151 |
cls,
|
| 152 |
audio_path: Path,
|
| 153 |
-
model_name: str = "
|
| 154 |
language: str = "vi",
|
| 155 |
merge_segments: bool = True,
|
| 156 |
# VAD options
|
|
@@ -275,7 +275,7 @@ class Processor:
|
|
| 275 |
|
| 276 |
# Step 6: Generate outputs
|
| 277 |
txt_content = cls._generate_txt(processed_segments, unique_speakers, processing_time, duration)
|
| 278 |
-
|
| 279 |
|
| 280 |
# Cleanup WAV if different from original
|
| 281 |
if wav_path != audio_path and wav_path.exists():
|
|
@@ -290,7 +290,7 @@ class Processor:
|
|
| 290 |
duration=duration,
|
| 291 |
processing_time=processing_time,
|
| 292 |
txt_content=txt_content,
|
| 293 |
-
|
| 294 |
)
|
| 295 |
|
| 296 |
@classmethod
|
|
|
|
| 39 |
|
| 40 |
# Output files
|
| 41 |
txt_content: str = ""
|
| 42 |
+
csv_content: str = ""
|
| 43 |
|
| 44 |
|
| 45 |
def convert_audio_to_wav(audio_path: Path) -> Path:
|
|
|
|
| 150 |
async def process_audio(
|
| 151 |
cls,
|
| 152 |
audio_path: Path,
|
| 153 |
+
model_name: str = "PhoWhisper Large",
|
| 154 |
language: str = "vi",
|
| 155 |
merge_segments: bool = True,
|
| 156 |
# VAD options
|
|
|
|
| 275 |
|
| 276 |
# Step 6: Generate outputs
|
| 277 |
txt_content = cls._generate_txt(processed_segments, unique_speakers, processing_time, duration)
|
| 278 |
+
csv_content = cls._generate_csv(processed_segments)
|
| 279 |
|
| 280 |
# Cleanup WAV if different from original
|
| 281 |
if wav_path != audio_path and wav_path.exists():
|
|
|
|
| 290 |
duration=duration,
|
| 291 |
processing_time=processing_time,
|
| 292 |
txt_content=txt_content,
|
| 293 |
+
csv_content=csv_content
|
| 294 |
)
|
| 295 |
|
| 296 |
@classmethod
|