colab-user committed on
Commit
7658895
·
1 Parent(s): 27f5fbd

Fix format download & transcribe segments

Browse files
app/api/routes.py CHANGED
@@ -4,6 +4,7 @@ API routes for the transcription service.
4
  import logging
5
  import time
6
  from pathlib import Path
 
7
 
8
  from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Form
9
  from fastapi.responses import FileResponse
@@ -44,7 +45,7 @@ async def get_models():
44
  async def transcribe_audio(
45
  background_tasks: BackgroundTasks,
46
  file: UploadFile = File(..., description="Audio file to transcribe"),
47
- model: str = Form(default="EraX-WoW-Turbo", description="Whisper model to use"),
48
  language: str = Form(default="vi", description="Language code"),
49
  merge_segments: bool = Form(default=True, description="Merge consecutive speaker segments"),
50
  ):
@@ -81,14 +82,27 @@ async def transcribe_audio(
81
  )
82
 
83
  # Save output files
84
- txt_filename = f"{upload_path.stem}_transcript.txt"
85
- srt_filename = f"{upload_path.stem}_transcript.srt"
86
-
87
  txt_path = settings.processed_dir / txt_filename
88
- srt_path = settings.processed_dir / srt_filename
89
-
90
  txt_path.write_text(result.txt_content, encoding="utf-8")
91
- srt_path.write_text(result.srt_content, encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # Schedule cleanup
94
  background_tasks.add_task(cleanup_files, upload_path)
@@ -109,7 +123,7 @@ async def transcribe_audio(
109
  duration=result.duration,
110
  processing_time=result.processing_time,
111
  download_txt=f"/api/download/{txt_filename}",
112
- download_srt=f"/api/download/{srt_filename}",
113
  )
114
 
115
  except HTTPException:
@@ -128,7 +142,7 @@ async def download_file(filename: str):
128
  Supports: .txt, .srt files
129
  """
130
  # Security: only allow specific extensions and no path traversal
131
- if not filename.endswith(('.txt', '.srt')) or '/' in filename or '..' in filename:
132
  raise HTTPException(status_code=400, detail="Invalid filename")
133
 
134
  filepath = settings.processed_dir / filename
@@ -137,7 +151,7 @@ async def download_file(filename: str):
137
  raise HTTPException(status_code=404, detail="File not found")
138
 
139
  # Determine media type
140
- media_type = "text/plain" if filename.endswith('.txt') else "application/x-subrip"
141
 
142
  return FileResponse(
143
  path=filepath,
 
4
  import logging
5
  import time
6
  from pathlib import Path
7
+ import csv
8
 
9
  from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks, Form
10
  from fastapi.responses import FileResponse
 
45
  async def transcribe_audio(
46
  background_tasks: BackgroundTasks,
47
  file: UploadFile = File(..., description="Audio file to transcribe"),
48
+ model: str = Form(default="PhoWhisper Large", description="Whisper model to use"),
49
  language: str = Form(default="vi", description="Language code"),
50
  merge_segments: bool = Form(default=True, description="Merge consecutive speaker segments"),
51
  ):
 
82
  )
83
 
84
  # Save output files
85
+ txt_filename = f"{upload_path.stem}_transcript.txt"
 
 
86
  txt_path = settings.processed_dir / txt_filename
 
 
87
  txt_path.write_text(result.txt_content, encoding="utf-8")
88
+
89
+ csv_filename = f"{upload_path.stem}_transcript.csv"
90
+ csv_path = settings.processed_dir / csv_filename
91
+
92
+ with csv_path.open("w", newline="", encoding="utf-8") as f:
93
+ writer = csv.DictWriter(
94
+ f,
95
+ fieldnames=["start", "end", "speaker", "text"]
96
+ )
97
+ writer.writeheader()
98
+ for seg in result.segments:
99
+ writer.writerow({
100
+ "start": round(seg.start, 2),
101
+ "end": round(seg.end, 2),
102
+ "speaker": seg.speaker,
103
+ "text": seg.text
104
+ })
105
+
106
 
107
  # Schedule cleanup
108
  background_tasks.add_task(cleanup_files, upload_path)
 
123
  duration=result.duration,
124
  processing_time=result.processing_time,
125
  download_txt=f"/api/download/{txt_filename}",
126
+ download_csv=f"/api/download/{csv_filename}",
127
  )
128
 
129
  except HTTPException:
 
142
  Supports: .txt, .csv files
143
  """
144
  # Security: only allow specific extensions and no path traversal
145
+ if not filename.endswith(('.txt', '.csv')) or '/' in filename or '..' in filename:
146
  raise HTTPException(status_code=400, detail="Invalid filename")
147
 
148
  filepath = settings.processed_dir / filename
 
151
  raise HTTPException(status_code=404, detail="File not found")
152
 
153
  # Determine media type
154
+ media_type = "text/csv" if filename.endswith('.txt') else "application/x-subrip"
155
 
156
  return FileResponse(
157
  path=filepath,
app/schemas/models.py CHANGED
@@ -56,7 +56,7 @@ class TranscriptionResponse(BaseModel):
56
  speaker_count: int = Field(default=0, description="Number of detected speakers")
57
  processing_time: float = Field(default=0.0, description="Processing time in seconds")
58
  download_txt: Optional[str] = Field(default=None, description="Download URL for TXT file")
59
- download_srt: Optional[str] = Field(default=None, description="Download URL for SRT file")
60
 
61
 
62
  class ErrorResponse(BaseModel):
 
56
  speaker_count: int = Field(default=0, description="Number of detected speakers")
57
  processing_time: float = Field(default=0.0, description="Processing time in seconds")
58
  download_txt: Optional[str] = Field(default=None, description="Download URL for TXT file")
59
+ download_csv: Optional[str] = Field(default=None, description="Download URL for CSV file")
60
 
61
 
62
  class ErrorResponse(BaseModel):
app/services/processor.py CHANGED
@@ -39,7 +39,7 @@ class ProcessingResult:
39
 
40
  # Output files
41
  txt_content: str = ""
42
- srt_content: str = ""
43
 
44
 
45
  def convert_audio_to_wav(audio_path: Path) -> Path:
@@ -150,7 +150,7 @@ class Processor:
150
  async def process_audio(
151
  cls,
152
  audio_path: Path,
153
- model_name: str = "EraX-WoW-Turbo",
154
  language: str = "vi",
155
  merge_segments: bool = True,
156
  # VAD options
@@ -275,7 +275,7 @@ class Processor:
275
 
276
  # Step 6: Generate outputs
277
  txt_content = cls._generate_txt(processed_segments, unique_speakers, processing_time, duration)
278
- srt_content = cls._generate_srt(processed_segments)
279
 
280
  # Cleanup WAV if different from original
281
  if wav_path != audio_path and wav_path.exists():
@@ -290,7 +290,7 @@ class Processor:
290
  duration=duration,
291
  processing_time=processing_time,
292
  txt_content=txt_content,
293
- srt_content=srt_content
294
  )
295
 
296
  @classmethod
 
39
 
40
  # Output files
41
  txt_content: str = ""
42
+ csv_content: str = ""
43
 
44
 
45
  def convert_audio_to_wav(audio_path: Path) -> Path:
 
150
  async def process_audio(
151
  cls,
152
  audio_path: Path,
153
+ model_name: str = "PhoWhisper Large",
154
  language: str = "vi",
155
  merge_segments: bool = True,
156
  # VAD options
 
275
 
276
  # Step 6: Generate outputs
277
  txt_content = cls._generate_txt(processed_segments, unique_speakers, processing_time, duration)
278
+ csv_content = cls._generate_csv(processed_segments)
279
 
280
  # Cleanup WAV if different from original
281
  if wav_path != audio_path and wav_path.exists():
 
290
  duration=duration,
291
  processing_time=processing_time,
292
  txt_content=txt_content,
293
+ csv_content=csv_content
294
  )
295
 
296
  @classmethod