Spaces:
Sleeping
Sleeping
| from .celery_app import celery_app | |
| from ..services.stt_service import STTService | |
| from ..services.nlp_service import NLPService | |
| from ..models import SessionLocal, Transcript, AudioFile | |
| from ..core.config import get_settings | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| settings = get_settings() | |
def process_audio_file(audio_file_id: int):
    """
    Background task to transcribe audio

    Looks up the AudioFile row for ``audio_file_id``, sets its status to
    "processing", transcribes its ``storage_path`` with STTService, stores
    the result as a new Transcript row and sets the status to "completed".
    If anything raises along the way the status is set to "failed".

    Args:
        audio_file_id: id of the AudioFile row to transcribe.

    Returns:
        None. A missing row is logged and skipped; transcription errors are
        logged and recorded on the row instead of being re-raised.
    """
    db = SessionLocal()
    # Bound up front so the except handler can tell whether the lookup itself
    # failed; otherwise the handler would hit an unbound name and hide the
    # real error.
    audio_file = None
    try:
        # Retrieve audio file record
        audio_file = db.query(AudioFile).filter(AudioFile.id == audio_file_id).first()
        if not audio_file:
            logger.error(f"AudioFile {audio_file_id} not found")
            return

        # Committed immediately so the "processing" state is stored before
        # the transcription call runs.
        audio_file.status = "processing"
        db.commit()

        # Initialize Service
        stt_service = STTService()

        # Transcribe
        result = stt_service.transcribe_file(
            audio_path=audio_file.storage_path,
            language=audio_file.language,
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
            enable_speaker_diarization=True,  # Defaulting to True for background tasks
        )

        # Create Transcript
        transcript = Transcript(
            audio_file_id=audio_file.id,
            user_id=audio_file.user_id,  # Assuming we add user_id to AudioFile
            raw_text=result.text,
            # No post-processing happens here, so both text fields start equal.
            processed_text=result.text,
            segments=[s.model_dump() for s in result.segments] if result.segments else [],
            words=[w.model_dump() for w in result.words] if result.words else [],
            language=result.language,
            confidence=result.confidence,
            duration=result.duration,
            word_count=result.word_count,
        )
        db.add(transcript)

        # The transcript insert and the status change share one commit.
        audio_file.status = "completed"
        db.commit()
    except Exception as e:
        logger.exception(f"Transcription failed: {e}")
        # A session whose flush/commit failed must be rolled back before it
        # can be used again; this also discards a half-added Transcript.
        db.rollback()
        if audio_file is not None:
            audio_file.status = "failed"
            db.commit()
    finally:
        db.close()
def analyze_transcript_background(transcript_id: int):
    """
    Background task for NLP analysis

    Loads the Transcript row for ``transcript_id``, runs
    ``NLPService.process_transcript`` on its ``processed_text`` and stores
    the "sentiment", "keywords" and "summary" entries of the result on the
    row.

    Args:
        transcript_id: id of the Transcript row to analyse.

    Returns:
        None. A missing row is logged and skipped; analysis errors are
        logged and not re-raised.
    """
    db = SessionLocal()
    try:
        transcript = db.query(Transcript).filter(Transcript.id == transcript_id).first()
        if not transcript:
            # Logged so a missing record is visible, matching how
            # process_audio_file reports a missing AudioFile.
            logger.error(f"Transcript {transcript_id} not found")
            return

        nlp_service = NLPService()
        analysis = nlp_service.process_transcript(transcript.processed_text)

        transcript.sentiment = analysis["sentiment"]
        # Keywords are stored wrapped in a dict under the "keywords" key.
        transcript.topics = {"keywords": analysis["keywords"]}
        transcript.summary = analysis["summary"]
        db.commit()
    except Exception as e:
        logger.exception(f"Analysis failed: {e}")
        # Drop any partially applied changes before the session is closed.
        db.rollback()
    finally:
        db.close()
def transcribe_file_path(file_path: str, language: str = None, output_format: str = "txt") -> dict:
    """
    Generic task to transcribe a file path directly (for Batch Service)

    Args:
        file_path: Path of the audio file handed to STTService.transcribe_file.
        language: Language passed through to the STT service; may be None.
        output_format: Accepted for interface compatibility; not used by
            this function.

    Returns:
        dict with "text", "language", "duration" and "segments" (a list of
        per-segment dicts, empty when the result has no segments).

    Raises:
        Exception: anything raised during transcription is logged and
            re-raised unchanged.
    """
    try:
        stt_service = STTService()
        # NOTE(review): process_audio_file calls the same method with
        # enable_word_time_offsets=True; confirm which keyword
        # STTService.transcribe_file actually accepts.
        result = stt_service.transcribe_file(
            audio_path=file_path,
            language=language,
            enable_word_timestamps=True,
        )
        # model_dump() matches how process_audio_file serialises the same
        # segment objects.
        segments = [s.model_dump() for s in result.segments] if result.segments else []
        return {
            "text": result.text,
            "language": result.language,
            "duration": result.duration,
            "segments": segments,
        }
    except Exception as e:
        logger.error(f"Task failed: {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise