import logging
from typing import Optional

from .celery_app import celery_app
from ..services.stt_service import STTService
from ..services.nlp_service import NLPService
from ..models import SessionLocal, Transcript, AudioFile
from ..core.config import get_settings

logger = logging.getLogger(__name__)
settings = get_settings()


@celery_app.task
def process_audio_file(audio_file_id: int):
    """Transcribe a stored audio file and persist the result as a Transcript.

    Looks up the ``AudioFile`` row with the given id, sets its status to
    ``"processing"``, runs ``STTService.transcribe_file`` on its
    ``storage_path`` and stores a new ``Transcript`` row. The status ends as
    ``"completed"`` on success or ``"failed"`` on any error.

    Args:
        audio_file_id: Primary key of the ``AudioFile`` row to transcribe.

    Returns:
        None. If no row matches, an error is logged and nothing is changed.
        Exceptions raised during transcription are logged and swallowed.
    """
    db = SessionLocal()
    # Bound before the ``try`` so the error handler can tell whether the
    # lookup itself failed (previously this caused a NameError in ``except``).
    audio_file = None
    try:
        audio_file = (
            db.query(AudioFile).filter(AudioFile.id == audio_file_id).first()
        )
        if not audio_file:
            logger.error("AudioFile %s not found", audio_file_id)
            return

        audio_file.status = "processing"
        db.commit()

        stt_service = STTService()

        # NOTE(review): this call passes ``enable_word_time_offsets`` while
        # ``transcribe_file_path`` below passes ``enable_word_timestamps`` --
        # confirm which keyword ``STTService.transcribe_file`` accepts.
        result = stt_service.transcribe_file(
            audio_path=audio_file.storage_path,
            language=audio_file.language,
            enable_automatic_punctuation=True,
            enable_word_time_offsets=True,
            enable_speaker_diarization=True,  # defaulting to True for background tasks
        )

        transcript = Transcript(
            audio_file_id=audio_file.id,
            # NOTE: assumes ``user_id`` exists on AudioFile -- TODO confirm.
            user_id=audio_file.user_id,
            raw_text=result.text,
            processed_text=result.text,
            segments=(
                [s.model_dump() for s in result.segments]
                if result.segments
                else []
            ),
            words=(
                [w.model_dump() for w in result.words] if result.words else []
            ),
            language=result.language,
            confidence=result.confidence,
            duration=result.duration,
            word_count=result.word_count,
        )
        db.add(transcript)

        audio_file.status = "completed"
        db.commit()
    except Exception as e:
        logger.exception("Transcription failed: %s", e)
        try:
            # Discard any half-applied work (e.g. a pending Transcript) and
            # reset the session so the status update below can be committed.
            db.rollback()
            if audio_file is not None:
                audio_file.status = "failed"
                db.commit()
        except Exception:
            logger.exception(
                "Could not mark AudioFile %s as failed", audio_file_id
            )
            db.rollback()
    finally:
        db.close()


@celery_app.task
def analyze_transcript_background(transcript_id: int):
    """Run NLP analysis on a transcript and store the results on its row.

    Passes the transcript's ``processed_text`` to
    ``NLPService.process_transcript`` and writes the ``"sentiment"``,
    ``"keywords"`` and ``"summary"`` entries of the result to the
    transcript's ``sentiment``, ``topics`` (as ``{"keywords": ...}``) and
    ``summary`` attributes.

    Args:
        transcript_id: Primary key of the ``Transcript`` row to analyse.

    Returns:
        None. Failures are logged and swallowed.
    """
    db = SessionLocal()
    try:
        transcript = (
            db.query(Transcript).filter(Transcript.id == transcript_id).first()
        )
        if not transcript:
            logger.warning("Transcript %s not found", transcript_id)
            return

        nlp_service = NLPService()
        analysis = nlp_service.process_transcript(transcript.processed_text)

        transcript.sentiment = analysis["sentiment"]
        transcript.topics = {"keywords": analysis["keywords"]}
        transcript.summary = analysis["summary"]
        db.commit()
    except Exception as e:
        logger.exception("Analysis failed: %s", e)
        # Drop partially assigned fields instead of leaving them pending.
        db.rollback()
    finally:
        db.close()


@celery_app.task
def transcribe_file_path(
    file_path: str, language: Optional[str] = None, output_format: str = "txt"
) -> dict:
    """Transcribe a file path directly (generic task for the Batch Service).

    Args:
        file_path: Path of the audio file handed to the STT service.
        language: Language passed through to the STT service; may be None.
        output_format: Accepted for interface compatibility; not used by
            this function.

    Returns:
        A dict with the keys ``"text"``, ``"language"``, ``"duration"`` and
        ``"segments"`` (a list of serialised segments, empty when the result
        has none).

    Raises:
        Exception: Any error from the STT service is logged and re-raised.
    """
    try:
        stt_service = STTService()
        # NOTE(review): ``process_audio_file`` above passes
        # ``enable_word_time_offsets`` instead -- confirm the right keyword.
        result = stt_service.transcribe_file(
            audio_path=file_path,
            language=language,
            enable_word_timestamps=True,
        )
        return {
            "text": result.text,
            "language": result.language,
            "duration": result.duration,
            # ``model_dump`` matches how process_audio_file serialises the
            # same segment objects.
            "segments": (
                [s.model_dump() for s in result.segments]
                if result.segments
                else []
            ),
        }
    except Exception as e:
        logger.exception("Task failed: %s", e)
        # Bare ``raise`` keeps the original traceback intact.
        raise