Spaces:

lordofgaming
/

voiceforge

Sleeping

voiceforge / backend /app /workers /tasks.py

lordofgaming

Initial VoiceForge deployment (clean)

673435a 23 days ago

3.5 kB

	from .celery_app import celery_app
	from ..services.stt_service import STTService
	from ..services.nlp_service import NLPService
	from ..models import SessionLocal, Transcript, AudioFile
	from ..core.config import get_settings
	import logging

	# Module-level logger shared by all tasks in this file.
	logger = logging.getLogger(__name__)
	# Application settings, loaded once at import time.
	# NOTE(review): not referenced by any task visible in this module — confirm whether it is still needed.
	settings = get_settings()

	@celery_app.task
	def process_audio_file(audio_file_id: int):
	    """
	    Background task to transcribe a stored audio file.

	    Looks up the AudioFile row, marks it "processing", runs
	    STTService.transcribe_file on its storage path, stores the result as a
	    new Transcript row and marks the file "completed".

	    On any exception the error is logged with its traceback, pending
	    session changes are rolled back and, if the AudioFile row had been
	    loaded, its status is set to "failed". The original exception is not
	    re-raised.

	    Args:
	        audio_file_id: Primary key of the AudioFile row to process.

	    Returns:
	        None.
	    """
	    db = SessionLocal()
	    # Bound before the try block so the except handler can tell whether the
	    # row was ever loaded (the query itself may be what failed).
	    audio_file = None
	    try:
	        # Retrieve audio file record
	        audio_file = db.query(AudioFile).filter(AudioFile.id == audio_file_id).first()
	        if not audio_file:
	            logger.error(f"AudioFile {audio_file_id} not found")
	            return

	        # Commit the status change immediately so it is stored before the
	        # transcription call starts.
	        audio_file.status = "processing"
	        db.commit()

	        # Initialize Service
	        stt_service = STTService()

	        # Transcribe
	        # NOTE(review): transcribe_file_path in this module passes
	        # enable_word_timestamps to the same method, while this call passes
	        # enable_word_time_offsets — confirm which keyword
	        # STTService.transcribe_file actually accepts.
	        result = stt_service.transcribe_file(
	            audio_path=audio_file.storage_path,
	            language=audio_file.language,
	            enable_automatic_punctuation=True,
	            enable_word_time_offsets=True,
	            enable_speaker_diarization=True,  # Defaulting to True for background tasks
	        )

	        # Create Transcript
	        transcript = Transcript(
	            audio_file_id=audio_file.id,
	            # NOTE(review): original author's remark — "Assuming we add
	            # user_id to AudioFile"; verify the column exists.
	            user_id=audio_file.user_id,
	            raw_text=result.text,
	            # No post-processing is applied here, so processed_text starts
	            # out identical to raw_text.
	            processed_text=result.text,
	            segments=[s.model_dump() for s in result.segments] if result.segments else [],
	            words=[w.model_dump() for w in result.words] if result.words else [],
	            language=result.language,
	            confidence=result.confidence,
	            duration=result.duration,
	            word_count=result.word_count,
	        )
	        db.add(transcript)

	        # The transcript insert and the status change are committed together.
	        audio_file.status = "completed"
	        db.commit()

	    except Exception as e:
	        logger.exception("Transcription failed: %s", e)
	        # Discard whatever was pending (e.g. a half-built Transcript or a
	        # failed flush) so the "failed" status can be committed cleanly.
	        db.rollback()
	        if audio_file is not None:
	            audio_file.status = "failed"
	            db.commit()
	    finally:
	        db.close()


	@celery_app.task
	def analyze_transcript_background(transcript_id: int):
	    """
	    Background task for NLP analysis of a stored transcript.

	    Loads the Transcript row, passes its processed_text to
	    NLPService.process_transcript and writes the "sentiment", "keywords"
	    and "summary" entries of the returned mapping onto the row before
	    committing.

	    Failures are logged with their traceback and swallowed; nothing is
	    committed in that case because the commit is the last step of the try
	    block.

	    Args:
	        transcript_id: Primary key of the Transcript row to analyze.

	    Returns:
	        None.
	    """
	    db = SessionLocal()
	    try:
	        transcript = db.query(Transcript).filter(Transcript.id == transcript_id).first()
	        if not transcript:
	            # Log the miss, mirroring process_audio_file, instead of
	            # returning silently.
	            logger.error(f"Transcript {transcript_id} not found")
	            return

	        nlp_service = NLPService()
	        analysis = nlp_service.process_transcript(transcript.processed_text)

	        transcript.sentiment = analysis["sentiment"]
	        # Keywords are stored wrapped under a "keywords" key in topics.
	        transcript.topics = {"keywords": analysis["keywords"]}
	        transcript.summary = analysis["summary"]

	        db.commit()
	    except Exception as e:
	        # Best-effort task: record the failure (with traceback) but do not
	        # re-raise.
	        logger.exception("Analysis failed: %s", e)
	    finally:
	        db.close()
	@celery_app.task
	def transcribe_file_path(file_path: str, language: str = None, output_format: str = "txt") -> dict:
	    """
	    Generic task to transcribe a file path directly (for Batch Service).

	    Unlike process_audio_file this task does not touch the database; it
	    only calls STTService.transcribe_file and returns a plain dict.

	    Args:
	        file_path: Path of the audio file to transcribe.
	        language: Language passed through to the STT service; None is
	            forwarded as-is.
	        output_format: Accepted for interface compatibility; not used by
	            this task.

	    Returns:
	        A dict with the keys "text", "language", "duration" and "segments"
	        (a list of dumped segment models, empty when the result has none).

	    Raises:
	        Exception: Any error raised by the STT service is logged and
	            re-raised unchanged.
	    """
	    try:
	        stt_service = STTService()
	        # NOTE(review): process_audio_file passes enable_word_time_offsets to
	        # the same method, while this call passes enable_word_timestamps —
	        # confirm which keyword STTService.transcribe_file actually accepts.
	        result = stt_service.transcribe_file(
	            audio_path=file_path,
	            language=language,
	            enable_word_timestamps=True,
	        )

	        return {
	            "text": result.text,
	            "language": result.language,
	            "duration": result.duration,
	            # model_dump() matches how process_audio_file serializes the
	            # same segment objects.
	            "segments": [s.model_dump() for s in result.segments] if result.segments else [],
	        }
	    except Exception as e:
	        logger.exception("Task failed: %s", e)
	        # Bare raise re-raises the active exception without adding a frame.
	        raise