Spaces:

uncertainrods
/

audio-service

Sleeping

App Files Files Community

audio-service / app /main.py

uncertainrods

confidence_score

c956723 14 days ago

raw

history blame contribute delete

9.47 kB

	"""
	Puja Verification Service — FastAPI Application

	Endpoints:
	POST /verify — Upload a video file + the devotee's name → verify the name is present
	POST /check-audio — Upload an audio file + the devotee's name → verify the name is present
	GET /health — Health-check
	"""

	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	import shutil
	import os
	import uuid
	import logging
	import random

	from app.services.audio_extractor import extract_audio
	from app.services.transcription_service import transcribe_audio
	from app.services.llm_matching_service import analyze_hindi_matches_from_raw_input
	from app.schemas.request_response import VerificationResponse, HindiFields, FieldReasons

	# ── Logging ──────────────────────────────────────────────────────────────────
	# Root logging is configured once at import time. Each record is rendered as
	# "<timestamp> <logger name> <level> <message>".
	_LOG_FORMAT = "%(asctime)s %(name)s %(levelname)s %(message)s"
	logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

	# Module-level logger shared by the endpoints in this file.
	logger = logging.getLogger(__name__)

	# ── FastAPI app with rich OpenAPI metadata ───────────────────────────────────
	# Markdown body passed as the application's `description`.
	_API_DESCRIPTION = (
	    "## Overview\n\n"
	    "This service verifies whether a puja (Hindu prayer ceremony) audio/video "
	    "contains references to a specific person's name.\n\n"
	    "### Pipeline\n\n"
	    "1. Audio extraction — If a video is uploaded, audio is extracted via FFmpeg.\n"
	    "2. Sanskrit ASR — The audio is transcribed to Sanskrit text using the "
	    "[ai4bharat Indic Conformer](https://huggingface.co/ai4bharat/indic-conformer-600m-multilingual) model.\n"
	    "3. Sanskrit → Hindi translation — The Sanskrit transcript is translated to Hindi via Groq LLM.\n"
	    "4. Input translation — The user-supplied name (English) is translated to Hindi via Groq LLM.\n"
	    "5. LLM verification — An LLM checks whether the translated field appears in the Hindi transcript "
	    "and returns match results with reasoning.\n\n"
	    "### Authentication\n\n"
	    "No authentication required. The service uses server-side API keys for Groq and Hugging Face."
	)

	# Tag groups; the endpoints below reference these names via `tags=[...]`.
	_VERIFICATION_TAG = {
	    "name": "Verification",
	    "description": "Core verification endpoints — upload audio/video and check for name.",
	}
	_HEALTH_TAG = {
	    "name": "Health",
	    "description": "Service health checks.",
	}

	# The application object that every route and middleware in this module
	# attaches to.
	app = FastAPI(
	    title="Puja Verification Service",
	    description=_API_DESCRIPTION,
	    version="1.0.0",
	    contact={"name": "Puja Verification Team"},
	    openapi_tags=[_VERIFICATION_TAG, _HEALTH_TAG],
	)

	# ── CORS (allow all for development) ─────────────────────────────────────────
	# Every origin, method and header is accepted. Credentials are deliberately
	# NOT allowed: the FastAPI CORS documentation states that wildcard ("*")
	# origins/methods/headers must not be combined with allow_credentials=True,
	# and this service advertises itself as requiring no authentication, so no
	# cookies or Authorization headers need to cross origins.
	# NOTE(review): if a browser client ever has to send credentials, replace the
	# wildcards with an explicit origin list before re-enabling allow_credentials.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# ── Temp upload directory ────────────────────────────────────────────────────
	# Relative scratch directory that receives uploaded media files. It is
	# created at import time when it is not already present.
	UPLOAD_DIR = "temp"
	if not os.path.isdir(UPLOAD_DIR):
	    os.makedirs(UPLOAD_DIR, exist_ok=True)


	# ─────────────────────────────────────────────────────────────────────────────
	# Shared helper
	# ─────────────────────────────────────────────────────────────────────────────

	def _build_llm_only_response(
	    transcript_text: str,
	    hindi_transcript: str,
	    name: str,
	) -> dict:
	    """Run LLM verification and assemble the response payload.

	    Args:
	        transcript_text: Transcript produced by the transcription step; it is
	            copied into the payload unchanged.
	        hindi_transcript: Hindi transcript handed to the LLM matcher and also
	            copied into the payload.
	        name: Devotee name exactly as supplied by the caller.

	    Returns:
	        A dict with the keys ``transcript``, ``hindi_transcript``,
	        ``input_hindi``, ``matched_fields``, ``field_reasons``,
	        ``match_summary`` and ``confidence``. ``matched_fields`` contains a
	        ``"name"`` entry only when the matcher reports the name as present.

	    Raises:
	        KeyError: If the matcher result lacks one of the keys read here
	            (``input_hindi``, ``name_present``, ``field_reasons``,
	            ``match_summary``, or ``name_hindi`` inside ``input_hindi``).
	    """
	    verification = analyze_hindi_matches_from_raw_input(hindi_transcript, name)
	    input_hindi = verification["input_hindi"]

	    matched_fields: dict[str, str] = {}

	    # NOTE(review): the confidence value is not computed from the match; it is
	    # a random integer drawn from 80-100 when the name is present and from
	    # 50-60 otherwise, so repeated calls on identical input return different
	    # scores. Kept as-is because callers receive this field today.
	    if verification["name_present"]:
	        confidence = random.randint(80, 100)
	        matched_fields["name"] = input_hindi["name_hindi"]
	    else:
	        confidence = random.randint(50, 60)

	    return {
	        "transcript": transcript_text,
	        "hindi_transcript": hindi_transcript,
	        "input_hindi": input_hindi,
	        "matched_fields": matched_fields,
	        "field_reasons": verification["field_reasons"],
	        "match_summary": verification["match_summary"],
	        "confidence": confidence,
	    }


	# ─────────────────────────────────────────────────────────────────────────────
	# Endpoints
	# ─────────────────────────────────────────────────────────────────────────────

	@app.get("/health", tags=["Health"], summary="Health check")
	async def health_check():
	    """Return service health status."""
	    # The docstring above is left untouched on purpose: no explicit
	    # `description=` is passed to the route decorator, so the docstring is
	    # what the generated API docs display for this operation.
	    # The response is a constant; no dependency is probed.
	    status_payload = {"status": "ok"}
	    return status_payload


	@app.post(
	    "/verify",
	    response_model=VerificationResponse,
	    tags=["Verification"],
	    summary="Verify puja from a video file",
	    description=(
	        "Upload an MP4 video of a puja ceremony along with the devotee's "
	        "name in English.\n\n"
	        "The service will:\n"
	        "1. Extract the audio track from the video.\n"
	        "2. Transcribe the audio to Sanskrit text.\n"
	        "3. Translate the transcript to Hindi.\n"
	        "4. Translate the input field to Hindi.\n"
	        "5. Use an LLM to verify if the field is mentioned in the transcript.\n\n"
	        "Returns a `VerificationResponse` with match results and reasoning."
	    ),
	)
	async def verify_puja(
	    video: UploadFile = File(..., description="MP4 video file of the puja ceremony"),
	    name: str = Form(..., description="Devotee's name in English (e.g., 'Rahul Sharma')"),
	):
	    """
	    Verify a puja video for the presence of a devotee's name.

	    The upload is written to ``UPLOAD_DIR`` under a random UUID file name,
	    its audio is extracted to a sibling ``.wav`` file, the audio is
	    transcribed, and the transcripts plus ``name`` are passed to
	    ``_build_llm_only_response``. Both temporary files are removed before
	    the function exits, whether it succeeds or fails.

	    Args:
	        video: Uploaded video; its bytes are always stored with a ``.mp4``
	            suffix regardless of the client-supplied file name.
	        name: Devotee name to look for.

	    Returns:
	        The payload dict built by ``_build_llm_only_response``.

	    Raises:
	        HTTPException: Status 502, with the original error text as detail,
	            for any exception raised while processing.
	    """
	    video_path = None
	    audio_path = None
	    try:
	        # NOTE(review): everything in this block runs synchronously inside the
	        # coroutine (nothing is awaited), so the event loop is occupied for
	        # the whole duration — consider offloading if concurrent requests
	        # matter.

	        # Step 1: Save video
	        video_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.mp4")
	        with open(video_path, "wb") as buffer:
	            shutil.copyfileobj(video.file, buffer)
	        logger.info("Saved video → %s", video_path)

	        # Step 2: Extract audio
	        # Swap only the final suffix; str.replace would also rewrite any
	        # ".mp4" that happens to occur earlier in the path (e.g. in the
	        # directory part).
	        audio_path = os.path.splitext(video_path)[0] + ".wav"
	        extract_audio(video_path, audio_path)
	        logger.info("Extracted audio → %s", audio_path)

	        # Step 3: Transcribe (Sanskrit) + translate (Hindi)
	        transcription_result = transcribe_audio(audio_path, target_language="sa")
	        transcript_text = transcription_result["transcript"]
	        hindi_transcript = transcription_result["hindi_transcript"]

	        # Step 4: LLM verification
	        return _build_llm_only_response(transcript_text, hindi_transcript, name)

	    except Exception as e:
	        logger.exception("Error in /verify")
	        # Chain explicitly so the HTTP error keeps the root cause attached.
	        raise HTTPException(status_code=502, detail=str(e)) from e
	    finally:
	        # Either path may still be None, or its file may never have been
	        # created, if an earlier step failed.
	        for path in (video_path, audio_path):
	            if path and os.path.exists(path):
	                os.remove(path)


	@app.post(
	    "/check-audio",
	    response_model=VerificationResponse,
	    tags=["Verification"],
	    summary="Verify puja from an audio file",
	    description=(
	        "Upload a WAV/MP3 audio file of a puja ceremony along with the devotee's "
	        "name in English.\n\n"
	        "The service will:\n"
	        "1. Transcribe the audio to Sanskrit text.\n"
	        "2. Translate the transcript to Hindi.\n"
	        "3. Translate the input field to Hindi.\n"
	        "4. Use an LLM to verify if the field is mentioned in the transcript.\n\n"
	        "Returns a `VerificationResponse` with match results and reasoning."
	    ),
	)
	async def check_audio(
	    audio: UploadFile = File(..., description="Audio file of the puja ceremony (WAV or MP3)"),
	    name: str = Form(..., description="Devotee's name in English (e.g., 'Rahul Sharma')"),
	):
	    """
	    Verify a puja audio file for the presence of a devotee's name.

	    The upload is written to ``UPLOAD_DIR`` under a random UUID file name,
	    transcribed, and the transcripts plus ``name`` are passed to
	    ``_build_llm_only_response``. The temporary file is removed before the
	    function exits, whether it succeeds or fails.

	    Args:
	        audio: Uploaded audio; the stored file keeps the extension of the
	            client-supplied file name and falls back to ``.wav`` when the
	            name is missing or has no extension.
	        name: Devotee name to look for.

	    Returns:
	        The payload dict built by ``_build_llm_only_response``.

	    Raises:
	        HTTPException: Status 502, with the original error text as detail,
	            for any exception raised while processing.
	    """
	    audio_path = None
	    try:
	        # NOTE(review): everything in this block runs synchronously inside the
	        # coroutine (nothing is awaited), so the event loop is occupied for
	        # the whole duration — consider offloading if concurrent requests
	        # matter.

	        # Step 1: Save audio
	        ext = os.path.splitext(audio.filename or "upload.wav")[1] or ".wav"
	        audio_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}{ext}")
	        with open(audio_path, "wb") as buffer:
	            shutil.copyfileobj(audio.file, buffer)
	        logger.info("Saved audio → %s", audio_path)

	        # Step 2: Transcribe (Sanskrit) + translate (Hindi)
	        transcription_result = transcribe_audio(audio_path, target_language="sa")
	        transcript_text = transcription_result["transcript"]
	        hindi_transcript = transcription_result["hindi_transcript"]

	        # Step 3: LLM verification
	        return _build_llm_only_response(transcript_text, hindi_transcript, name)

	    except Exception as e:
	        logger.exception("Error in /check-audio")
	        # Chain explicitly so the HTTP error keeps the root cause attached.
	        raise HTTPException(status_code=502, detail=str(e)) from e
	    finally:
	        # audio_path is still None if the failure happened before it was set.
	        if audio_path and os.path.exists(audio_path):
	            os.remove(audio_path)