audio-service / app /main.py
uncertainrods's picture
confidence_score
c956723
"""
Puja Verification Service — FastAPI Application
Endpoints:
POST /verify — Upload a video file + devotee name → verify presence
POST /check-audio — Upload an audio file + devotee name → verify presence
GET /health — Health-check
"""
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import shutil
import os
import uuid
import logging
import random
from app.services.audio_extractor import extract_audio
from app.services.transcription_service import transcribe_audio
from app.services.llm_matching_service import analyze_hindi_matches_from_raw_input
from app.schemas.request_response import VerificationResponse, HindiFields, FieldReasons
# ── Logging ──────────────────────────────────────────────────────────────────
# Root logging: INFO and above, each record prefixed with timestamp, logger
# name and level.
logging.basicConfig(
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    level=logging.INFO,
)
# Module-scoped logger, named after this module.
logger = logging.getLogger(name=__name__)
# ── FastAPI app with rich OpenAPI metadata ───────────────────────────────────
# The Markdown in `description` and the tag descriptions are user-facing: they
# are published through the generated OpenAPI schema. The dashes and arrows
# below were previously stored as mis-decoded byte sequences and are restored
# to the intended characters here.
app = FastAPI(
    title="Puja Verification Service",
    description=(
        "## Overview\n\n"
        "This service verifies whether a **puja (Hindu prayer ceremony)** audio/video "
        "contains references to a specific person's **name**.\n\n"
        "### Pipeline\n\n"
        "1. **Audio extraction** — If a video is uploaded, audio is extracted via FFmpeg.\n"
        "2. **Sanskrit ASR** — The audio is transcribed to Sanskrit text using the "
        "[ai4bharat Indic Conformer](https://huggingface.co/ai4bharat/indic-conformer-600m-multilingual) model.\n"
        "3. **Sanskrit → Hindi translation** — The Sanskrit transcript is translated to Hindi via Groq LLM.\n"
        "4. **Input translation** — The user-supplied name (English) is translated to Hindi via Groq LLM.\n"
        "5. **LLM verification** — An LLM checks whether the translated field appears in the Hindi transcript "
        "and returns match results with reasoning.\n\n"
        "### Authentication\n\n"
        "No authentication required. The service uses server-side API keys for Groq and Hugging Face."
    ),
    version="1.0.0",
    contact={
        "name": "Puja Verification Team",
    },
    # Tag names must match the `tags=[...]` values used on the route decorators.
    openapi_tags=[
        {
            "name": "Verification",
            "description": "Core verification endpoints — upload audio/video and check for name.",
        },
        {
            "name": "Health",
            "description": "Service health checks.",
        },
    ],
)
# ── CORS (allow all for development) ─────────────────────────────────────────
# Any origin, method and header is accepted. Credentials are deliberately left
# off: a wildcard origin must not be combined with credentialed requests, and
# the API description above states that no authentication is required, so
# cross-origin cookies/auth headers are not needed.
# NOTE(review): if a browser client ever needs credentialed requests, list its
# origins explicitly instead of re-enabling credentials alongside "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ── Temp upload directory ────────────────────────────────────────────────────
# Relative directory that receives uploaded files; created at import time when
# it is not already present.
UPLOAD_DIR = "temp"
if not os.path.isdir(UPLOAD_DIR):
    os.makedirs(UPLOAD_DIR, exist_ok=True)
# ─────────────────────────────────────────────────────────────────────────────
# Shared helper
# ─────────────────────────────────────────────────────────────────────────────
def _build_llm_only_response(
    transcript_text: str,
    hindi_transcript: str,
    name: str,
) -> dict:
    """Run LLM verification and assemble the response payload.

    Args:
        transcript_text: Transcript produced by the transcription step; it is
            passed through unchanged under the ``"transcript"`` key.
        hindi_transcript: Hindi transcript handed to the LLM matcher and
            echoed back under ``"hindi_transcript"``.
        name: Devotee name as supplied by the caller; forwarded to the matcher.

    Returns:
        A dict with the keys ``transcript``, ``hindi_transcript``,
        ``input_hindi``, ``matched_fields``, ``field_reasons``,
        ``match_summary`` and ``confidence``. ``matched_fields`` contains a
        ``"name"`` entry only when the matcher reports the name as present.

    Raises:
        KeyError: If the matcher result lacks one of the keys read here
            (assumes the result is a plain mapping — TODO confirm).
    """
    verification = analyze_hindi_matches_from_raw_input(hindi_transcript, name)
    input_hindi = verification["input_hindi"]

    matched_fields: dict[str, str] = {}
    if verification["name_present"]:
        # NOTE(review): the confidence is a random integer picked from a fixed
        # band (80-100 on a match, 50-60 otherwise); it is not derived from the
        # matcher output. Kept as-is so the response shape is unchanged.
        confidence = random.randint(80, 100)
        matched_fields["name"] = input_hindi["name_hindi"]
    else:
        confidence = random.randint(50, 60)

    return {
        "transcript": transcript_text,
        "hindi_transcript": hindi_transcript,
        "input_hindi": input_hindi,
        "matched_fields": matched_fields,
        "field_reasons": verification["field_reasons"],
        "match_summary": verification["match_summary"],
        "confidence": confidence,
    }
# ─────────────────────────────────────────────────────────────────────────────
# Endpoints
# ─────────────────────────────────────────────────────────────────────────────
@app.get("/health", tags=["Health"], summary="Health check")
async def health_check():
    """Return service health status."""
    # Fixed payload: the endpoint performs no checks of its own.
    status_payload = dict(status="ok")
    return status_payload
@app.post(
    "/verify",
    response_model=VerificationResponse,
    tags=["Verification"],
    summary="Verify puja from a video file",
    description=(
        "Upload an **MP4 video** of a puja ceremony along with the devotee's "
        "**name** in English.\n\n"
        "The service will:\n"
        "1. Extract the audio track from the video.\n"
        "2. Transcribe the audio to Sanskrit text.\n"
        "3. Translate the transcript to Hindi.\n"
        "4. Translate the input field to Hindi.\n"
        "5. Use an LLM to verify if the field is mentioned in the transcript.\n\n"
        "Returns a `VerificationResponse` with match results and reasoning."
    ),
)
async def verify_puja(
    video: UploadFile = File(..., description="MP4 video file of the puja ceremony"),
    name: str = Form(..., description="Devotee's name in English (e.g., 'Rahul Sharma')"),
):
    """Verify a puja video for the presence of a devotee's name.

    The upload is written to ``UPLOAD_DIR`` under a random ``.mp4`` name, its
    audio is extracted to a sibling ``.wav`` file, transcribed, and the Hindi
    transcript is checked by ``_build_llm_only_response``. Both temporary
    files are removed whether or not the request succeeds.

    Args:
        video: Uploaded video file.
        name: Devotee's name in English.

    Returns:
        The payload produced by ``_build_llm_only_response``.

    Raises:
        HTTPException: With status 502 and the underlying error text when any
            step fails. An ``HTTPException`` raised by a step is passed
            through unchanged instead of being rewrapped.
    """
    # NOTE(review): the pipeline steps below are called without ``await``, so
    # they presumably run synchronously inside this async endpoint — confirm
    # whether they should be moved to a worker thread.
    video_path = None
    audio_path = None
    try:
        # Step 1: Save video
        video_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}.mp4")
        with open(video_path, "wb") as buffer:
            shutil.copyfileobj(video.file, buffer)
        logger.info("Saved video → %s", video_path)

        # Step 2: Extract audio (swap only the final suffix, not every ".mp4"
        # that might occur elsewhere in the path)
        audio_path = os.path.splitext(video_path)[0] + ".wav"
        extract_audio(video_path, audio_path)
        logger.info("Extracted audio → %s", audio_path)

        # Step 3: Transcribe (Sanskrit) + translate (Hindi)
        transcription_result = transcribe_audio(audio_path, target_language="sa")
        transcript_text = transcription_result["transcript"]
        hindi_transcript = transcription_result["hindi_transcript"]

        # Step 4: LLM verification
        return _build_llm_only_response(transcript_text, hindi_transcript, name)
    except HTTPException:
        # Already an HTTP-level error; keep its status code and detail.
        raise
    except Exception as e:
        logger.exception("Error in /verify")
        raise HTTPException(status_code=502, detail=str(e)) from e
    finally:
        # Best-effort cleanup: a failure to delete a temp file must not
        # replace the response (or the error) produced above.
        for path in (video_path, audio_path):
            if path is None:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            except OSError:
                logger.warning("Could not remove temp file %s", path, exc_info=True)
@app.post(
    "/check-audio",
    response_model=VerificationResponse,
    tags=["Verification"],
    summary="Verify puja from an audio file",
    description=(
        "Upload a **WAV/MP3 audio** file of a puja ceremony along with the devotee's "
        "**name** in English.\n\n"
        "The service will:\n"
        "1. Transcribe the audio to Sanskrit text.\n"
        "2. Translate the transcript to Hindi.\n"
        "3. Translate the input field to Hindi.\n"
        "4. Use an LLM to verify if the field is mentioned in the transcript.\n\n"
        "Returns a `VerificationResponse` with match results and reasoning."
    ),
)
async def check_audio(
    audio: UploadFile = File(..., description="Audio file of the puja ceremony (WAV or MP3)"),
    name: str = Form(..., description="Devotee's name in English (e.g., 'Rahul Sharma')"),
):
    """Verify a puja audio file for the presence of a devotee's name.

    The upload is written to ``UPLOAD_DIR`` under a random name, transcribed,
    and the Hindi transcript is checked by ``_build_llm_only_response``. The
    temporary file is removed whether or not the request succeeds.

    Args:
        audio: Uploaded audio file. Its extension is reused for the temporary
            file only when it is a short ASCII alphanumeric suffix; otherwise
            ``.wav`` is used.
        name: Devotee's name in English.

    Returns:
        The payload produced by ``_build_llm_only_response``.

    Raises:
        HTTPException: With status 502 and the underlying error text when any
            step fails. An ``HTTPException`` raised by a step is passed
            through unchanged instead of being rewrapped.
    """
    # NOTE(review): the pipeline steps below are called without ``await``, so
    # they presumably run synchronously inside this async endpoint — confirm
    # whether they should be moved to a worker thread.
    audio_path = None
    try:
        # Step 1: Save audio
        # The file name comes from the client, so only a plain extension such
        # as ".wav" or ".mp3" is trusted; empty, overlong or non-alphanumeric
        # suffixes fall back to ".wav".
        ext = os.path.splitext(audio.filename or "upload.wav")[1]
        suffix = ext[1:]
        if not (suffix.isascii() and suffix.isalnum() and len(suffix) <= 10):
            ext = ".wav"
        audio_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}{ext}")
        with open(audio_path, "wb") as buffer:
            shutil.copyfileobj(audio.file, buffer)
        logger.info("Saved audio → %s", audio_path)

        # Step 2: Transcribe (Sanskrit) + translate (Hindi)
        transcription_result = transcribe_audio(audio_path, target_language="sa")
        transcript_text = transcription_result["transcript"]
        hindi_transcript = transcription_result["hindi_transcript"]

        # Step 3: LLM verification
        return _build_llm_only_response(transcript_text, hindi_transcript, name)
    except HTTPException:
        # Already an HTTP-level error; keep its status code and detail.
        raise
    except Exception as e:
        logger.exception("Error in /check-audio")
        raise HTTPException(status_code=502, detail=str(e)) from e
    finally:
        # Best-effort cleanup: a failure to delete the temp file must not
        # replace the response (or the error) produced above.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                pass
            except OSError:
                logger.warning("Could not remove temp file %s", audio_path, exc_info=True)