File size: 2,098 Bytes
61a559c
 
 
 
ba14c6c
61a559c
ba14c6c
61a559c
 
ba14c6c
b7d3fbf
61a559c
b7d3fbf
 
61a559c
b7d3fbf
61a559c
 
 
 
ba14c6c
 
61a559c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba14c6c
61a559c
ba14c6c
61a559c
 
 
 
ba14c6c
 
 
 
 
61a559c
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import io
import numpy as np
from scipy.io import wavfile
import librosa
from fastapi import FastAPI, File, UploadFile, HTTPException
from utils.audio_cleaning import denoise_audio, normalize_audio
from config.settings import get_settings
from config import constants
from groq import Groq
from schemas.transcription import TranscriptionResponse

# Groq API client, created once at import time with the secret key read from
# the application settings; the /transcribe handler below uses it.
groq_client = Groq(api_key=get_settings().groq_secret_key)
# FastAPI application object that the route decorators in this module register on.
app = FastAPI()


@app.get("/")
def root():
    """Liveness probe: reply with a fixed ``{"status": "ok"}`` payload."""
    health_payload = {"status": "ok"}
    return health_payload


@app.post("/transcribe", response_model=TranscriptionResponse)
async def process_and_transcribe(file: UploadFile = File(...)) -> TranscriptionResponse:
    """Clean an uploaded audio file and transcribe it with Groq.

    The upload is decoded with librosa at its native sample rate, passed
    through ``denoise_audio`` and ``normalize_audio``, resampled to
    ``constants.GROQ_TARGET_SR``, re-encoded in memory as 16-bit PCM WAV and
    sent to the Groq transcription endpoint.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        A ``TranscriptionResponse`` with the transcript text, the original
        upload filename (``"audio.wav"`` when the upload had none) and the
        duration of the decoded input in seconds, rounded to two decimals.

    Raises:
        HTTPException: 400 if the upload is empty or cannot be decoded as
            audio; 500 if the transcription request fails.
    """
    audio_bytes = await file.read()
    print(f"File size: {len(audio_bytes)} bytes")
    if not audio_bytes:
        raise HTTPException(400, "Empty file")

    # sr=None keeps the file's own sample rate; resampling happens only once,
    # after cleaning. An undecodable upload is a client error, not a 500.
    try:
        waveform, sr = librosa.load(io.BytesIO(audio_bytes), sr=None)
    except Exception as e:
        print(f"Audio decode error: {e}")
        raise HTTPException(400, f"Could not decode audio: {e}") from e

    cleaned_audio = denoise_audio(waveform, sr)
    cleaned_audio = normalize_audio(cleaned_audio)

    # prepare audio to be sent
    # NOTE(review): the .detach().cpu().numpy() chain implies normalize_audio
    # returns a torch tensor -- confirm against utils.audio_cleaning.
    audio_np = cleaned_audio.detach().cpu().numpy().squeeze()
    audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=constants.GROQ_TARGET_SR)
    # Clip before scaling: samples outside [-1, 1] (e.g. resampling overshoot)
    # would otherwise overflow when cast to int16.
    audio_int16 = (np.clip(audio_np, -1.0, 1.0) * 32767).astype(np.int16)

    export_buffer = io.BytesIO()
    wavfile.write(export_buffer, constants.GROQ_TARGET_SR, audio_int16)

    filename = file.filename if file.filename else "audio.wav"
    # The payload is always WAV regardless of the uploaded format, so send it
    # under a .wav name; the response still reports the original filename.
    stem = filename.rsplit(".", 1)[0] or "audio"
    upload_name = f"{stem}.wav"

    try:
        transcription = groq_client.audio.transcriptions.create(
            file=(upload_name, export_buffer.getvalue()),
            model=constants.GROQ_MODEL_NAME,
            response_format="json",
            language="en",
        )
        return TranscriptionResponse(
            transcript=transcription.text,
            filename=filename,
            duration_seconds=round(len(waveform) / sr, 2),
        )

    except Exception as e:
        print(f"Groq API Error: {e}")
        raise HTTPException(500, f"Transcription failed: {str(e)}") from e


if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on localhost:8000 with
    # reload enabled. NOTE(review): the "index:app" import string presumes
    # this module is importable as `index` -- confirm against the file name.
    from uvicorn import run as serve

    server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
    serve("index:app", **server_options)