abd8433 committed on
Commit
f114682
·
verified ·
1 Parent(s): 65c8d80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -1,13 +1,17 @@
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
- from transformers import MarianMTModel, MarianTokenizer, VitsModel, AutoTokenizer
5
  from parler_tts import ParlerTTSForConditionalGeneration
6
  import torch
7
  import scipy.io.wavfile
8
  import base64
9
  import io
10
  import logging
 
 
 
 
11
 
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
@@ -23,24 +27,26 @@ app.add_middleware(
23
 
24
  # ─── Load Models ──────────────────────────────────────────────────────────────
25
 
26
- # English TTS (MMS - fast and works well)
27
  logger.info("Loading English TTS...")
28
  eng_tts = VitsModel.from_pretrained("facebook/mms-tts-eng")
29
  eng_tok = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
30
  eng_tts.eval()
 
31
 
32
- # Translation (Helsinki - much more accurate for Urdu)
33
- logger.info("Loading Translation model...")
34
  trans_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ur")
35
  trans_tok = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ur")
36
  trans_model.eval()
 
37
 
38
- # Urdu TTS (Indic Parler - smoother and more natural)
39
  logger.info("Loading Urdu TTS...")
40
  urdu_tts = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
41
  urdu_tok = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
42
  urdu_tts.eval()
43
- logger.info("All models loaded ✅")
44
 
45
 
46
  # ─── Helpers ──────────────────────────────────────────────────────────────────
@@ -64,8 +70,8 @@ def english_to_audio_b64(text: str) -> str:
64
 
65
 
66
  def urdu_to_audio_b64(urdu_text: str) -> str:
67
- # Description controls the voice style — smooth, clear, neutral
68
- description = "A clear and natural Urdu male voice speaks in a calm, neutral tone."
69
  desc_inputs = urdu_tok(description, return_tensors="pt")
70
  text_inputs = urdu_tok(urdu_text, return_tensors="pt")
71
  with torch.no_grad():
@@ -83,7 +89,7 @@ def urdu_to_audio_b64(urdu_text: str) -> str:
83
  # ─── Request ──────────────────────────────────────────────────────────────────
84
 
85
  class TTSRequest(BaseModel):
86
- text: str # Always English
87
 
88
 
89
  # ─── Endpoints ────────────────────────────────────────────────────────────────
@@ -99,19 +105,20 @@ def health():
99
 
100
  @app.post("/tts/english")
101
  def tts_english(request: TTSRequest):
102
- """English text → English speech"""
103
  if not request.text.strip():
104
  raise HTTPException(status_code=400, detail="Text cannot be empty")
105
  try:
106
  audio = english_to_audio_b64(request.text)
107
  return {"audio": audio, "language": "english", "text": request.text}
108
  except Exception as e:
 
109
  raise HTTPException(status_code=500, detail=str(e))
110
 
111
 
112
  @app.post("/tts/english-to-urdu")
113
  def tts_english_to_urdu(request: TTSRequest):
114
- """English text → translate → smooth Urdu speech"""
115
  if not request.text.strip():
116
  raise HTTPException(status_code=400, detail="Text cannot be empty")
117
  try:
@@ -125,4 +132,5 @@ def tts_english_to_urdu(request: TTSRequest):
125
  "urdu_text": urdu_text,
126
  }
127
  except Exception as e:
 
128
  raise HTTPException(status_code=500, detail=str(e))
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
+ from transformers import MarianMTModel, MarianTokenizer, AutoTokenizer, VitsModel
5
  from parler_tts import ParlerTTSForConditionalGeneration
6
  import torch
7
  import scipy.io.wavfile
8
  import base64
9
  import io
10
  import logging
11
+ import os
12
+ from huggingface_hub import login
13
+
14
+ login(token=os.environ.get("HF_TOKEN"))
15
 
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
 
27
 
28
  # ─── Load Models ──────────────────────────────────────────────────────────────
29
 
30
+ # English TTS — MMS (fast, no gating)
31
  logger.info("Loading English TTS...")
32
  eng_tts = VitsModel.from_pretrained("facebook/mms-tts-eng")
33
  eng_tok = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
34
  eng_tts.eval()
35
+ logger.info("English TTS loaded ✅")
36
 
37
+ # Translation — Helsinki (accurate, dedicated EN→UR)
38
+ logger.info("Loading translation model...")
39
  trans_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ur")
40
  trans_tok = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ur")
41
  trans_model.eval()
42
+ logger.info("Translation model loaded ✅")
43
 
44
+ # Urdu TTS — parler-tts-mini (public, smooth, natural voice)
45
  logger.info("Loading Urdu TTS...")
46
  urdu_tts = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts")
47
  urdu_tok = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
48
  urdu_tts.eval()
49
+ logger.info("Urdu TTS loaded ✅")
50
 
51
 
52
  # ─── Helpers ──────────────────────────────────────────────────────────────────
 
70
 
71
 
72
  def urdu_to_audio_b64(urdu_text: str) -> str:
73
+ # Voice description — controls how the speech sounds
74
+ description = "A male speaker delivers clear, natural speech in a calm and neutral tone with no background noise."
75
  desc_inputs = urdu_tok(description, return_tensors="pt")
76
  text_inputs = urdu_tok(urdu_text, return_tensors="pt")
77
  with torch.no_grad():
 
89
  # ─── Request ──────────────────────────────────────────────────────────────────
90
 
91
  class TTSRequest(BaseModel):
92
+ text: str # Always English input
93
 
94
 
95
  # ─── Endpoints ────────────────────────────────────────────────────────────────
 
105
 
106
  @app.post("/tts/english")
107
  def tts_english(request: TTSRequest):
108
+ """English text → speaks in English"""
109
  if not request.text.strip():
110
  raise HTTPException(status_code=400, detail="Text cannot be empty")
111
  try:
112
  audio = english_to_audio_b64(request.text)
113
  return {"audio": audio, "language": "english", "text": request.text}
114
  except Exception as e:
115
+ logger.error(f"English TTS error: {e}")
116
  raise HTTPException(status_code=500, detail=str(e))
117
 
118
 
119
  @app.post("/tts/english-to-urdu")
120
  def tts_english_to_urdu(request: TTSRequest):
121
+ """English text → translate to Urdu → speaks in Urdu"""
122
  if not request.text.strip():
123
  raise HTTPException(status_code=400, detail="Text cannot be empty")
124
  try:
 
132
  "urdu_text": urdu_text,
133
  }
134
  except Exception as e:
135
+ logger.error(f"English→Urdu TTS error: {e}")
136
  raise HTTPException(status_code=500, detail=str(e))