j4mouser committed on
Commit
315482b
Β·
verified Β·
1 Parent(s): 44cfb06

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +96 -37
main.py CHANGED
@@ -14,6 +14,7 @@ import tempfile
14
  import os
15
  import uuid
16
  from contextlib import asynccontextmanager
 
17
 
18
  from faster_whisper import WhisperModel
19
  from zeroconf import ServiceInfo
@@ -27,6 +28,11 @@ SERVICE_PORT = 8000
27
  # Cloud deployment detection (Hugging Face Spaces, Railway, etc.)
28
  IS_CLOUD = os.environ.get("SPACE_ID") is not None or os.environ.get("RAILWAY_ENVIRONMENT") is not None
29
 
 
 
 
 
 
30
 
31
  # ──────────────────────────────────────────────────────────────
32
  # Filipino / Taglish vocabulary hint for Whisper initial_prompt.
@@ -42,7 +48,9 @@ FILIPINO_VOCAB_PROMPT = (
42
  "maganda, mabuti, masaya, malaki, maliit, "
43
  "kumain, uminom, pumunta, naglaro, natulog, "
44
  "paaralan, bahay, trabaho, kaibigan, pamilya, "
45
- "salamat, magandang, umaga, hapon, gabi"
 
 
46
  )
47
 
48
  # Known Whisper misrecognitions for Filipino β€” extend as needed.
@@ -53,6 +61,10 @@ WHISPER_CORRECTIONS: dict[str, str] = {
53
  "cami": "kami",
54
  "cum": "kum",
55
  "naman naman": "naman",
 
 
 
 
56
  }
57
 
58
 
@@ -128,43 +140,49 @@ async def lifespan(app: FastAPI):
128
  global async_zeroconf, service_info, model, roberta_model, roberta_tokenizer
129
 
130
  # 1. Load Whisper
131
- print("⏳ Loading Whisper model...")
132
- try:
133
- print(f"πŸ”§ CUDA Available: {torch.cuda.is_available()}")
134
- if torch.cuda.is_available():
135
- print(f"πŸ”§ GPU Device: {torch.cuda.get_device_name(0)}")
136
- model = WhisperModel(
137
- "small", # 3x more accurate than 'base'
138
- device="cuda",
139
- compute_type="float16"
140
- )
141
- else:
142
- # CPU / free HF Space β€” medium+int8 fits in ~1.5 GB RAM
143
- print("πŸ”§ Using CPU mode (medium + int8)")
144
- model = WhisperModel("medium", device="cpu", compute_type="int8")
145
- print("βœ… Whisper 'medium' model loaded successfully")
146
- except Exception as e:
147
- print(f"❌ Failed to load Whisper model: {e}")
148
- print("⚠️ Falling back to base/int8...")
149
- model = WhisperModel("base", device="cpu", compute_type="int8")
 
 
 
150
 
151
  # 2. Load RoBERTa (Tagalog)
152
- print("⏳ Loading RoBERTa (Tagalog) model...")
153
- try:
154
- # Use jcblaise/roberta-tagalog-base for fluency/coherence
155
- model_name = "jcblaise/roberta-tagalog-base"
156
- roberta_tokenizer = AutoTokenizer.from_pretrained(model_name)
157
- roberta_model = AutoModelForMaskedLM.from_pretrained(model_name)
158
-
159
- if torch.cuda.is_available():
160
- roberta_model.to("cuda")
161
 
162
- roberta_model.eval() # Set to evaluation mode
163
- print("βœ… RoBERTa model loaded successfully")
164
- except Exception as e:
165
- print(f"❌ Failed to load RoBERTa model: {e}")
166
- roberta_model = None
167
- roberta_tokenizer = None
 
 
 
 
 
168
 
169
 
170
  # Startup: Register mDNS service (skip on cloud deployments)
@@ -413,7 +431,7 @@ def analyze_prosody(segments: list, duration_seconds: float) -> ProsodyInfo:
413
 
414
 
415
 
416
- def calculate_fluency(text: str) -> float:
417
  """
418
  Calculate a fluency score (1-10) using RoBERTa perplexity (PPL).
419
  Lower PPL = More natural/fluent.
@@ -451,6 +469,26 @@ def calculate_fluency(text: str) -> float:
451
  print(f"⚠️ RoBERTa analysis failed: {e}")
452
  return check_coherence_heuristic(text)
453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
  def check_coherence_heuristic(text: str) -> float:
456
  """Heuristic check for coherence (Fallback)."""
@@ -508,6 +546,20 @@ def generate_feedback(pace: PaceInfo, fillers: FillerInfo, prosody: ProsodyInfo,
508
 
509
  from fastapi import Form, UploadFile, File
510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  @app.post("/sessions/{session_id}/transcribe", response_model=QuickTranscriptResponse)
512
  async def quick_transcribe(
513
  session_id: str,
@@ -676,7 +728,7 @@ async def upload_audio_chunk(session_id: str, file: UploadFile = File(...)):
676
  pace = calculate_pace(transcript, safe_duration)
677
  prosody = analyze_prosody(segments, safe_duration)
678
  # Use RoBERTa for advanced fluency scoring (or fallback to heuristic)
679
- coherence = calculate_fluency(transcript)
680
 
681
  feedback = generate_feedback(pace, fillers, prosody, coherence)
682
 
@@ -691,3 +743,10 @@ async def upload_audio_chunk(session_id: str, file: UploadFile = File(...)):
691
  feedback=feedback,
692
  message=message,
693
  )
 
 
 
 
 
 
 
 
14
  import os
15
  import uuid
16
  from contextlib import asynccontextmanager
17
+ import httpx
18
 
19
  from faster_whisper import WhisperModel
20
  from zeroconf import ServiceInfo
 
28
  # Cloud deployment detection (Hugging Face Spaces, Railway, etc.)
29
  IS_CLOUD = os.environ.get("SPACE_ID") is not None or os.environ.get("RAILWAY_ENVIRONMENT") is not None
30
 
31
+ # Service Mode Configuration (Split Architecture)
32
+ SERVICE_MODE = os.environ.get("SERVICE_MODE", "audio").lower() # 'audio' or 'nlp'
33
+ NLP_API_URL = os.environ.get("NLP_API_URL", "").rstrip("/")
34
+
35
+
36
 
37
  # ──────────────────────────────────────────────────────────────
38
  # Filipino / Taglish vocabulary hint for Whisper initial_prompt.
 
48
  "maganda, mabuti, masaya, malaki, maliit, "
49
  "kumain, uminom, pumunta, naglaro, natulog, "
50
  "paaralan, bahay, trabaho, kaibigan, pamilya, "
51
+ "salamat, magandang, umaga, hapon, gabi, "
52
+ # Common English loanwords/test phrases
53
+ "hello, hi, mic, test, testing, okay, yes, no"
54
  )
55
 
56
  # Known Whisper misrecognitions for Filipino β€” extend as needed.
 
61
  "cami": "kami",
62
  "cum": "kum",
63
  "naman naman": "naman",
64
+ # English loanword corrections
65
+ "helo": "hello",
66
+ "mike": "mic",
67
+ "test": "test", # to ensure it's not accidentally stripped
68
  }
69
 
70
 
 
140
  global async_zeroconf, service_info, model, roberta_model, roberta_tokenizer
141
 
142
  # 1. Load Whisper
143
+ if SERVICE_MODE == "audio":
144
+ print("⏳ Loading Whisper model...")
145
+ try:
146
+ print(f"πŸ”§ CUDA Available: {torch.cuda.is_available()}")
147
+ if torch.cuda.is_available():
148
+ print(f"πŸ”§ GPU Device: {torch.cuda.get_device_name(0)}")
149
+ model = WhisperModel(
150
+ "small", # 3x more accurate than 'base'
151
+ device="cuda",
152
+ compute_type="float16"
153
+ )
154
+ else:
155
+ # CPU / free HF Space β€” medium+int8 fits in ~1.5 GB RAM
156
+ print("πŸ”§ Using CPU mode (medium + int8)")
157
+ model = WhisperModel("medium", device="cpu", compute_type="int8")
158
+ print("βœ… Whisper 'medium' model loaded successfully")
159
+ except Exception as e:
160
+ print(f"❌ Failed to load Whisper model: {e}")
161
+ print("⚠️ Falling back to base/int8...")
162
+ model = WhisperModel("base", device="cpu", compute_type="int8")
163
+ else:
164
+ print("⏭️ Audio Service Mode not active, skipping Whisper.")
165
 
166
  # 2. Load RoBERTa (Tagalog)
167
+ if SERVICE_MODE == "nlp":
168
+ print("⏳ Loading RoBERTa (Tagalog) model...")
169
+ try:
170
+ # Use jcblaise/roberta-tagalog-base for fluency/coherence
171
+ model_name = "jcblaise/roberta-tagalog-base"
172
+ roberta_tokenizer = AutoTokenizer.from_pretrained(model_name)
173
+ roberta_model = AutoModelForMaskedLM.from_pretrained(model_name)
 
 
174
 
175
+ if torch.cuda.is_available():
176
+ roberta_model.to("cuda")
177
+
178
+ roberta_model.eval() # Set to evaluation mode
179
+ print("βœ… RoBERTa model loaded successfully")
180
+ except Exception as e:
181
+ print(f"❌ Failed to load RoBERTa model: {e}")
182
+ roberta_model = None
183
+ roberta_tokenizer = None
184
+ else:
185
+ print("⏭️ NLP Service Mode not active, skipping RoBERTa.")
186
 
187
 
188
  # Startup: Register mDNS service (skip on cloud deployments)
 
431
 
432
 
433
 
434
+ def calculate_fluency_local(text: str) -> float:
435
  """
436
  Calculate a fluency score (1-10) using RoBERTa perplexity (PPL).
437
  Lower PPL = More natural/fluent.
 
469
  print(f"⚠️ RoBERTa analysis failed: {e}")
470
  return check_coherence_heuristic(text)
471
 
472
async def get_fluency_score(text: str) -> float:
    """Return a fluency score for *text* (1-10 scale).

    Dispatch depends on the deployment mode:
      * NLP mode  — score locally with the RoBERTa model.
      * Audio mode — delegate to the external NLP microservice when
        NLP_API_URL is configured.
      * Otherwise (or on any remote failure) — fall back to the
        lightweight heuristic.
    """
    # Running inside the NLP service itself: no network hop needed.
    if SERVICE_MODE == "nlp":
        return calculate_fluency_local(text)

    if NLP_API_URL:
        # Audio mode with a configured NLP microservice: ask it over HTTP.
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                res = await client.post(f"{NLP_API_URL}/fluency", json={"text": text})
            if res.status_code == 200:
                payload = res.json()
                # 5.0 = neutral midpoint if the key is missing.
                return payload.get("coherence_score", 5.0)
            print(f"⚠️ External NLP API returned {res.status_code}, falling back to heuristic.")
        except Exception as e:
            # Best-effort remote call: log and degrade gracefully.
            print(f"⚠️ Failed to connect to NLP API at {NLP_API_URL}: {e}")

    # Fallback heuristic if local model missing and no external API configured/available
    return check_coherence_heuristic(text)
492
 
493
  def check_coherence_heuristic(text: str) -> float:
494
  """Heuristic check for coherence (Fallback)."""
 
546
 
547
  from fastapi import Form, UploadFile, File
548
 
549
class FluencyRequest(BaseModel):
    # Transcript text to score.
    text: str


class FluencyResponse(BaseModel):
    # Fluency/coherence score on the 1-10 scale.
    coherence_score: float


@app.post("/fluency", response_model=FluencyResponse)
async def analyze_fluency(req: FluencyRequest):
    """External endpoint for Audio service to request fluency scoring. (NLP Mode Only)"""
    return FluencyResponse(coherence_score=calculate_fluency_local(req.text))
561
+
562
+
563
  @app.post("/sessions/{session_id}/transcribe", response_model=QuickTranscriptResponse)
564
  async def quick_transcribe(
565
  session_id: str,
 
728
  pace = calculate_pace(transcript, safe_duration)
729
  prosody = analyze_prosody(segments, safe_duration)
730
  # Use RoBERTa for advanced fluency scoring (or fallback to heuristic)
731
+ coherence = await get_fluency_score(transcript)
732
 
733
  feedback = generate_feedback(pace, fillers, prosody, coherence)
734
 
 
743
  feedback=feedback,
744
  message=message,
745
  )
746
+
747
if __name__ == "__main__":
    # Run the FastAPI app via uvicorn directly from python.
    # Passing the 'app' object directly instead of the string "main:app"
    # because dynamic string imports often fail inside PyInstaller EXEs.
    import uvicorn

    # Honor a platform-injected PORT (Railway/Heroku-style cloud deploys,
    # which this file already detects via IS_CLOUD) and otherwise reuse the
    # module-level SERVICE_PORT constant instead of duplicating 8000 here.
    port = int(os.environ.get("PORT", SERVICE_PORT))
    uvicorn.run(app, host="0.0.0.0", port=port)