Spaces:

j4mouser
/

visalitako

Sleeping

App Files Files Community

j4mouser committed on Mar 7

Commit

315482b

verified ·

1 Parent(s): 44cfb06

Upload main.py

Browse files

Files changed (1) hide show

main.py +96 -37

main.py CHANGED Viewed

@@ -14,6 +14,7 @@ import tempfile
 import os
 import uuid
 from contextlib import asynccontextmanager
 from faster_whisper import WhisperModel
 from zeroconf import ServiceInfo
@@ -27,6 +28,11 @@ SERVICE_PORT = 8000
 # Cloud deployment detection (Hugging Face Spaces, Railway, etc.)
 IS_CLOUD = os.environ.get("SPACE_ID") is not None or os.environ.get("RAILWAY_ENVIRONMENT") is not None
 # ──────────────────────────────────────────────────────────────
 # Filipino / Taglish vocabulary hint for Whisper initial_prompt.
@@ -42,7 +48,9 @@ FILIPINO_VOCAB_PROMPT = (
     "maganda, mabuti, masaya, malaki, maliit, "
     "kumain, uminom, pumunta, naglaro, natulog, "
     "paaralan, bahay, trabaho, kaibigan, pamilya, "
-    "salamat, magandang, umaga, hapon, gabi"
 )
 # Known Whisper misrecognitions for Filipino — extend as needed.
@@ -53,6 +61,10 @@ WHISPER_CORRECTIONS: dict[str, str] = {
     "cami": "kami",
     "cum": "kum",
     "naman naman": "naman",
 }
@@ -128,43 +140,49 @@ async def lifespan(app: FastAPI):
     global async_zeroconf, service_info, model, roberta_model, roberta_tokenizer
     # 1. Load Whisper
-    print("⏳ Loading Whisper model...")
-    try:
-        print(f"🔧 CUDA Available: {torch.cuda.is_available()}")
-        if torch.cuda.is_available():
-            print(f"🔧 GPU Device: {torch.cuda.get_device_name(0)}")
-            model = WhisperModel(
-                "small",           # 3x more accurate than 'base'
-                device="cuda",
-                compute_type="float16"
-            )
-        else:
-            # CPU / free HF Space — medium+int8 fits in ~1.5 GB RAM
-            print("🔧 Using CPU mode (medium + int8)")
-            model = WhisperModel("medium", device="cpu", compute_type="int8")
-        print("✅ Whisper 'medium' model loaded successfully")
-    except Exception as e:
-        print(f"❌ Failed to load Whisper model: {e}")
-        print("⚠️ Falling back to base/int8...")
-        model = WhisperModel("base", device="cpu", compute_type="int8")
     # 2. Load RoBERTa (Tagalog)
-    print("⏳ Loading RoBERTa (Tagalog) model...")
-    try:
-        # Use jcblaise/roberta-tagalog-base for fluency/coherence
-        model_name = "jcblaise/roberta-tagalog-base"
-        roberta_tokenizer = AutoTokenizer.from_pretrained(model_name)
-        roberta_model = AutoModelForMaskedLM.from_pretrained(model_name)
-        if torch.cuda.is_available():
-            roberta_model.to("cuda")
-        roberta_model.eval() # Set to evaluation mode
-        print("✅ RoBERTa model loaded successfully")
-    except Exception as e:
-        print(f"❌ Failed to load RoBERTa model: {e}")
-        roberta_model = None
-        roberta_tokenizer = None
     # Startup: Register mDNS service (skip on cloud deployments)
@@ -413,7 +431,7 @@ def analyze_prosody(segments: list, duration_seconds: float) -> ProsodyInfo:
-def calculate_fluency(text: str) -> float:
     """
     Calculate a fluency score (1-10) using RoBERTa perplexity (PPL).
     Lower PPL = More natural/fluent.
@@ -451,6 +469,26 @@ def calculate_fluency(text: str) -> float:
         print(f"⚠️ RoBERTa analysis failed: {e}")
         return check_coherence_heuristic(text)
 def check_coherence_heuristic(text: str) -> float:
     """Heuristic check for coherence (Fallback)."""
@@ -508,6 +546,20 @@ def generate_feedback(pace: PaceInfo, fillers: FillerInfo, prosody: ProsodyInfo,
 from fastapi import Form, UploadFile, File
 @app.post("/sessions/{session_id}/transcribe", response_model=QuickTranscriptResponse)
 async def quick_transcribe(
     session_id: str,
@@ -676,7 +728,7 @@ async def upload_audio_chunk(session_id: str, file: UploadFile = File(...)):
     pace = calculate_pace(transcript, safe_duration)
     prosody = analyze_prosody(segments, safe_duration)
     # Use RoBERTa for advanced fluency scoring (or fallback to heuristic)
-    coherence = calculate_fluency(transcript)
     feedback = generate_feedback(pace, fillers, prosody, coherence)
@@ -691,3 +743,10 @@ async def upload_audio_chunk(session_id: str, file: UploadFile = File(...)):
         feedback=feedback,
         message=message,
     )

 import os
 import uuid
 from contextlib import asynccontextmanager
+import httpx
 from faster_whisper import WhisperModel
 from zeroconf import ServiceInfo
 # Cloud deployment detection (Hugging Face Spaces, Railway, etc.)
 IS_CLOUD = os.environ.get("SPACE_ID") is not None or os.environ.get("RAILWAY_ENVIRONMENT") is not None
+# Service Mode Configuration (Split Architecture)
+SERVICE_MODE = os.environ.get("SERVICE_MODE", "audio").lower() # 'audio' or 'nlp'
+NLP_API_URL = os.environ.get("NLP_API_URL", "").rstrip("/")
 # ──────────────────────────────────────────────────────────────
 # Filipino / Taglish vocabulary hint for Whisper initial_prompt.
     "maganda, mabuti, masaya, malaki, maliit, "
     "kumain, uminom, pumunta, naglaro, natulog, "
     "paaralan, bahay, trabaho, kaibigan, pamilya, "
+    "salamat, magandang, umaga, hapon, gabi, "
+    # Common English loanwords/test phrases
+    "hello, hi, mic, test, testing, okay, yes, no"
 )
 # Known Whisper misrecognitions for Filipino — extend as needed.
     "cami": "kami",
     "cum": "kum",
     "naman naman": "naman",
+    # English loanword corrections
+    "helo": "hello",
+    "mike": "mic",
+    "test": "test", # to ensure it's not accidentally stripped
 }
     global async_zeroconf, service_info, model, roberta_model, roberta_tokenizer
     # 1. Load Whisper
+    if SERVICE_MODE == "audio":
+        print("⏳ Loading Whisper model...")
+        try:
+            print(f"🔧 CUDA Available: {torch.cuda.is_available()}")
+            if torch.cuda.is_available():
+                print(f"🔧 GPU Device: {torch.cuda.get_device_name(0)}")
+                model = WhisperModel(
+                    "small",           # 3x more accurate than 'base'
+                    device="cuda",
+                    compute_type="float16"
+                )
+            else:
+                # CPU / free HF Space — medium+int8 fits in ~1.5 GB RAM
+                print("🔧 Using CPU mode (medium + int8)")
+                model = WhisperModel("medium", device="cpu", compute_type="int8")
+            print("✅ Whisper 'medium' model loaded successfully")
+        except Exception as e:
+            print(f"❌ Failed to load Whisper model: {e}")
+            print("⚠️ Falling back to base/int8...")
+            model = WhisperModel("base", device="cpu", compute_type="int8")
+    else:
+        print("⏭️ Audio Service Mode not active, skipping Whisper.")
     # 2. Load RoBERTa (Tagalog)
+    if SERVICE_MODE == "nlp":
+        print("⏳ Loading RoBERTa (Tagalog) model...")
+        try:
+            # Use jcblaise/roberta-tagalog-base for fluency/coherence
+            model_name = "jcblaise/roberta-tagalog-base"
+            roberta_tokenizer = AutoTokenizer.from_pretrained(model_name)
+            roberta_model = AutoModelForMaskedLM.from_pretrained(model_name)
+            if torch.cuda.is_available():
+                roberta_model.to("cuda")
+            roberta_model.eval() # Set to evaluation mode
+            print("✅ RoBERTa model loaded successfully")
+        except Exception as e:
+            print(f"❌ Failed to load RoBERTa model: {e}")
+            roberta_model = None
+            roberta_tokenizer = None
+    else:
+        print("⏭️ NLP Service Mode not active, skipping RoBERTa.")
     # Startup: Register mDNS service (skip on cloud deployments)
+def calculate_fluency_local(text: str) -> float:
     """
     Calculate a fluency score (1-10) using RoBERTa perplexity (PPL).
     Lower PPL = More natural/fluent.
         print(f"⚠️ RoBERTa analysis failed: {e}")
         return check_coherence_heuristic(text)
+async def get_fluency_score(text: str) -> float:
+    """Return a fluency score for *text*, computed locally or via the NLP service.
+
+    Resolution order:
+      1. SERVICE_MODE == "nlp": score in-process with calculate_fluency_local().
+      2. NLP_API_URL is set: POST {"text": ...} to its /fluency endpoint and use
+         the "coherence_score" field of the JSON reply (5.0 if the field is absent).
+      3. Otherwise, or when the remote call fails or returns an unusable payload:
+         fall back to check_coherence_heuristic().
+
+    Remote failures are logged and never propagated to the caller.
+    """
+    if SERVICE_MODE == "nlp":
+        return calculate_fluency_local(text)
+    if NLP_API_URL:
+        # Call the NLP Microservice
+        try:
+            async with httpx.AsyncClient(timeout=10.0) as client:
+                res = await client.post(f"{NLP_API_URL}/fluency", json={"text": text})
+                if res.status_code == 200:
+                    try:
+                        # Coerce to float so a null, non-numeric, or non-object
+                        # payload cannot leak a non-float value to the caller.
+                        return float(res.json().get("coherence_score", 5.0))
+                    except (AttributeError, TypeError, ValueError) as e:
+                        print(f"⚠️ External NLP API returned an unusable payload ({e}), falling back to heuristic.")
+                else:
+                    print(f"⚠️ External NLP API returned {res.status_code}, falling back to heuristic.")
+        except Exception as e:
+            # Deliberately broad: scoring is best-effort, so any transport or
+            # configuration error degrades to the heuristic instead of failing the request.
+            print(f"⚠️ Failed to connect to NLP API at {NLP_API_URL}: {e}")
+    # Fallback heuristic if local model missing and no external API configured/available
+    return check_coherence_heuristic(text)
 def check_coherence_heuristic(text: str) -> float:
     """Heuristic check for coherence (Fallback)."""
 from fastapi import Form, UploadFile, File
+# Request body for POST /fluency; matches the {"text": ...} JSON that
+# get_fluency_score() sends to the NLP service.
+class FluencyRequest(BaseModel):
+    text: str
+# Response body for POST /fluency; get_fluency_score() reads the
+# "coherence_score" key from this payload.
+class FluencyResponse(BaseModel):
+    coherence_score: float
+@app.post("/fluency", response_model=FluencyResponse)
+async def analyze_fluency(req: FluencyRequest):
+    """External endpoint for Audio service to request fluency scoring. (NLP Mode Only)"""
+    # NOTE(review): nothing in this handler checks SERVICE_MODE, so "NLP Mode Only"
+    # is a deployment convention rather than something enforced here.
+    # Score the submitted text with the local scorer and wrap it in the response model.
+    return FluencyResponse(coherence_score=calculate_fluency_local(req.text))
 @app.post("/sessions/{session_id}/transcribe", response_model=QuickTranscriptResponse)
 async def quick_transcribe(
     session_id: str,
     pace = calculate_pace(transcript, safe_duration)
     prosody = analyze_prosody(segments, safe_duration)
     # Use RoBERTa for advanced fluency scoring (or fallback to heuristic)
+    coherence = await get_fluency_score(transcript)
     feedback = generate_feedback(pace, fillers, prosody, coherence)
         feedback=feedback,
         message=message,
     )
+if __name__ == "__main__":
+    import uvicorn
+    # Run the FastAPI app via uvicorn directly from Python.
+    # Pass the 'app' object itself instead of the import string "main:app",
+    # because dynamic string imports often fail inside PyInstaller EXEs.
+    # Bind to the module-level SERVICE_PORT rather than repeating the literal
+    # 8000, so the listening port has a single source of truth.
+    uvicorn.run(app, host="0.0.0.0", port=SERVICE_PORT)