Spaces:

Jay162005
/

salitako2.0

Sleeping

App Files Files Community

Jay162005 committed on Feb 15

Commit

03e3fc5

verified ·

1 Parent(s): ea86986

Delete hgserver

Browse files

Files changed (4) hide show

hgserver/Dockerfile +0 -32
hgserver/README.md +0 -31
hgserver/main.py +0 -607
hgserver/requirements.txt +0 -11

hgserver/Dockerfile DELETED Viewed

@@ -1,32 +0,0 @@
# Hugging Face Spaces Dockerfile for SalitaKo Backend
FROM python:3.11-slim
# Install system dependencies for audio processing
# (ffmpeg decodes the uploaded audio for Whisper; libsndfile1 for audio I/O)
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    && rm -rf /var/lib/apt/lists/*
# Create non-root user (required by HF Spaces)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app
# Copy requirements first for caching.
# The dependency list that ships next to this Dockerfile is named
# requirements.txt; copying a non-existent requirements-hf.txt aborts the build.
COPY --chown=user requirements.txt requirements.txt
# Install Python dependencies (CPU-only torch for free tier)
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY --chown=user . .
# Expose port 7860 (Hugging Face Spaces default)
EXPOSE 7860
# Run the FastAPI app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

hgserver/README.md DELETED Viewed

@@ -1,31 +0,0 @@
----
-title: SalitaKo Speech Coach API
-emoji: 🎤
-colorFrom: blue
-colorTo: purple
-sdk: docker
-app_port: 7860
-pinned: false
-license: mit
----
-# SalitaKo Speech Coach API
-Filipino/Tagalog speech coaching backend powered by:
-- **Whisper** (faster-whisper) - Speech-to-text
-- **RoBERTa** (jcblaise/roberta-tagalog-base) - Fluency scoring
-## API Endpoints
-- `GET /` - Welcome message
-- `GET /health` - Health check
-- `GET /docs` - Swagger UI documentation
-- `POST /sessions` - Create a new session
-- `POST /sessions/{id}/transcribe` - Quick transcription
-- `POST /sessions/{id}/audio-chunk` - Full analysis with feedback
-## Usage
-```bash
-curl https://YOUR-SPACE.hf.space/health
-```

hgserver/main.py DELETED Viewed

@@ -1,607 +0,0 @@
-import re
-import socket
-import sqlite3
-import datetime
-import numpy as np
-from fastapi import FastAPI, UploadFile, File
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-import asyncio
-import tempfile
-import os
-import uuid
-from contextlib import asynccontextmanager
-from faster_whisper import WhisperModel
-from zeroconf import ServiceInfo
-from zeroconf.asyncio import AsyncZeroconf
# mDNS advertisement settings used when the server registers itself on the
# local network.
SERVICE_TYPE = "_salitako._tcp.local."
SERVICE_NAME = "SalitaKo Server." + SERVICE_TYPE
SERVICE_PORT = 8000

# Cloud deployment detection (Hugging Face Spaces, Railway, etc.): true when
# either marker environment variable is present, whatever its value.
IS_CLOUD = any(
    os.environ.get(marker) is not None
    for marker in ("SPACE_ID", "RAILWAY_ENVIRONMENT")
)
def get_local_ip():
    """Return the local IPv4 address of this machine as a dotted string.

    A datagram socket is connected towards a public address (8.8.8.8:80) and
    the local address bound to that socket is reported.

    Returns:
        The local address, or "127.0.0.1" when it cannot be determined
        (for example, no network route is available).
    """
    try:
        # The context manager closes the socket even when connect() or
        # getsockname() raises, so no descriptor is leaked on failure.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(("8.8.8.8", 80))
            return probe.getsockname()[0]
    except OSError:
        return "127.0.0.1"
# Global async zeroconf instance
# Both names stay None until lifespan() registers the mDNS service; on cloud
# deployments registration is skipped and they are never assigned.
async_zeroconf = None
service_info = None
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
# Global model instances
# All three are populated by lifespan() at startup. The RoBERTa pair is reset
# to None when loading fails, which makes calculate_fluency() fall back to the
# heuristic scorer.
model = None  # Whisper
roberta_model = None
roberta_tokenizer = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage mDNS service registration and Model loading on startup/shutdown.

    Startup (before ``yield``):
      1. Load the Whisper model: GPU/float16 when CUDA is available, otherwise
         CPU/int8. If that raises, retry with the "small" model on CPU/int8.
      2. Load the Tagalog RoBERTa model and tokenizer. A failure here is
         non-fatal: both globals are reset to None.
      3. Register the mDNS service unless running on a cloud deployment.
         A failure here is also non-fatal.

    Shutdown (after ``yield``): unregister the mDNS service and close the
    zeroconf instance if one was registered.

    Args:
        app: The FastAPI application (unused; required by the lifespan API).
    """
    global async_zeroconf, service_info, model, roberta_model, roberta_tokenizer
    # 1. Load Whisper
    print("⏳ Loading Whisper model...")
    try:
        print(f"🔧 CUDA Available: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f"🔧 GPU Device: {torch.cuda.get_device_name(0)}")
            model = WhisperModel(
                "base",           # Fast loading
                device="cuda",     # Use NVIDIA GPU
                compute_type="float16"
            )
        else:
            # CPU fallback (for cloud free tiers)
            print("🔧 Using CPU mode")
            model = WhisperModel("base", device="cpu", compute_type="int8")
        print("✅ Whisper model loaded successfully")
    except Exception as e:
        print(f"❌ Failed to load Whisper model: {e}")
        print("⚠️ Falling back to CPU/int8...")
        # NOTE(review): the fallback loads "small" although both primary paths
        # load "base" - confirm this is intentional.
        model = WhisperModel("small", device="cpu", compute_type="int8")
    # 2. Load RoBERTa (Tagalog)
    print("⏳ Loading RoBERTa (Tagalog) model...")
    try:
        # Use jcblaise/roberta-tagalog-base for fluency/coherence
        model_name = "jcblaise/roberta-tagalog-base"
        roberta_tokenizer = AutoTokenizer.from_pretrained(model_name)
        roberta_model = AutoModelForMaskedLM.from_pretrained(model_name)
        if torch.cuda.is_available():
            roberta_model.to("cuda")
        roberta_model.eval()  # Set to evaluation mode
        print("✅ RoBERTa model loaded successfully")
    except Exception as e:
        print(f"❌ Failed to load RoBERTa model: {e}")
        roberta_model = None
        roberta_tokenizer = None
    # Startup: Register mDNS service (skip on cloud deployments)
    if IS_CLOUD:
        print("☁️ Cloud deployment detected - skipping mDNS registration")
    else:
        local_ip = get_local_ip()
        print(f"🌐 Local IP: {local_ip}")
        try:
            async_zeroconf = AsyncZeroconf()
            service_info = ServiceInfo(
                SERVICE_TYPE,
                SERVICE_NAME,
                addresses=[socket.inet_aton(local_ip)],
                port=SERVICE_PORT,
                properties={
                    "version": "0.2.0",
                    "api": "/docs",
                    "name": "SalitaKo Speech Coach"
                },
                server="salitako.local.",
            )
            await async_zeroconf.async_register_service(service_info)
            print(f"📡 mDNS service registered: {SERVICE_NAME} at {local_ip}:{SERVICE_PORT}")
        except Exception as e:
            print(f"⚠️ mDNS registration failed (non-fatal): {e}")
            # Release the half-initialised zeroconf instance instead of just
            # dropping the reference, otherwise it is never closed.
            if async_zeroconf is not None:
                try:
                    await async_zeroconf.async_close()
                except Exception as close_err:
                    print(f"⚠️ mDNS close failed: {close_err}")
            async_zeroconf = None
    yield
    # Shutdown: Unregister mDNS service
    if async_zeroconf and service_info:
        print("📡 Unregistering mDNS service...")
        try:
            await async_zeroconf.async_unregister_service(service_info)
        except Exception as e:
            print(f"⚠️ mDNS unregister failed: {e}")
        finally:
            # Always close, even when unregistering raised.
            try:
                await async_zeroconf.async_close()
            except Exception as e:
                print(f"⚠️ mDNS close failed: {e}")
# The ASGI application; `lifespan` loads the models and manages mDNS
# registration around the server's lifetime.
app = FastAPI(title="SalitaKo API", version="0.2.0", lifespan=lifespan)
@app.get("/")
async def read_root():
    """Return a welcome payload with links to the docs and health endpoints.

    The URLs are built from the machine's local IP and the shared
    SERVICE_PORT constant, so they stay in sync with the port advertised
    over mDNS.

    Returns:
        A dict with the keys "message", "docs_url", "health_check" and
        "local_ip".
    """
    local_ip = get_local_ip()
    base_url = f"http://{local_ip}:{SERVICE_PORT}"
    return {
        "message": "Welcome to SalitaKo API",
        "docs_url": f"{base_url}/docs",
        "health_check": f"{base_url}/health",
        "local_ip": local_ip
    }
# CORS policy for browser clients.
# NOTE(review): the "*" entry makes the two explicit origins redundant, and
# combining a wildcard origin with allow_credentials=True is very permissive -
# restrict before production, as the inline comment already says.
# NOTE(review): entries in allow_origins appear to be compared literally, so
# "https://*.hf.space" is presumably never matched as a pattern; subdomain
# wildcards normally go through `allow_origin_regex` - confirm against the
# CORSMiddleware documentation.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",
        "https://*.hf.space",  # Hugging Face Spaces
        "*"  # Allow all for development (restrict in production)
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Request body of POST /log-session; each field is stored as one column of
# the `results` table.
class SessionResult(BaseModel):
    student_name: str
    wpm: float
    fluency_score: float
    filler_count: int
    duration_seconds: int  # stored in the `duration` column
@app.post("/log-session")
async def log_session_result(data: SessionResult):
    """Log session results to a local SQLite database for research analysis.

    The `results` table is created in 'thesis_data.db' on first use and one
    row is inserted per call.

    Args:
        data: The session metrics to persist.

    Returns:
        {"status": "logged"} on success, otherwise
        {"status": "error", "message": <exception text>}. Errors are reported
        in the payload rather than raised.
    """
    try:
        # Connect to a simple file-based DB
        conn = sqlite3.connect('thesis_data.db')
        try:
            # `with conn` commits on success and rolls back on error; the
            # outer finally guarantees the connection is closed either way.
            with conn:
                # Create table if it doesn't exist
                conn.execute('''
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_name TEXT,
                wpm REAL,
                fluency_score REAL,
                filler_count INTEGER,
                duration INTEGER,
                timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        ''')
                # Insert the data (parameterized - values are never
                # interpolated into the SQL text)
                conn.execute('''
            INSERT INTO results (student_name, wpm, fluency_score, filler_count, duration)
            VALUES (?, ?, ?, ?, ?)
        ''', (data.student_name, data.wpm, data.fluency_score, data.filler_count, data.duration_seconds))
        finally:
            conn.close()
        print(f"📝 Logged session for {data.student_name}")
        return {"status": "logged"}
    except Exception as e:
        print(f"❌ Failed to log session: {e}")
        return {"status": "error", "message": str(e)}
# Static frontend configuration returned by GET /config.
class AppConfig(BaseModel):
    update_interval_seconds: int
    supported_languages: list[str]  # language codes, e.g. "en", "fil"
    semantic_score_min: int
    semantic_score_max: int
# Response of POST /sessions.
class SessionCreateResponse(BaseModel):
    session_id: str  # UUID4 rendered as a string
# Filler-word statistics produced by detect_fillers().
class FillerInfo(BaseModel):
    count: int  # total number of filler occurrences
    fillers_detected: list[str]  # every occurrence, in transcript order (repeats kept)
# Speaking-rate result produced by calculate_pace().
class PaceInfo(BaseModel):
    wpm: float  # words per minute, rounded to two decimals
    status: str  # Slow, Normal, Fast
# Prosody result produced by analyze_prosody().
class ProsodyInfo(BaseModel):
    volume_db: float | None  # not measured by analyze_prosody(): None, or 0.0 when there are no segments
    silence_ratio: float | None  # share of the chunk duration not covered by speech segments
# Filipino coaching messages produced by generate_feedback().
class Feedback(BaseModel):
    general: str
    pacing: str
    fillers: str
    coherence: str
# Response of POST /sessions/{session_id}/audio-chunk (full analysis).
class ChunkAnalysisResponse(BaseModel):
    transcript: str
    wpm: float | None  # same value as pacing.wpm
    filler_count: int | None  # same value as fillers.count
    # Detailed analysis
    fillers: FillerInfo | None
    pacing: PaceInfo | None
    prosody: ProsodyInfo | None
    coherence_score: float | None  # result of calculate_fluency()
    feedback: Feedback | None
    message: str  # human-readable transcription status
# Lightweight response for real-time transcription (no analysis)
# Returned by POST /sessions/{session_id}/transcribe.
class QuickTranscriptResponse(BaseModel):
    transcript: str
    has_speech: bool  # For auto-stop detection
    message: str  # "OK", "No speech detected" or "Transcription failed"
@app.get("/health")
async def health_check():
    """Liveness probe: unconditionally report an "ok" status."""
    payload = {"status": "ok"}
    return payload
@app.get("/config", response_model=AppConfig)
async def get_config():
    """Serve the fixed configuration values the frontend UI needs."""
    settings = {
        "update_interval_seconds": 3,
        "supported_languages": ["en", "fil"],
        "semantic_score_min": 0,
        "semantic_score_max": 100,
    }
    return AppConfig(**settings)
@app.post("/sessions", response_model=SessionCreateResponse)
async def create_session():
    """Start a new speaking session and hand back its identifier.

    Nothing is stored yet: the ID is a freshly generated UUID4 and the
    endpoint is a placeholder until a database backs it.
    """
    new_id = uuid.uuid4()
    return SessionCreateResponse(session_id=str(new_id))
def detect_fillers(text: str) -> FillerInfo:
    """Find every filler word in *text* and report how many there are.

    The text is lower-cased and split into word tokens; each token found in
    the filler vocabulary is recorded, in order and with repeats.
    """
    filler_vocab = frozenset({
        "ano", "ah", "uh", "uhm", "parang", "kasi", "ganun",
        "e", "eh", "diba", "yung", "bale", "so", "like",
    })
    tokens = re.findall(r"\b\w+\b", text.lower())
    hits = [token for token in tokens if token in filler_vocab]
    return FillerInfo(count=len(hits), fillers_detected=hits)
def calculate_pace(transcript: str, duration_seconds: float) -> PaceInfo:
    """Work out words-per-minute for a transcript and label the speed.

    Below 100 WPM is "Slow", above 160 WPM is "Fast", anything else is
    "Normal". A non-positive duration yields 0.0 WPM labelled "Normal".
    """
    word_total = len(transcript.split())
    if not duration_seconds > 0:
        return PaceInfo(wpm=0.0, status="Normal")

    rate = (word_total / duration_seconds) * 60.0
    label = "Normal"
    if rate > 160:
        label = "Fast"
    elif rate < 100:
        label = "Slow"
    return PaceInfo(wpm=float(f"{rate:.2f}"), status=label)
def analyze_prosody(segments: list, duration_seconds: float) -> ProsodyInfo:
    """Estimate how much of the chunk was silence from segment timings.

    Speech time is the sum of each segment's (end - start); the rest of
    *duration_seconds* counts as silence. Volume is not measured here.
    With no segments the chunk is reported as fully silent.
    """
    if not segments:
        return ProsodyInfo(volume_db=0.0, silence_ratio=1.0)

    spoken = sum((piece.end - piece.start for piece in segments), 0.0)
    quiet = max(0.0, duration_seconds - spoken)
    if duration_seconds > 0:
        ratio = quiet / duration_seconds
    else:
        ratio = 0.0
    return ProsodyInfo(volume_db=None, silence_ratio=float(f"{ratio:.2f}"))
def calculate_fluency(text: str) -> float:
    """
    Calculate a fluency score (1-10) using RoBERTa perplexity (PPL).
    Lower PPL = More natural/fluent.

    Args:
        text: The transcript to score.

    Returns:
        A score clamped to [1.0, 10.0] and rounded to two decimals.
        Falls back to check_coherence_heuristic() when the RoBERTa model is
        not loaded, when scoring raises, or when the perplexity is NaN.
        Returns 1.0 for text with fewer than two words (model loaded).
    """
    import math
    global roberta_model, roberta_tokenizer
    if not roberta_model or not roberta_tokenizer:
        # Fallback to simple heuristic if model not loaded
        return check_coherence_heuristic(text)
    if not text.strip() or len(text.split()) < 2:
        return 1.0  # Too short
    try:
        # Truncate so long transcripts cannot exceed the model's maximum
        # sequence length (which would raise and silently drop to the
        # heuristic scorer).
        inputs = roberta_tokenizer(
            text, return_tensors="pt", truncation=True, max_length=512
        )
        if torch.cuda.is_available():
            inputs = {k: v.to("cuda") for k, v in inputs.items()}
        with torch.no_grad():
            # NOTE(review): the labels are the unmasked input ids, so this is
            # the masked-LM loss on fully visible text - presumably a rough
            # proxy rather than a true perplexity; confirm it is intended.
            outputs = roberta_model(**inputs, labels=inputs["input_ids"])
            loss = outputs.loss
            ppl = torch.exp(loss).item()
        if math.isnan(ppl):
            # min()/max() clamping would turn NaN into a perfect 10.0.
            return check_coherence_heuristic(text)
        # Normalize PPL to Score (1-10): score = 11 - ln(PPL), clamped.
        # PPL <= e (~2.7)      -> 10.0
        # PPL 10               -> ~8.7
        # PPL 100              -> ~6.4
        # PPL >= e^10 (~22026) -> 1.0
        score = max(1.0, min(10.0, 11.0 - math.log(ppl)))
        return float(f"{score:.2f}")
    except Exception as e:
        print(f"⚠️ RoBERTa analysis failed: {e}")
        return check_coherence_heuristic(text)
def check_coherence_heuristic(text: str) -> float:
    """Score coherence without a model (fallback scorer).

    Starts from 5.0 and subtracts 2.0 for a fragment of fewer than three
    words, and 2.0 when more than four words are present but fewer than half
    of them (case-insensitively) are distinct. Never returns below 1.0.
    """
    tokens = text.lower().split()
    total = len(tokens)
    penalty = 0.0

    # Very short fragments
    if total < 3:
        penalty += 2.0

    # Excessive repetition
    if total > 4 and len(set(tokens)) / total < 0.5:
        penalty += 2.0

    return max(1.0, 5.0 - penalty)
def generate_feedback(pace: PaceInfo, fillers: FillerInfo, prosody: ProsodyInfo, coherence_score: float) -> Feedback:
    """Turn the analysis metrics into Filipino coaching messages.

    Pacing feedback follows pace.status, filler feedback triggers above two
    fillers (quoting the first one detected), and the coherence/general
    messages switch at a coherence score of 3.0. `prosody` is accepted but
    not used.
    """
    # Pacing: look the status up, defaulting to the "on pace" message.
    pacing_by_status = {
        "Fast": "Medyo mabilis ang iyong pagsasalita. Subukang bagalan ng kaunti para mas maintindihan.",
        "Slow": "Medyo mabagal. Subukang bilisan nang kaunti para mas tuloy-tuloy ang daloy.",
    }
    pacing_text = pacing_by_status.get(pace.status, "Ayos ang iyong bilis! Panatilihin ito.")

    # Fillers: two or fewer counts as clean speech.
    if fillers.count <= 2:
        filler_text = "Mahusay! Malinis ang iyong pagsasalita mula sa mga filler words."
    else:
        filler_text = f"Napansin ko ang paggamit ng '{fillers.fillers_detected[0]}'. Subukang mag-pause sandali sa halip na gumamit ng filler words."

    # Coherence and the overall message move together.
    if coherence_score < 3.0:
        coherence_text, general_text = (
            "Medyo putol-putol ang ideya. Subukang buuin ang pangungusap.",
            "Kaya mo yan! Practice pa tayo.",
        )
    else:
        coherence_text, general_text = (
            "Malinaw ang daloy ng iyong ideya.",
            "Maganda ang iyong performance!",
        )

    return Feedback(
        general=general_text,
        pacing=pacing_text,
        fillers=filler_text,
        coherence=coherence_text,
    )
-from fastapi import Form, UploadFile, File
@app.post("/sessions/{session_id}/transcribe", response_model=QuickTranscriptResponse)
async def quick_transcribe(
    session_id: str,
    file: UploadFile = File(...),
    prompt: str = Form("")  # Optional previous context
):
    """Fast transcription endpoint with context prompt.

    Args:
        session_id: Session identifier from the URL (not used by the body).
        file: The uploaded audio; written to a temporary ".webm" file.
        prompt: Optional previous transcript passed to Whisper as
            `initial_prompt`; an empty string means no prompt.

    Returns:
        A QuickTranscriptResponse. `has_speech` is True when the transcript
        is longer than two characters. Any failure is reported as an empty
        transcript with the message "Transcription failed" rather than raised.
    """
    audio_bytes = await file.read()

    def _transcribe() -> tuple[str, bool]:
        # delete=False: the model opens the file by path after we close it,
        # so the file is removed manually in the finally block.
        tmp_file = tempfile.NamedTemporaryFile(suffix=".webm", delete=False)
        try:
            # Closing through the context manager flushes the data and
            # releases the handle even when write() fails.
            with tmp_file:
                tmp_file.write(audio_bytes)
            # Use the previous transcript as a prompt to guide Whisper
            # This fixes "amo" -> "ano" by giving context
            initial_prompt_text = prompt if prompt else None
            segments, info = model.transcribe(
                tmp_file.name,
                language="tl",     # Force Tagalog/Taglish to prevent Spanish detection
                task="transcribe",
                beam_size=5,
                vad_filter=True,   # Re-enable VAD to help with silence (looping)
                vad_parameters=dict(min_silence_duration_ms=500),
                initial_prompt=initial_prompt_text,
                condition_on_previous_text=False,
                # Filters to reduce hallucinations/looping:
                temperature=0.0,
                compression_ratio_threshold=2.4, # Filter loops
                log_prob_threshold=-1.0,         # Filter uncertain nonsense (fixed param name)
                no_speech_threshold=0.6,         # Filter silence
            )
            # Iterate the segments here, inside the worker thread, before the
            # temporary file is removed.
            texts = [seg.text.strip() for seg in segments if seg.text]
            transcript = " ".join(texts).strip()
            # Consider any non-trivial transcript as speech
            has_speech = len(transcript) > 2
            return transcript, has_speech
        finally:
            try:
                os.remove(tmp_file.name)
            except OSError:
                pass

    try:
        transcript, has_speech = await asyncio.to_thread(_transcribe)
        return QuickTranscriptResponse(
            transcript=transcript,
            has_speech=has_speech,
            message="OK" if has_speech else "No speech detected"
        )
    except Exception as exc:
        print(f"[transcribe-error] {exc}")
        return QuickTranscriptResponse(
            transcript="",
            has_speech=False,
            message="Transcription failed"
        )
@app.post("/sessions/{session_id}/audio-chunk", response_model=ChunkAnalysisResponse)
async def upload_audio_chunk(session_id: str, file: UploadFile = File(...)):
    """Full analysis endpoint - use when recording stops.

    Uses a local Whisper model (via faster-whisper) so there is
    no dependency on paid cloud APIs. The audio comes from the
    browser as WEBM/Opus; we write it to a temporary file and let
    Whisper handle decoding via ffmpeg.

    Args:
        session_id: Session identifier from the URL (only used in the
            error log line).
        file: The uploaded audio chunk.

    Returns:
        A ChunkAnalysisResponse with the transcript, pacing, filler, prosody
        and coherence results plus generated feedback. If transcription
        fails, the analysis runs on an empty transcript and `message`
        explains that the chunk was skipped.
    """
    audio_bytes = await file.read()

    def _call() -> tuple[str, float | None, list]:
        """Transcribe the chunk; returns (transcript, duration_seconds, segments)."""
        # delete=False: the model opens the file by path after we close it,
        # so the file is removed manually in the finally block.
        tmp_file = tempfile.NamedTemporaryFile(suffix=".webm", delete=False)
        try:
            # Closing through the context manager flushes the data and
            # releases the handle even when write() fails.
            with tmp_file:
                tmp_file.write(audio_bytes)
            segments, info = model.transcribe(
                tmp_file.name,
                language="tl",  # Force Tagalog to prevent translation to English
                task="transcribe",  # Transcribe, don't translate to English
                beam_size=5,    # Better accuracy
                vad_filter=False,  # Disabled to avoid cutting off speech
                condition_on_previous_text=False,  # Faster, no context dependency
            )
            # Materialize the segments while the temporary file still exists.
            segment_list = list(segments)
            texts = [segment.text.strip() for segment in segment_list if segment.text]
            transcript_text = " ".join(texts).strip()
            duration_seconds: float | None = None
            # Prefer model-reported duration when available.
            if getattr(info, "duration", None):
                duration_seconds = float(info.duration)  # type: ignore[arg-type]
            elif segment_list:
                # Otherwise span from the first segment's start to the last
                # segment's end.
                start = float(segment_list[0].start or 0.0)
                end = float(segment_list[-1].end or 0.0)
                if end > start:
                    duration_seconds = end - start
            return transcript_text, duration_seconds, segment_list
        finally:
            try:
                os.remove(tmp_file.name)
            except OSError:
                pass

    transcript = ""
    duration_seconds: float | None = None
    segments: list = []
    try:
        # Run Whisper in a worker thread so the event loop is not blocked.
        transcript, duration_seconds, segments = await asyncio.to_thread(_call)
        if transcript:
            message = "Transcription successful."
        else:
            message = "No clear speech detected in this chunk."
    except Exception as exc:  # pragma: no cover - defensive for runtime issues
        # Log detailed error on the server side only.
        print(f"[whisper-error] Failed to transcribe chunk for session {session_id}: {exc}")
        message = "Transcription skipped for this chunk (audio too short or invalid)."
        transcript = ""
    # Run analysis modules
    # Use fallback duration of 3.0s if undefined, to avoid division by zero
    safe_duration = duration_seconds if duration_seconds and duration_seconds > 0 else 3.0
    fillers = detect_fillers(transcript)
    pace = calculate_pace(transcript, safe_duration)
    prosody = analyze_prosody(segments, safe_duration)
    # Use RoBERTa for advanced fluency scoring (or fallback to heuristic)
    coherence = calculate_fluency(transcript)
    feedback = generate_feedback(pace, fillers, prosody, coherence)
    return ChunkAnalysisResponse(
        transcript=transcript,
        wpm=pace.wpm,
        filler_count=fillers.count,
        fillers=fillers,
        pacing=pace,
        prosody=prosody,
        coherence_score=coherence,
        feedback=feedback,
        message=message,
    )

hgserver/requirements.txt DELETED Viewed

@@ -1,11 +0,0 @@
-# Hugging Face Spaces specific requirements (CPU-only for free tier)
-fastapi
-uvicorn[standard]
-python-multipart
-faster-whisper
-numpy
-scipy
-zeroconf
-transformers
---extra-index-url https://download.pytorch.org/whl/cpu
-torch