Spaces:

bichnhan2701
/

PhoWhisperBaseAPI

Sleeping

App Files Files Community

bichnhan2701 committed on Dec 20, 2025

Commit

7158b5e

1 Parent(s): 557b80a

Update PhoWhisper services logic

Browse files

Files changed (19) hide show

.dockerignore +1 -0
.gitignore +2 -3
Dockerfile +11 -18
app/api/transcribe.py +67 -148
app/config/settings.py +14 -23
app/core/asr_engine.py +82 -16
app/infra/metrics.py +7 -13
app/infra/redis_client.py +5 -20
app/jobs/transcribe_job.py +31 -55
app/main.py +5 -41
app/schemas/transcribe.py +1 -8
app/services/mindmap_service.py +0 -56
app/services/nlp_postprocess.py +0 -156
app/services/note_client.py +32 -58
app/services/summary_service.py +0 -35
app/services/text_normalizer.py +0 -74
app/utils/hashing.py +0 -7
start.sh +16 -0
test/conftest.py +0 -11

.dockerignore CHANGED Viewed

@@ -1,6 +1,7 @@
 test/
 *.md
 .myvenv
 __pycache__
 *.pyc
 .DS_Store

 test/
 *.md
 .myvenv
+.myvenv1
 __pycache__
 *.pyc
 .DS_Store

.gitignore CHANGED Viewed

@@ -4,6 +4,5 @@ __pycache__/
 *.pyc
 .env
 *.md
-docker-compose.yml
-examples/
-docs/

 *.pyc
 .env
 *.md
+docs/
+*.json

Dockerfile CHANGED Viewed

@@ -3,34 +3,27 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     TMP_DIR=/tmp/uploads \
-    PORT=7860
-ENV HF_HOME=/tmp/huggingface
-ENV TRANSFORMERS_CACHE=/tmp/huggingface
-# system deps (single RUN to minimize layers)
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    ffmpeg libsndfile1 git build-essential wget curl && \
     rm -rf /var/lib/apt/lists/*
 WORKDIR /app
-# install python deps using cached layer
-COPY requirements.txt /app/requirements.txt
 RUN pip install --upgrade pip && \
-    pip install --no-cache-dir -r /app/requirements.txt
-# copy app code
-COPY . /app
-# create tmp dir and non-root user
-RUN mkdir -p ${TMP_DIR} && groupadd -r app && useradd -r -g app app && \
-    chown -R app:app /app ${TMP_DIR}
-USER app
 EXPOSE ${PORT}
-HEALTHCHECK --interval=30s --timeout=3s --start-period=10s \
   CMD curl -f http://localhost:${PORT}/health || exit 1
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]

 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1 \
     TMP_DIR=/tmp/uploads \
+    PORT=7860 \
+    HF_HOME=/tmp/huggingface \
+    TRANSFORMERS_CACHE=/tmp/huggingface
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    ffmpeg libsndfile1 git build-essential wget curl redis-server && \
     rm -rf /var/lib/apt/lists/*
 WORKDIR /app
+COPY requirements.txt .
 RUN pip install --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt rq
+COPY . .
+COPY start.sh /start.sh
+RUN chmod +x /start.sh && mkdir -p ${TMP_DIR}
 EXPOSE ${PORT}
+HEALTHCHECK --interval=30s --timeout=3s --start-period=15s \
   CMD curl -f http://localhost:${PORT}/health || exit 1
+CMD ["/start.sh"]

app/api/transcribe.py CHANGED Viewed

@@ -7,11 +7,19 @@ from fastapi.responses import JSONResponse
 from pathlib import Path
 from typing import Optional
 import time
-from app.core.audio_utils import save_upload_file, get_audio_info, ensure_wav_16k_mono, make_temp_path, download_file_from_url
-from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
 from app.config import settings
-from app.services.nlp_postprocess import normalize_and_extract
-# Summary and mindmap generation moved to Note Service; do not import here
 from app.services.note_client import NoteServiceClient
 from rq import Queue
 from app.infra.redis_client import redis_client
@@ -21,14 +29,9 @@ from app.infra.metrics import (
     REQUEST_COUNT,
     REQUEST_LATENCY,
     ASR_DURATION,
-    NORMALIZE_DURATION,
-    ERROR_COUNT,
 )
 router = APIRouter()
-# load model on import/startup to avoid repeated initialization
-# you may prefer to call load_model in FastAPI startup event
 ASR_MODEL = None
 @router.on_event("startup")
@@ -100,36 +103,39 @@ async def transcribe(file: UploadFile = File(...)):
             model = ASR_MODEL or await asyncio.to_thread(load_model, 30)
             with ASR_DURATION.labels(endpoint).time():
                 text = await asyncio.to_thread(transcribe_file, model, tmp_wav, 30.0, 5.0)
-                chunks = await asyncio.to_thread(transcribe_file_chunks, model, tmp_wav, 30.0, 5.0)
-            # normalize via Gemini (already async safe in your service)
-            with NORMALIZE_DURATION.labels(endpoint).time():
-                # normalized_text = await normalize_text(text)
-                nlp = await normalize_and_extract(text)
-                normalized_text = nlp["normalized_text"]
-                keywords = nlp["keywords"]
-                # Summary / mindmap are generated by the Note Service; omit local generation
-                summary = None
-                mindmap = None
             info2 = get_audio_info(tmp_wav) or {}
             # persist to Note Service (async HTTP)
             payload = {
                 "note_id": note_id,
                 "raw_text": text,
-                "normalized_text": normalized_text,
-                "keywords": keywords,
-                "summary": summary,
-                "mindmap": mindmap,
-                "duration": info2.get("duration"),
-                "sample_rate": info2.get("samplerate"),
-                "chunks": chunks,
-                "asr_model": "PhoWhisper-base",
-                "normalization_model": "gemini-1.5",
-                "generate": ["summary", "mindmap"],  # <-- thêm dòng này
             }
-            await note_service.save_transcript(payload)
             duration = time.perf_counter() - start_time
             logging.info(f"/transcribe success note_id={note_id} duration={duration:.2f}s audio_dur={info2.get('duration')}")
@@ -138,22 +144,11 @@ async def transcribe(file: UploadFile = File(...)):
                 status_code=200,
                 content={
                     "note_id": note_id,
-                    "status": "transcribed",
                     "duration": info2.get("duration"),
                 },
             )
-        except HTTPException:
-            status_label = "http_error"
-            ERROR_COUNT.labels(endpoint, status_label).inc()
-            raise
-        except Exception as e:
-            status_label = "error"
-            ERROR_COUNT.labels(endpoint, status_label).inc()
-            logging.exception(f"/transcribe failed note_id={note_id}")
-            raise HTTPException(status_code=500, detail=f"Transcription failed: {e}")
         finally:
             # cleanup
             for p in [tmp_in, tmp_wav]:
@@ -235,32 +230,39 @@ async def transcribe_url(payload: dict):
                 chunks = await asyncio.to_thread(
                     transcribe_file_chunks, model, tmp_wav, 30.0, 5.0
                 )
-            with NORMALIZE_DURATION.labels(endpoint).time():
-                nlp = await normalize_and_extract(text)
-                normalized_text = nlp["normalized_text"]
-                keywords = nlp["keywords"]
-                # Summary / mindmap are generated by the Note Service; omit local generation
-                summary = None
-                mindmap = None
             # 5. Persist to Note Service
             payload = {
                 "note_id": note_id,
                 "raw_text": text,
-                "normalized_text": normalized_text,
-                "keywords": keywords,
-                "summary": summary,
-                "mindmap": mindmap,
-                "duration": info.get("duration"),
-                "sample_rate": info.get("samplerate"),
-                "chunks": chunks,
-                "asr_model": "PhoWhisper-base",
-                "normalization_model": "gemini-1.5",
-                "generate": ["summary", "mindmap"],  # <-- thêm dòng này
             }
-            await note_service.save_transcript(payload)
             duration = time.perf_counter() - start_time
             logging.info(
@@ -273,98 +275,15 @@ async def transcribe_url(payload: dict):
                 status_code=200,
                 content={
                     "note_id": note_id,
-                    "status": "transcribed",
                     "duration": info.get("duration"),
                 },
             )
-        except HTTPException:
-            status_label = "http_error"
-            ERROR_COUNT.labels(endpoint, status_label).inc()
-            raise
-        except Exception as e:
-            status_label = "error"
-            ERROR_COUNT.labels(endpoint, status_label).inc()
-            logging.exception(f"/transcribe-url failed note_id={note_id}")
-            raise HTTPException(status_code=500, detail=str(e))
         finally:
             for p in [tmp_in, tmp_wav]:
                 try:
                     if p and os.path.exists(p):
                         os.remove(p)
                 except Exception:
-                    pass
-# @router.post("/transcribe-url", response_model=TranscribeResponse)
-# async def transcribe_url(payload: dict):
-#     audio_url = payload.get("audio_url")
-#     user_id = payload.get("user_id")
-#     if not audio_url:
-#         raise HTTPException(status_code=400, detail="audio_url required")
-#     if not user_id:
-#         raise HTTPException(status_code=400, detail="user_id required")
-#     tmp_in = make_temp_path(suffix=Path(audio_url).suffix or ".tmp")
-#     tmp_wav = None
-#     note_service = NoteServiceClient()
-#     note_id = str(uuid.uuid4())
-#     start_time = time.perf_counter()
-#     try:
-#         # download blocking -> thread
-#         await asyncio.to_thread(download_file_from_url, audio_url, tmp_in)
-#         _ensure_file_limits(tmp_in)
-#         tmp_wav = make_temp_path(suffix=".wav")
-#         await asyncio.to_thread(ensure_wav_16k_mono, tmp_in, tmp_wav)
-#         model = ASR_MODEL or await asyncio.to_thread(load_model, 30)
-#         text = await asyncio.to_thread(transcribe_file, model, tmp_wav, 30.0, 5.0)
-#         chunks = await asyncio.to_thread(transcribe_file_chunks, model, tmp_wav, 30.0, 5.0)
-#         # NLP pipeline: normalize, extract keywords, then summary and mindmap
-#         nlp = await normalize_and_extract(text)
-#         normalized_text = nlp.get("normalized_text", text)
-#         keywords = nlp.get("keywords", [])
-#         summary = await generate_summary(normalized_text)
-#         mindmap = await generate_mindmap(normalized_text)
-#         info2 = get_audio_info(tmp_wav) or {}
-#         await note_service.save_transcript(
-#             note_id=note_id,
-#             raw_text=text,
-#             normalized_text=normalized_text,
-#             keywords=keywords,
-#             summary=summary,
-#             mindmap=mindmap,
-#             duration=info2.get("duration"),
-#             sample_rate=info2.get("samplerate"),
-#             chunks=chunks,
-#             asr_model="PhoWhisper-base",
-#             normalization_model="gemini-1.5"
-#         )
-#         duration = time.perf_counter() - start_time
-#         logging.info(f"/transcribe-url success note_id={note_id} duration={duration:.2f}s audio_dur={info2.get('duration')}")
-#         return JSONResponse(status_code=200, content={
-#             "note_id": note_id,
-#             "status": "transcribed",
-#             "duration": info2.get("duration")
-#         })
-#     except HTTPException:
-#         raise
-#     except Exception as e:
-#         logging.exception(f"/transcribe-url failed note_id={note_id}")
-#         raise HTTPException(status_code=500, detail=f"Transcription failed: {e}")
-#     finally:
-#         for p in [tmp_in, tmp_wav]:
-#             try:
-#                 if p and os.path.exists(p):
-#                     os.remove(p)
-#             except Exception:
-#                 pass

 from pathlib import Path
 from typing import Optional
 import time
+from app.core.audio_utils import (
+    save_upload_file,
+    get_audio_info,
+    ensure_wav_16k_mono,
+    make_temp_path,
+    download_file_from_url
+)
+from app.core.asr_engine import (
+    load_model,
+    transcribe_file,
+    transcribe_file_chunks
+)
 from app.config import settings
 from app.services.note_client import NoteServiceClient
 from rq import Queue
 from app.infra.redis_client import redis_client
     REQUEST_COUNT,
     REQUEST_LATENCY,
     ASR_DURATION,
 )
 router = APIRouter()
 ASR_MODEL = None
 @router.on_event("startup")
             model = ASR_MODEL or await asyncio.to_thread(load_model, 30)
             with ASR_DURATION.labels(endpoint).time():
                 text = await asyncio.to_thread(transcribe_file, model, tmp_wav, 30.0, 5.0)
+                chunks = await asyncio.to_thread(transcribe_file_chunks, model, tmp_wav, 30.0, 5.0)
+            # 🔥 DROP invalid chunks
+            chunks = [
+                c for c in chunks
+                if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
+            ]
+            note_status = "transcribed" if chunks and any(c.get("text", "").strip() for c in chunks) else "error"
             info2 = get_audio_info(tmp_wav) or {}
             # persist to Note Service (async HTTP)
             payload = {
                 "note_id": note_id,
+                "type": "audio",
+                "status": note_status,
                 "raw_text": text,
+                "metadata": {
+                    "audio": {
+                    "duration": info2.get("duration"),
+                    "sample_rate": info2.get("samplerate"),
+                    "chunks": chunks,
+                    "asr_model": "PhoWhisper-base"
+                    }
+                },
+                "generate": ["normalize", "keywords", "summary", "mindmap"]
             }
+            logging.info(
+                "Create audio note note_id=%s status=%s chunks=%d text_len=%d",
+                note_id,
+                note_status,
+                len(chunks) if chunks else 0,
+                len(text or ""),
+            )
+            await note_service.create_audio_note(payload)
             duration = time.perf_counter() - start_time
             logging.info(f"/transcribe success note_id={note_id} duration={duration:.2f}s audio_dur={info2.get('duration')}")
                 status_code=200,
                 content={
                     "note_id": note_id,
+                    "status": note_status,
                     "duration": info2.get("duration"),
                 },
             )
         finally:
             # cleanup
             for p in [tmp_in, tmp_wav]:
                 chunks = await asyncio.to_thread(
                     transcribe_file_chunks, model, tmp_wav, 30.0, 5.0
                 )
+            # 🔥 DROP invalid chunks
+            chunks = [
+                c for c in chunks
+                if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
+            ]
+            note_status = "transcribed" if chunks and any(c.get("text", "").strip() for c in chunks) else "error"
             # 5. Persist to Note Service
             payload = {
                 "note_id": note_id,
+                "type": "audio",
+                "status": note_status,
                 "raw_text": text,
+                "metadata": {
+                    "audio": {
+                    "duration": info.get("duration"),
+                    "sample_rate": info.get("samplerate"),
+                    "chunks": chunks,
+                    "asr_model": "PhoWhisper-base"
+                    }
+                },
+                "generate": ["normalize", "keywords", "summary", "mindmap"]
             }
+            logging.info(
+                "Create audio note note_id=%s status=%s chunks=%d text_len=%d",
+                note_id,
+                note_status,
+                len(chunks) if chunks else 0,
+                len(text or ""),
+            )
+            await note_service.create_audio_note(payload)
             duration = time.perf_counter() - start_time
             logging.info(
                 status_code=200,
                 content={
                     "note_id": note_id,
+                    "status": note_status,
                     "duration": info.get("duration"),
                 },
             )
         finally:
             for p in [tmp_in, tmp_wav]:
                 try:
                     if p and os.path.exists(p):
                         os.remove(p)
                 except Exception:
+                    pass

app/config/settings.py CHANGED Viewed

@@ -1,34 +1,25 @@
-# App settings and configuration
 import os
-# Limits & model setting
-MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_BYTES", 100 * 1024 * 1024))   # 100 MB
-MAX_DURATION_SECS = int(os.getenv("MAX_DURATION_SECS", 60 * 60))          # 1 hour
-MODEL_NAME = os.getenv("MODEL_NAME", "vinai/PhoWhisper-base")  # change if desired
-# Temporary storage
 TMP_DIR = os.getenv("TMP_DIR", "/tmp/uploads")
 os.makedirs(TMP_DIR, exist_ok=True)
-# Cloud credentials (set as HF Spaces secrets or env)
-# FIREBASE_SERVICE_ACCOUNT = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")  # optional
-# CLOUDINARY_URL = os.getenv("CLOUDINARY_URL")  # optional
-# Gemini API Key (for text normalization)
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
 GEMINI_MODEL = os.getenv("GEMINI_MODEL", "")
-# External services
-NOTE_SERVICE_URL = os.getenv(
-    "NOTE_SERVICE_URL",
-    "https://bichnhan2701-NoteServicesAPI.hf.space"
-)
-# HTTP timeouts
-HTTPX_TIMEOUT = float(os.getenv("HTTPX_TIMEOUT", "10.0"))
-# Redis URL
-REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
-REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
 REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
 REDIS_DB = int(os.getenv("REDIS_DB", "0"))

 import os
+MAX_UPLOAD_BYTES = int(os.getenv("MAX_UPLOAD_BYTES", 100 * 1024 * 1024))
+MAX_DURATION_SECS = int(os.getenv("MAX_DURATION_SECS", 60 * 60))
+MODEL_NAME = os.getenv("MODEL_NAME", "vinai/PhoWhisper-base")
 TMP_DIR = os.getenv("TMP_DIR", "/tmp/uploads")
 os.makedirs(TMP_DIR, exist_ok=True)
 GEMINI_MODEL = os.getenv("GEMINI_MODEL", "")
+NOTE_SERVICE_URL = os.getenv("NOTE_SERVICE_URL")
+if not NOTE_SERVICE_URL:
+    raise RuntimeError("NOTE_SERVICE_URL must be set")
+REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
 REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
 REDIS_DB = int(os.getenv("REDIS_DB", "0"))
+REDIS_URL = os.getenv(
+    "REDIS_URL",
+    f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB}"
+)
+HTTPX_TIMEOUT = float(os.getenv("HTTPX_TIMEOUT", "10.0"))

app/core/asr_engine.py CHANGED Viewed

@@ -3,6 +3,11 @@
 import logging
 from transformers import pipeline
 from app.config.settings import MODEL_NAME
 _model = None
@@ -17,6 +22,51 @@ def load_model(chunk_length_s: int = None):
         logging.info("Model loaded")
     return _model
 # Heuristic merge for chunked transcripts
 def merge_transcripts(prev_text: str, new_text: str, max_overlap_words: int = 8) -> str:
     if not prev_text:
@@ -41,14 +91,10 @@ def merge_transcripts(prev_text: str, new_text: str, max_overlap_words: int = 8)
     return prev_text.rstrip() + " " + new_text.lstrip()
 def transcribe_long_audio(model, wav_path: str, chunk_length_s: float = 30.0, overlap_s: float = 5.0, parallel: bool = False) -> str:
-    from app.core.chunking import split_audio_to_chunks
-    from app.core.audio_utils import make_temp_path
-    import os
     chunks = split_audio_to_chunks(wav_path, chunk_length_s=chunk_length_s, overlap_s=overlap_s)
     logging.info(f"Split into {len(chunks)} chunks")
     texts = []
     if parallel:
-        from concurrent.futures import ThreadPoolExecutor, as_completed
         def process_chunk(path):
             try:
                 out = model(path)
@@ -80,7 +126,6 @@ def transcribe_long_audio(model, wav_path: str, chunk_length_s: float = 30.0, ov
     return merged
 def transcribe_file(model, wav_path: str, max_chunk_length: float = 30.0, overlap_s: float = 5.0):
-    from app.core.audio_utils import get_audio_info
     info = get_audio_info(wav_path) or {}
     duration = info.get("duration", 0.0)
     if duration and duration > max_chunk_length * 1.1:
@@ -91,33 +136,54 @@ def transcribe_file(model, wav_path: str, max_chunk_length: float = 30.0, overla
         return out.get("text") or ""
     return str(out)
-def transcribe_file_chunks(model, wav_path: str, max_chunk_length: float = 30.0, overlap_s: float = 5.0):
-    from app.core.audio_utils import get_audio_info, make_temp_path
-    from app.core.chunking import ffmpeg_extract_segment
-    import os
     info = get_audio_info(wav_path) or {}
     duration = info.get("duration", 0.0)
     step = max_chunk_length - overlap_s
     if step <= 0:
         raise ValueError("max_chunk_length must be > overlap_s")
     starts = []
     t = 0.0
     while t < duration:
         starts.append(t)
         t += step
-    results = []
     for i, s in enumerate(starts):
         chunk_end = min(s + max_chunk_length, duration)
         dst = make_temp_path(suffix=f".chunk{i}.wav")
         ffmpeg_extract_segment(wav_path, s, chunk_end - s, dst)
         out = model(dst)
-        if isinstance(out, dict):
-            text = out.get("text", "")
-        else:
-            text = str(out)
-        results.append({"start": s, "end": chunk_end, "text": text})
         try:
             os.remove(dst)
         except Exception:
             pass
-    return results

 import logging
 from transformers import pipeline
 from app.config.settings import MODEL_NAME
+from app.core.chunking import split_audio_to_chunks, ffmpeg_extract_segment
+from app.core.audio_utils import make_temp_path
+import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from app.core.audio_utils import get_audio_info, make_temp_path
 _model = None
         logging.info("Model loaded")
     return _model
+def merge_chunks(chunks, max_overlap_words=12):
+    merged = []
+    for ch in chunks:
+        if not merged:
+            merged.append(ch)
+            continue
+        prev = merged[-1]
+        merged_text = merge_transcripts(
+            prev["text"],
+            ch["text"],
+            max_overlap_words=max_overlap_words
+        )
+        if merged_text != prev["text"]:
+            prev["text"] = merged_text
+            prev["end"] = ch["end"]
+        else:
+            merged.append(ch)
+    return merged
+def normalize_chunks(chunks):
+    normalized = []
+    last_end = 0.0
+    for ch in chunks:
+        start = max(ch["start"], last_end)
+        end = max(start, ch["end"])
+        text = ch["text"].strip()
+        if not text:
+            continue
+        normalized.append({
+            "start": round(start, 3),
+            "end": round(end, 3),
+            "text": text
+        })
+        last_end = end
+    return normalized
 # Heuristic merge for chunked transcripts
 def merge_transcripts(prev_text: str, new_text: str, max_overlap_words: int = 8) -> str:
     if not prev_text:
     return prev_text.rstrip() + " " + new_text.lstrip()
 def transcribe_long_audio(model, wav_path: str, chunk_length_s: float = 30.0, overlap_s: float = 5.0, parallel: bool = False) -> str:
     chunks = split_audio_to_chunks(wav_path, chunk_length_s=chunk_length_s, overlap_s=overlap_s)
     logging.info(f"Split into {len(chunks)} chunks")
     texts = []
     if parallel:
         def process_chunk(path):
             try:
                 out = model(path)
     return merged
 def transcribe_file(model, wav_path: str, max_chunk_length: float = 30.0, overlap_s: float = 5.0):
     info = get_audio_info(wav_path) or {}
     duration = info.get("duration", 0.0)
     if duration and duration > max_chunk_length * 1.1:
         return out.get("text") or ""
     return str(out)
+def transcribe_file_chunks(
+    model,
+    wav_path: str,
+    max_chunk_length: float = 30.0,
+    overlap_s: float = 5.0,
+):
     info = get_audio_info(wav_path) or {}
     duration = info.get("duration", 0.0)
     step = max_chunk_length - overlap_s
     if step <= 0:
         raise ValueError("max_chunk_length must be > overlap_s")
     starts = []
     t = 0.0
     while t < duration:
         starts.append(t)
         t += step
+    raw_chunks = []
     for i, s in enumerate(starts):
         chunk_end = min(s + max_chunk_length, duration)
         dst = make_temp_path(suffix=f".chunk{i}.wav")
         ffmpeg_extract_segment(wav_path, s, chunk_end - s, dst)
         out = model(dst)
+        text = out.get("text", "") if isinstance(out, dict) else str(out)
+        raw_chunks.append({
+            "start": s,
+            "end": chunk_end,
+            "text": text
+        })
         try:
             os.remove(dst)
         except Exception:
             pass
+    # 🔽 Standard processing chain: merge chunks, then normalize them
+    merged = merge_chunks(raw_chunks)
+    normalized = normalize_chunks(merged)
+    logging.info(
+        "ASR result: raw=%d merged=%d normalized=%d",
+        len(raw_chunks),
+        len(merged),
+        len(normalized),
+    )
+    return normalized

app/infra/metrics.py CHANGED Viewed

@@ -1,4 +1,6 @@
-from prometheus_client import Counter, Histogram
 REQUEST_COUNT = Counter(
     "asr_requests_total",
@@ -6,7 +8,6 @@ REQUEST_COUNT = Counter(
     ["endpoint", "status"]
 )
 REQUEST_LATENCY = Histogram(
     "asr_request_latency_seconds",
     "ASR request latency",
@@ -19,14 +20,7 @@ ASR_DURATION = Histogram(
     ["endpoint"]
 )
-NORMALIZE_DURATION = Histogram(
-    "normalize_duration_seconds",
-    "Text normalization duration",
-    ["endpoint"]
-)
-ERROR_COUNT = Counter(
-    "asr_error_total",
-    "Total ASR errors",
-    ["endpoint", "error_type"]
-)

+from prometheus_client import Counter, Histogram, generate_latest
+from fastapi import FastAPI
+from fastapi.responses import Response
 REQUEST_COUNT = Counter(
     "asr_requests_total",
     ["endpoint", "status"]
 )
 REQUEST_LATENCY = Histogram(
     "asr_request_latency_seconds",
     "ASR request latency",
     ["endpoint"]
 )
+def setup_metrics(app: FastAPI):
+    @app.get("/metrics")
+    def metrics():
+        return Response(generate_latest(), media_type="text/plain")

app/infra/redis_client.py CHANGED Viewed

@@ -1,22 +1,7 @@
-# import os
-# import redis
-# from app.config.settings import REDIS_URL
-# redis_client = redis.Redis.from_url(
-#     REDIS_URL,
-#     decode_responses=True
-# )
-import os
 import redis
-from app.config.settings import REDIS_HOST, REDIS_PORT, REDIS_DB, REDIS_URL
-if REDIS_URL:
-    redis_client = redis.from_url(REDIS_URL)
-else:
-    redis_client = redis.Redis(
-        host=REDIS_HOST,
-        port=REDIS_PORT,
-        db=REDIS_DB,
-    )

 import redis
+from app.config.settings import REDIS_URL
+redis_client = redis.from_url(
+    REDIS_URL,
+    decode_responses=True,
+)

app/jobs/transcribe_job.py CHANGED Viewed

@@ -1,65 +1,41 @@
 import asyncio
-from app.core.asr_engine import load_model, transcribe_file
 from app.services.note_client import NoteServiceClient
-from app.services.nlp_postprocess import normalize_and_extract
-def transcribe_job(tmp_wav: str, note_id: str):
     model = load_model()
-    raw_text = transcribe_file(model, tmp_wav, 30.0, 5.0)
-    nlp = asyncio.run(normalize_and_extract(raw_text))
-    payload = {
-        "note_id": note_id,
-        "raw_text": raw_text,
-        "normalized_text": nlp["normalized_text"],
-        "keywords": nlp["keywords"],
-        "chunks": [],
-        "duration": None,
-        "sample_rate": None,
-        "asr_model": "PhoWhisper-base",
-        "normalization_model": "gemini-1.5",
-        "generate": ["summary", "mindmap"]
-    }
-    note_service = NoteServiceClient()
-    asyncio.run(note_service.save_transcript(payload))
-    return True
-# from app.core.asr_engine import load_model, transcribe_file
-# from app.services.note_client import NoteServiceClient
-# from app.services.nlp_postprocess import normalize_and_extract
-# from app.services.summary_service import generate_summary
-# from app.services.mindmap_service import generate_mindmap
-# # This function will be run by RQ worker
-# def transcribe_job(tmp_wav: str, note_id: str):
-#     model = load_model()
-#     raw_text = transcribe_file(model, tmp_wav, 30.0, 5.0)
-#     nlp = asyncio.run(normalize_and_extract(raw_text))
-#     normalized = nlp["normalized_text"]
-#     keywords = nlp["keywords"]
-#     summary = asyncio.run(generate_summary(normalized))
-#     mindmap = asyncio.run(generate_mindmap(normalized))
-#     note_service = NoteServiceClient()
-#     # normalize_text có thể là async, nhưng RQ chỉ chạy sync nên cần chạy event loop nếu cần
-#     import asyncio
-#     asyncio.run(
-#         note_service.save_transcript(
-#             note_id=note_id,
-#             raw_text=raw_text,
-#             normalized_text=normalized,
-#             keywords=keywords,
-#             summary=summary,
-#             mindmap=mindmap,
-#             duration=None,
-#             sample_rate=None,
-#             chunks=None,
-#             asr_model="PhoWhisper-base",
-#             normalization_model="gemini-1.5",
-#         )
-#     )
-#     return True

 import asyncio
+from app.core.asr_engine import load_model, transcribe_file, transcribe_file_chunks
 from app.services.note_client import NoteServiceClient
+from app.core.audio_utils import get_audio_info
+def transcribe_job(wav_path: str, note_id: str, user_id: str | None = None):
     model = load_model()
+    # 🔥 ASR step is identical to the synchronous API path
+    text = transcribe_file(model, wav_path, 30.0, 5.0)
+    chunks = transcribe_file_chunks(model, wav_path, 30.0, 5.0)
+    # drop invalid chunks (defensive)
+    chunks = [
+        c for c in chunks
+        if c.get("text", "").strip() and c.get("end", 0) > c.get("start", 0)
+    ]
+    note_status = "transcribed" if chunks else "error"
+    info = get_audio_info(wav_path) or {}
+    payload = {
+        "note_id": note_id,
+        "type": "audio",
+        "status": note_status,
+        "raw_text": text,
+        "metadata": {
+            "audio": {
+                "duration": info.get("duration"),
+                "sample_rate": info.get("samplerate"),
+                "chunks": chunks,
+                "asr_model": "PhoWhisper-base",
+            },
+            "client": {"user_id": user_id},
+        },
+        "generate": ["normalize", "keywords", "summary", "mindmap"],
+    }
+    client = NoteServiceClient()
+    asyncio.run(client.create_audio_note(payload))

app/main.py CHANGED Viewed

@@ -1,48 +1,12 @@
-from fastapi import FastAPI, Response
-from prometheus_client import generate_latest
-import asyncio
-import logging
-from fastapi.middleware.cors import CORSMiddleware
 from app.api.transcribe import router as transcribe_router
-from app.core.asr_engine import load_model
-app = FastAPI(title="PhoWhisper ASR API")
-# Preload ASR model at startup
-@app.on_event("startup")
-async def preload_asr_model():
-    # Load model in thread to avoid blocking event loop
-    logging.info("Preloading ASR model at startup...")
-    await asyncio.to_thread(load_model, 30)
-    logging.info("ASR model preloaded.")
-# CORS — tighten in prod
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["GET","POST","OPTIONS"],
-    allow_headers=["*"],
-)
-# --- OLD LOGIC: Đã chuyển sang app/api/transcribe.py ---
-# - Định nghĩa endpoint trực tiếp
-# - Chứa toàn bộ logic xử lý
-# - Đã refactor thành router riêng và tách core/service
-# Health check (có thể giữ lại nếu muốn)
 @app.get("/health")
 def health():
     return {"status": "ok"}
-# Expose /metrics endpoint for Prometheus
-@app.get("/metrics")
-def metrics():
-    return Response(generate_latest(), media_type="text/plain")
-# Include API routers
-app.include_router(transcribe_router)

+from fastapi import FastAPI
 from app.api.transcribe import router as transcribe_router
+from app.infra.metrics import setup_metrics
+app = FastAPI(title="PhoWhisper ASR Service")
+setup_metrics(app)
+app.include_router(transcribe_router)
 @app.get("/health")
 def health():
     return {"status": "ok"}

app/schemas/transcribe.py CHANGED Viewed

@@ -1,12 +1,5 @@
-# Request/Response models for transcription
 from pydantic import BaseModel
-from typing import List, Optional
-class Chunk(BaseModel):
-    start: float
-    end: float
-    text: str
 class TranscribeResponse(BaseModel):
     note_id: str

 from pydantic import BaseModel
+from typing import Optional
 class TranscribeResponse(BaseModel):
+    """Pydantic model with a single required string field, ``note_id``."""
     note_id: str

app/services/mindmap_service.py DELETED Viewed

@@ -1,56 +0,0 @@
-# import asyncio, json
-# from app.config.settings import GEMINI_API_KEY
-# import google.generativeai as genai
-# if GEMINI_API_KEY:
-#     genai.configure(api_key=GEMINI_API_KEY)
-#     _model = genai.GenerativeModel("gemini-pro")
-# else:
-#     _model = None
-# async def generate_mindmap(text: str) -> dict:
-#     if not _model:
-#         return {}
-#     prompt = f"""
-# Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo cấu trúc JSON Mindmap.
-# Yêu cầu:
-# 1. Xác định Ý chính làm Root.
-# 2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
-# 3. Nhãn (label) ngắn gọn (< 7 từ).
-# 4. Màu sắc (colorHex): Root="#6200EE", Con="#F59E2B", "#2ECF9A", "#2F9BFF".
-# Cấu trúc JSON bắt buộc (Chỉ trả về JSON):
-# {{
-#     "root": {{
-#         "label": "Chủ đề",
-#         "colorHex": "#6200EE",
-#         "children": [
-#             {{
-#                 "label": "Ý 1",
-#                 "colorHex": "#F59E2B",
-#                 "children": []
-#             }}
-#         ]
-#     }}
-# }}
-# Văn bản:
-# {text}
-# """
-#     loop = asyncio.get_event_loop()
-#     def call():
-#         r = _model.generate_content(prompt)
-#         return r.text
-#     raw = await loop.run_in_executor(None, call)
-#     start = raw.find("{")
-#     end = raw.rfind("}")
-#     if start != -1 and end != -1:
-#         return json.loads(raw[start:end+1])
-#     return {}

app/services/nlp_postprocess.py DELETED Viewed

@@ -1,156 +0,0 @@
-import asyncio
-import json
-import logging
-import random
-import time
-from app.infra.redis_client import redis_client
-from app.utils.hashing import sha256
-from app.config.settings import GEMINI_API_KEY, GEMINI_MODEL
-# New official client
-try:
-    import google.genai as genai
-    from google.api_core.exceptions import GoogleAPIError  # optional but useful
-except Exception:
-    genai = None
-    # fallback exception type so except GoogleAPIError still works
-    class GoogleAPIError(Exception):
-        pass
-CACHE_TTL = 60 * 60 * 24 * 3  # 3 days
-# Retry settings for transient model errors (503 / UNAVAILABLE)
-RETRY_MAX_ATTEMPTS = 3
-RETRY_BASE_BACKOFF = 1.0
-# Tạo client Gemini nếu có API key
-_gemini_client = None
-_GEMINI_MODEL = GEMINI_MODEL
-if GEMINI_API_KEY and genai is not None:
-    try:
-        _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
-        logging.info(f"[nlp_postprocess] Initialized google.genai client with model={_GEMINI_MODEL}")
-    except Exception as e:
-        logging.exception(f"[nlp_postprocess] Failed to init google.genai client: {e}")
-        _gemini_client = None
-elif GEMINI_API_KEY and genai is None:
-    logging.warning("[nlp_postprocess] google.genai package not installed; GEMINI API disabled")
-else:
-    logging.warning("[nlp_postprocess] GEMINI_API_KEY is not set, using raw_text as normalization fallback")
-async def normalize_and_extract(raw_text: str) -> dict:
-    """
-    return {
-      "normalized_text": "...",
-      "keywords": [...]
-    }
-    """
-    cache_key = f"nlp:{sha256(raw_text)}"
-    # 1) Try get from Redis cache (best effort)
-    try:
-        cached = redis_client.get(cache_key)
-        if cached:
-            return json.loads(cached)
-    except Exception as e:
-        logging.warning(f"[nlp_postprocess] Redis GET failed, skip cache: {e}")
-    # 2) Default fallback result (if no model or error)
-    result = {
-        "normalized_text": raw_text,
-        "keywords": [],
-    }
-    # 3) Call Gemini if available
-    if _gemini_client:
-        prompt = f"""
-Bạn là một hệ thống Xử lý Hậu kỳ NLP (NLP Post-Processing) Tiếng Việt.
-Đầu vào là văn bản thô (raw transcript), có thể thiếu dấu câu và sai chính tả do nhận dạng giọng nói.
-Nhiệm vụ (Trả về JSON duy nhất):
-1. Sửa lỗi chính tả ASR, thêm dấu câu, viết hoa chuẩn xác, loại bỏ các từ bị lặp lại vô nghĩa.
-2. Trích xuất danh sách từ khóa quan trọng (keywords) liên quan đến chủ đề, độ dài từ 1-4 từ.
-Văn bản đầu vào:
-\"\"\"{raw_text}\"\"\"
-Cấu trúc JSON bắt buộc (chỉ trả JSON, không giải thích thêm):
-{{
-  "normalizedText": "Văn bản đã sửa hoàn chỉnh...",
-  "keywords": ["Từ khóa 1", "Từ khóa 2", "..."]
-}}
-"""
-        loop = asyncio.get_event_loop()
-        def call():
-            # Nếu lỗi từ API, để try/except bên ngoài handle
-            resp = _gemini_client.models.generate_content(
-                model=_GEMINI_MODEL,
-                contents=prompt,
-            )
-            # resp.text là chuỗi model trả (có thể chứa code block)
-            return resp.text
-        # Try with a small exponential backoff for transient server errors
-        text = None
-        attempt = 0
-        while attempt < RETRY_MAX_ATTEMPTS:
-            attempt += 1
-            try:
-                text = await loop.run_in_executor(None, call)
-                break
-            except Exception as e:
-                # Try to detect transient server-side/genai errors (503 / UNAVAILABLE)
-                is_transient = False
-                try:
-                    # try to import genai-specific ServerError if available
-                    from google.genai import errors as _genai_errors  # type: ignore
-                    ServerError = getattr(_genai_errors, "ServerError", None)
-                except Exception:
-                    ServerError = None
-                if ServerError is not None and isinstance(e, ServerError):
-                    is_transient = True
-                else:
-                    msg = str(e)
-                    if "503" in msg or "UNAVAILABLE" in msg.upper() or "model is overloaded" in msg.lower():
-                        is_transient = True
-                if is_transient and attempt < RETRY_MAX_ATTEMPTS:
-                    backoff = RETRY_BASE_BACKOFF * (2 ** (attempt - 1)) + random.uniform(0, 0.5)
-                    logging.warning(f"[nlp_postprocess] Gemini transient error (attempt {attempt}): {e}; retrying in {backoff:.1f}s")
-                    # use asyncio.sleep to not block event loop
-                    await asyncio.sleep(backoff)
-                    continue
-                else:
-                    logging.exception(f"[nlp_postprocess] Gemini call failed, fallback to raw_text: {e}")
-                    text = None
-                    break
-        if text:
-            if text:
-                # clean JSON
-                start = text.find("{")
-                end = text.rfind("}")
-                if start != -1 and end != -1:
-                    try:
-                        data = json.loads(text[start:end + 1])
-                        result = {
-                            "normalized_text": data.get("normalizedText", raw_text),
-                            "keywords": data.get("keywords", []) or [],
-                        }
-                    except Exception as e:
-                        logging.warning(f"[nlp_postprocess] Failed to parse Gemini JSON, fallback to raw_text: {e}")
-                else:
-                    logging.warning("[nlp_postprocess] Gemini response has no JSON block, fallback to raw_text")
-    # 4) Try write back to Redis (best effort)
-    try:
-        redis_client.setex(cache_key, CACHE_TTL, json.dumps(result))
-    except Exception as e:
-        logging.warning(f"[nlp_postprocess] Redis SETEX failed, skip cache: {e}")
-    return result

app/services/note_client.py CHANGED Viewed

@@ -1,64 +1,38 @@
 import httpx
 from app.config.settings import NOTE_SERVICE_URL
-class NoteServiceClient:
-    async def save_transcript(self, payload: dict):
-        async with httpx.AsyncClient(timeout=30) as client:
-            r = await client.post(f"{NOTE_SERVICE_URL}/notes", json=payload)
-            r.raise_for_status()
-            return r.json()
-# import httpx
-# from app.config.settings import NOTE_SERVICE_URL, HTTPX_TIMEOUT
-# from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception
-# class NoteServiceClient:
-#     def __init__(self, base_url: str = None):
-#         self.base_url = (base_url or NOTE_SERVICE_URL).rstrip("/")
-#     @retry(
-#         stop=stop_after_attempt(3),
-#         wait=wait_exponential(multiplier=1, min=1, max=8),
-#         reraise=True,
-#         retry=retry_if_exception(
-#             lambda e: (
-#                 isinstance(e, httpx.RequestError) or
-#                 (isinstance(e, httpx.HTTPStatusError) and 500 <= e.response.status_code < 600)
-#             )
-#         )
-#     )
-#     async def save_transcript(self, note_id: str, raw_text: str, normalized_text: str,
-#                               keywords: list, summary: str, mindmap: dict,
-#                               duration: float, sample_rate: int, chunks: list,
-#                               asr_model: str = "PhoWhisper-base",
-#                               normalization_model: str = "gemini-1.5"):
-#         url = f"{self.base_url}/notes/{note_id}/transcript"
-#         payload = {
-#             "raw_text": raw_text,
-#             "normalized_text": normalized_text,
-#             "keywords": keywords,
-#             "summary": summary,
-#             "mindmap": mindmap,
-#             "duration": duration,
-#             "sample_rate": sample_rate,
-#             "chunks": chunks,
-#             "asr_model": asr_model,
-#             "normalization_model": normalization_model
-#         }
-#         timeout = httpx.Timeout(HTTPX_TIMEOUT)
-#         async with httpx.AsyncClient(timeout=timeout) as client:
-#             try:
-#                 resp = await client.post(url, json=payload)
-#                 resp.raise_for_status()
-#                 return resp.json()
-#             except httpx.HTTPStatusError as e:
-#                 # Chỉ retry nếu là 5xx
-#                 if 500 <= e.response.status_code < 600:
-#                     raise
-#                 else:
-#                     raise
-#             except httpx.RequestError as e:
-#                 # Retry network errors
-#                 raise

+import logging
 import httpx
 from app.config.settings import NOTE_SERVICE_URL
+class NoteServiceClient:
+    async def create_audio_note(self, payload: dict):
+        """Call the Note Service to create an audio note.
+        This method catches HTTP errors and logs them instead of raising,
+        to avoid making transcription endpoints return 500 when the
+        Note Service is unavailable or returns 4xx/5xx.
+        Returns parsed JSON on success or None on failure.
+        """
+        try:
+            async with httpx.AsyncClient(timeout=30) as client:
+                r = await client.post(
+                    f"{NOTE_SERVICE_URL}/internal/notes/audio",
+                    json=payload,
+                )
+                r.raise_for_status()
+                return r.json()
+        except httpx.HTTPStatusError as exc:
+            status = getattr(exc.response, "status_code", "?")
+            logging.warning(
+                "NoteService returned HTTP %s for %s: %s",
+                status,
+                f"{NOTE_SERVICE_URL}/internal/notes/audio",
+                exc,
+            )
+            return None
+        except Exception as exc:  # network errors, timeouts, etc.
+            logging.exception("Failed to call NoteService: %s", exc)
+            return None
+    async def save_transcript(self, payload: dict):
+        # alias used elsewhere in the codebase
+        return await self.create_audio_note(payload)

app/services/summary_service.py DELETED Viewed

@@ -1,35 +0,0 @@
-# import asyncio
-# from app.config.settings import GEMINI_API_KEY
-# import google.generativeai as genai
-# if GEMINI_API_KEY:
-#     genai.configure(api_key=GEMINI_API_KEY)
-#     _model = genai.GenerativeModel("gemini-pro")
-# else:
-#     _model = None
-# async def generate_summary(text: str) -> str:
-#     if not _model:
-#         return ""
-#     prompt = f"""
-#     Bạn là chuyên gia tóm tắt. Hãy tóm tắt văn bản sau thành **một đoạn văn duy nhất**.
-#     Yêu cầu:
-#     1. Viết khoảng 3-5 câu, tổng hợp đầy đủ chủ đề và các ý chính.
-#     2. Viết liền mạch, KHÔNG xuống dòng, KHÔNG dùng gạch đầu dòng hay đánh số.
-#     3. Chỉ dựa trên thông tin được cung cấp, tuyệt đối KHÔNG tự thêm thông tin bên ngoài.
-#     4. Trả về văn bản thuần (plain text).
-#     Văn bản:
-#     \"\"\"{text}\"\"\"
-#     """
-#     loop = asyncio.get_event_loop()
-#     def call():
-#         r = _model.generate_content(prompt)
-#         return r.text.strip()
-#     result = await loop.run_in_executor(None, call)
-#     return result.replace("```", "").strip()

app/services/text_normalizer.py DELETED Viewed

@@ -1,74 +0,0 @@
-from app.infra.redis_client import redis_client
-from app.utils.hashing import sha256
-CACHE_TTL = 60 * 60 * 24 * 3  # 3 days
-# Simple in-memory cache (có thể thay bằng Redis, v.v. sau này)
-# _normalize_cache = {}
-# --- Gemini client (use new `google.genai` if available) ---
-try:
-	import google.genai as genai
-	from google.api_core.exceptions import GoogleAPIError  # optional but useful
-except Exception:
-	genai = None
-	class GoogleAPIError(Exception):
-		pass
-from app.config.settings import GEMINI_API_KEY, GEMINI_MODEL
-_gemini_client = None
-_GEMINI_MODEL = GEMINI_MODEL
-if GEMINI_API_KEY and genai is not None:
-	try:
-		_gemini_client = genai.Client(api_key=GEMINI_API_KEY)
-	except Exception:
-		_gemini_client = None
-elif GEMINI_API_KEY and genai is None:
-	# package not installed
-	_gemini_client = None
-else:
-	_gemini_client = None
-async def normalize_text(raw_text: str) -> str:
-	cache_key = f"normalize:{sha256(raw_text)}"
-	cached = redis_client.get(cache_key)
-	if cached:
-		return cached
-	prompt = f"""
-Bạn là hệ thống chuẩn hóa transcript tiếng Việt.
-- KHÔNG thêm ý mới
-- Giữ nguyên nội dung
-- Chỉ sửa chính tả, dấu câu, xuống dòng hợp lý
-Văn bản:
-{raw_text}
-"""
-	result = raw_text
-	if _gemini_client:
-		import asyncio
-		loop = asyncio.get_event_loop()
-		def call_gemini():
-			resp = _gemini_client.models.generate_content(
-				model=_GEMINI_MODEL,
-				contents=prompt,
-			)
-			return resp.text if hasattr(resp, 'text') else str(resp)
-		try:
-			result = await loop.run_in_executor(None, call_gemini)
-			if isinstance(result, str):
-				result = result.strip()
-		except GoogleAPIError:
-			result = raw_text
-		except Exception:
-			result = raw_text
-	else:
-		# Nếu chưa cấu hình Gemini, trả về text gốc
-		result = raw_text
-	result = result.strip()
-	redis_client.setex(cache_key, CACHE_TTL, result)
-	return result

app/utils/hashing.py DELETED Viewed

@@ -1,7 +0,0 @@
-# Hashing utilities for cache keys, helpers
-import hashlib
-def sha256(text: str) -> str:
-	return hashlib.sha256(text.encode('utf-8')).hexdigest()

start.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+#!/bin/bash
+set -e
+echo "Starting Redis..."
+redis-server --daemonize yes --save "" --appendonly no
+sleep 2
+echo "Starting RQ worker..."
+rq worker asr --url redis://127.0.0.1:6379 &
+WORKER_PID=$!
+echo "Starting FastAPI..."
+uvicorn app.main:app --host 0.0.0.0 --port ${PORT} &
+wait $WORKER_PID

test/conftest.py DELETED Viewed

@@ -1,11 +0,0 @@
-import pytest
-import tempfile
-import os
-@pytest.fixture(autouse=True)
-def mock_env(monkeypatch):
-    monkeypatch.setenv("TMP_DIR", tempfile.gettempdir())
-    monkeypatch.setenv("MAX_UPLOAD_BYTES", "1048576")
-    monkeypatch.setenv("MAX_DURATION_SECS", "3600")
-    monkeypatch.setenv("NOTE_SERVICE_URL", "http://note")
-    monkeypatch.setenv("REDIS_URL", "redis://localhost:6379/0")