Mintik24's picture
🎉 Полный рефакторинг проекта Medical Transcriber
e275025
#!/usr/bin/env python3
"""
Trans-for-Doctors CLI
Runs the end-to-end pipeline: STT → Knowledge Base → LLM Correction → (optional) DOCX report.
Usage examples:
    uv run transmed --audio path/to.wav --model . --llm --generate-report
    uv run transmed --audio path/to.wav --model . --no-llm
"""
import argparse
import logging
import os
from pathlib import Path
from pipeline import MedicalTranscriptionPipeline, PipelineConfig
def setup_logging(level: str = "INFO") -> None:
    """Configure root logging for the CLI.

    Args:
        level: Case-insensitive name of a standard ``logging`` level
            (for example ``"debug"`` or ``"INFO"``). Any value that does not
            resolve to a numeric level falls back to ``logging.INFO``.

    Note:
        ``logging.basicConfig`` does nothing when the root logger already has
        handlers, so only the first effective call configures logging.
    """
    # A bare getattr() fallback is not enough: the logging module also exposes
    # upper-case attributes that are not levels (e.g. BASIC_FORMAT, a str).
    # Passing one of those to basicConfig() raises ValueError instead of
    # falling back, so only integer results are accepted as levels.
    resolved = getattr(logging, str(level).upper(), None)
    if not isinstance(resolved, int):
        resolved = logging.INFO

    logging.basicConfig(
        level=resolved,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the transcription CLI.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, which is what
            the command-line entry point relies on.

    Returns:
        Namespace holding one attribute per option declared below.
    """
    parser = argparse.ArgumentParser(
        description="Run medical transcription pipeline (STT + LLM Corrector + KB)",
    )

    # Core
    parser.add_argument("--audio", required=True, type=str, help="Path to audio .wav file")
    parser.add_argument("--model", type=str, default=".", help="Path to Whisper model directory")
    parser.add_argument(
        "--device",
        type=str,
        default="auto",
        choices=["auto", "cuda", "cpu", "mps"],
        help="Inference device",
    )
    parser.add_argument(
        "--dtype",
        type=str,
        default="float32",
        choices=["float32", "float16", "bfloat16"],
        help="Torch dtype",
    )
    parser.add_argument("--language", type=str, default="russian", help="Transcription language")

    # Knowledge base
    parser.add_argument("--terms", type=str, default="medical_terms.txt", help="Path to medical terms file")

    # LLM correction: --llm / --no-llm write to the same destination, and the
    # default below keeps correction enabled when neither flag is given.
    parser.add_argument("--llm", dest="llm", action="store_true", help="Enable LLM correction")
    parser.add_argument("--no-llm", dest="llm", action="store_false", help="Disable LLM correction")
    parser.set_defaults(llm=True)
    parser.add_argument("--openai-model", type=str, default="gpt-4o", help="OpenAI model name")
    parser.add_argument(
        "--openai-key",
        type=str,
        # Evaluated when the parser is built, so the environment is read once
        # per parse_args() call.
        default=os.getenv("OPENAI_API_KEY"),
        help="OpenAI API key (defaults to env OPENAI_API_KEY)",
    )

    # Outputs
    parser.add_argument("--save-original", action="store_true", help="Save original transcription JSON")
    parser.add_argument("--save-corrected", action="store_true", help="Save corrected transcription JSON")
    parser.add_argument("--generate-report", action="store_true", help="Generate DOCX report")
    parser.add_argument("--results-dir", type=str, default="results", help="Directory to store results")
    parser.add_argument("--logs-dir", type=str, default="logs", help="Directory to store logs")

    # Logging
    parser.add_argument("--log-level", type=str, default="INFO", help="Logging level")

    # Patient metadata (optional)
    parser.add_argument("--patient-name", type=str, default=None)
    parser.add_argument("--patient-id", type=str, default=None)
    parser.add_argument("--study-date", type=str, default=None)
    parser.add_argument("--modality", type=str, default=None)
    parser.add_argument("--body-part", type=str, default=None)

    return parser.parse_args(argv)
def main() -> None:
    """Run the transcription pipeline for one audio file from the command line.

    Parses the CLI options, configures logging, validates the input paths,
    builds a ``PipelineConfig``, processes the audio file and logs a short
    summary of the result.

    Raises:
        SystemExit: With code 1 when the audio file or the model path is
            missing, and with code 2 when the pipeline result's ``status``
            is not ``"success"``.
    """
    args = parse_args()
    setup_logging(args.log_level)
    logger = logging.getLogger("transmed")

    audio_path = Path(args.audio)
    model_path = Path(args.model)
    terms_path = Path(args.terms)
    results_dir = Path(args.results_dir)
    logs_dir = Path(args.logs_dir)

    # is_file() rather than exists(): a directory passed as --audio must be
    # rejected here instead of failing later inside the pipeline.
    if not audio_path.is_file():
        logger.error("Audio file not found: %s", audio_path)
        raise SystemExit(1)
    # The model argument is documented as a directory (default "."), so a
    # plain existence check is kept for it.
    if not model_path.exists():
        logger.error("Model path not found: %s", model_path)
        raise SystemExit(1)
    if not terms_path.exists():
        # NOTE(review): the path is still handed to PipelineConfig below;
        # confirm the pipeline tolerates a missing terms file.
        logger.warning("Terms file not found: %s — proceeding without extra terms", terms_path)
    if args.llm and not args.openai_key:
        # Correction is on by default, so a missing key is easy to overlook.
        logger.warning(
            "LLM correction is enabled but no OpenAI API key was provided "
            "(use --openai-key or set OPENAI_API_KEY)"
        )

    # Configure pipeline
    config = PipelineConfig(
        model_path=model_path,
        device=args.device,
        dtype=args.dtype,
        language=args.language,
        medical_terms_file=terms_path,
        openai_api_key=args.openai_key,
        openai_model=args.openai_model,
        correction_enabled=args.llm,
        save_original=args.save_original,
        save_corrected=args.save_corrected,
        save_diff=True,
        generate_report=args.generate_report,
        results_dir=results_dir,
        reports_dir=results_dir / "reports",
        logs_dir=logs_dir,
    )

    logger.info("Creating medical transcription pipeline...")
    pipeline = MedicalTranscriptionPipeline(config)

    # Patient metadata is only forwarded when a report was requested; fields
    # that were not given on the command line stay None.
    patient_metadata = None
    if args.generate_report:
        patient_metadata = {
            "patient_name": args.patient_name,
            "patient_id": args.patient_id,
            "study_date": args.study_date,
            "modality": args.modality,
            "body_part": args.body_part,
        }

    logger.info("Processing audio: %s", audio_path.name)
    result = pipeline.process_audio_file(audio_path=audio_path, patient_metadata=patient_metadata)

    if result.get("status") != "success":
        logger.error("Pipeline failed: %s", result.get("error"))
        raise SystemExit(2)

    # Summarize. Keys that are present but explicitly None are treated like
    # missing keys so the summary cannot crash on len(None).
    orig = result.get("original_transcription") or ""
    corr = result.get("corrected_transcription")
    if corr is None:
        corr = orig
    corrections = result.get("corrections") or []

    logger.info("Original (%d chars): %s...", len(orig), orig[:200])
    if config.correction_enabled:
        logger.info("Corrected (%d chars): %s...", len(corr), corr[:200])
        logger.info("Corrections: %d", len(corrections))
    if result.get("report_path"):
        logger.info("Report: %s", result["report_path"])
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()