Subtrans / app /tests /test_medium_accuracy.py
arjun-ms's picture
Initial commit: Subtrans Subtitle Pipeline
57bbccb
import os
import sys
# Ensure the app module can be imported from root directory
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.services.transcribe import extract_audio, transcribe_audio
def run_test():
    """Transcribe the sample clip with the medium model and print keyword hits.

    The clip is looked up first at a developer-local absolute path and then
    under ``resources/tests-done`` next to this file. Its audio is extracted
    to ``test_audio.wav`` in the current working directory, transcribed with
    ``model_size="medium"``, and every segment whose lower-cased text contains
    one of the watched keywords is printed with a tag so the output can be
    inspected by eye. Each watched group pairs a keyword with a similar-sounding
    alternative (e.g. "peer pressure" / "pure pressure").

    The temporary audio file is removed even when extraction or transcription
    raises.

    Raises:
        FileNotFoundError: if the clip exists at neither location.
    """
    primary_path = r"C:\Users\arjun\Downloads\nikhil kamath clip.mp4"
    fallback_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "resources",
        "tests-done",
        "nikhil kamath clip.mp4",
    )
    video_path = primary_path if os.path.exists(primary_path) else fallback_path
    if not os.path.exists(video_path):
        # Fail early with a clear message instead of handing a missing path
        # to the extraction step.
        raise FileNotFoundError(
            f"Test clip not found at {primary_path!r} or {fallback_path!r}"
        )

    audio_path = "test_audio.wav"

    # (printed tag, substrings that trigger it). Order is the print order when
    # one segment matches several groups.
    keyword_checks = (
        ("[ GRATIFICATION ]", ("ratification", "gratification")),
        ("[ GROOVE ]", ("group", "groove")),
        ("[ PEER PRESSURE ]", ("pure pressure", "peer pressure")),
        ("[QUOTA/COUNTERPART]", ("quota", "counterpart")),
    )

    try:
        print("1. Extracting audio...")
        extract_audio(video_path, audio_path)

        print("2. Transcribing with medium model...")
        segments, _info = transcribe_audio(audio_path, model_size="medium")

        print("\n--- Checking for Previous Transcription Errors ---")
        print("\nFull segments with interesting keywords:")
        # NOTE(review): the loop is kept inside the try so the audio file is
        # still present if segments are produced lazily while iterating --
        # confirm against transcribe_audio.
        for segment in segments:
            text = segment.text.lower()
            original_text = segment.text.strip()
            for tag, keywords in keyword_checks:
                if any(keyword in text for keyword in keywords):
                    print(f"{tag} {original_text}")
    finally:
        # Always remove the temporary audio, including on failure.
        print("\nCleaning up...")
        if os.path.exists(audio_path):
            os.remove(audio_path)

    print("Done.")
# Run the manual accuracy check only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    run_test()