Spaces:

arjun-ms
/

Subtrans

Sleeping

App Files Files Community

Subtrans / app /tests /test_medium_accuracy.py

arjun-ms

Initial commit: Subtrans Subtitle Pipeline

57bbccb 13 days ago

raw

history blame contribute delete

2.03 kB

	import os
	import sys

	# Add the repository root (two directories above this file's folder) to sys.path so the `app` package can be imported when this script is run directly.
	sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

	from app.services.transcribe import extract_audio, transcribe_audio

	def run_test():
	    """Transcribe a sample clip with the medium model and print keyword hits.

	    The clip is looked up first at a developer-local Downloads path and,
	    if that does not exist, under ``resources/tests-done`` next to this
	    file. Its audio is extracted to ``test_audio.wav`` in the current
	    working directory and transcribed with ``model_size="medium"``. Every
	    segment whose lower-cased text contains one of the watched words is
	    printed with a tag naming the check that matched.

	    This is a manual inspection script: it prints matches but asserts
	    nothing and returns ``None``.

	    Raises:
	        FileNotFoundError: if the sample video exists at neither location.
	    """
	    video_path = r"C:\Users\arjun\Downloads\nikhil kamath clip.mp4"
	    if not os.path.exists(video_path):
	        video_path = os.path.join(
	            os.path.dirname(os.path.abspath(__file__)),
	            "resources",
	            "tests-done",
	            "nikhil kamath clip.mp4",
	        )
	    if not os.path.exists(video_path):
	        # Fail early with a clear message instead of handing a missing
	        # path to the extraction step.
	        raise FileNotFoundError(
	            f"Sample video not found; last location tried: {video_path!r}"
	        )

	    audio_path = "test_audio.wav"

	    # (tag printed before the segment, substrings that trigger the tag).
	    # Each pair lists a suspected mis-transcription next to the intended
	    # word, so a hit on either spelling is shown for manual review.
	    keyword_checks = (
	        ("[ GRATIFICATION ]", ("ratification", "gratification")),
	        ("[ GROOVE ]", ("group", "groove")),
	        ("[ PEER PRESSURE ]", ("pure pressure", "peer pressure")),
	        ("[QUOTA/COUNTERPART]", ("quota", "counterpart")),
	    )

	    try:
	        print("1. Extracting audio...")
	        extract_audio(video_path, audio_path)

	        print("2. Transcribing with medium model...")
	        # `info` is returned by transcribe_audio but not needed here.
	        segments, _info = transcribe_audio(audio_path, model_size="medium")

	        print("\n--- Checking for Previous Transcription Errors ---")

	        print("\nFull segments with interesting keywords:")
	        for segment in segments:
	            text = segment.text.lower()
	            original_text = segment.text.strip()

	            # A segment may match several checks; print one line per match,
	            # in the fixed order of the table above.
	            for tag, needles in keyword_checks:
	                if any(needle in text for needle in needles):
	                    print(f"{tag} {original_text}")
	    finally:
	        # Always remove the temporary WAV, even if extraction or
	        # transcription raised, so failed runs do not leave it behind.
	        print("\nCleaning up...")
	        if os.path.exists(audio_path):
	            os.remove(audio_path)
	    print("Done.")

	# Script entry point: run the check only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    run_test()