Spaces:

Navyssh
/

TDS-Project2

Sleeping

TDS-Project2 / tools /transcribe.py

Upload transcribe.py

4af4406 verified about 2 months ago

1.48 kB

	# tools/transcribe.py
	from langchain_core.tools import tool
	from groq import Groq
	import os
	from dotenv import load_dotenv

	# Pull variables from a local .env file into the process environment at import
	# time; transcribe_audio later reads GROQ_API_KEY through os.getenv.
	load_dotenv()

	@tool
	def transcribe_audio(filename: str) -> str:
	    """
	    Transcribe speech from an audio file into text.
	    Use this immediately after downloading an .mp3 or .wav file to know what is said inside it.

	    Args:
	        filename (str): The filename of the audio (e.g., 'audio.mp3') located in LLMFiles/.

	    Returns:
	        str: The transcribed text, or a message starting with "Error" if the
	        file could not be transcribed.
	    """
	    # NOTE: the @tool decorator exposes the docstring above to the model as the
	    # tool description, so its wording is part of the agent's behavior.

	    # Everything runs inside one try block on purpose: as an agent tool this
	    # function reports every failure as a returned string instead of raising.
	    try:
	        api_key = os.getenv("GROQ_API_KEY")
	        if not api_key:
	            return "Error: GROQ_API_KEY not found."

	        # Path is kept relative so the "does not exist" message reads the same
	        # as before (e.g. "LLMFiles/audio.mp3").
	        base_dir = "LLMFiles"
	        file_path = os.path.join(base_dir, filename)

	        # The filename is chosen by the model, so treat it as untrusted: an
	        # absolute path or ".." segments would otherwise let it read (and
	        # upload) files outside LLMFiles/. Symlinks are resolved before the
	        # containment check.
	        resolved_base = os.path.realpath(base_dir)
	        resolved_path = os.path.realpath(file_path)
	        if os.path.commonpath([resolved_base, resolved_path]) != resolved_base:
	            return (
	                f"Error: Invalid filename {filename!r}. "
	                "It must name a file inside LLMFiles/."
	            )

	        # isfile (not exists) so that a directory -- including the empty
	        # filename, which maps to LLMFiles/ itself -- gets the friendly message
	        # instead of an IsADirectoryError string.
	        if not os.path.isfile(file_path):
	            return f"Error: File {file_path} does not exist. Did you download it first?"

	        # Create the client only once the input has been validated.
	        client = Groq(api_key=api_key)

	        with open(file_path, "rb") as audio_file:
	            transcription = client.audio.transcriptions.create(
	                # Send only the base name; directory parts are irrelevant to
	                # the API and the extension is preserved.
	                file=(os.path.basename(file_path), audio_file.read()),
	                # English-only distilled Whisper model.
	                # NOTE(review): confirm this model id is still served by Groq;
	                # check the provider's deprecation notices before relying on it.
	                model="distil-whisper-large-v3-en",
	                response_format="json",
	                temperature=0.0,
	            )

	        return transcription.text

	    except Exception as e:
	        return f"Error transcribing audio: {str(e)}"