Final_Assignment_Template

Runtime error

App Files Files Community

nikhmr1235 committed on Jun 3, 2025

Commit

e92eb81

verified ·

1 Parent(s): 590f46b

Update helper.py

Browse files

Files changed (1) hide show

helper.py +102 -4

helper.py CHANGED Viewed

@@ -127,7 +127,7 @@ import requests
 from langchain.tools import Tool
 def download_limited_content(url: str, max_chars: int = 10000) -> str: # Limit to ~2500 tokens
-    """Downloads content from a URL, truncating if it exceeds max_chars."""
     try:
         with requests.get(url, stream=True, timeout=10) as response:
             response.raise_for_status()
@@ -159,13 +159,111 @@ def download_limited_content(url: str, max_chars: int = 10000) -> str: # Limit t
     except Exception as e:
         return f"Error processing content from {url}: {e}"
-web_downloader_limited_tool = Tool(
-    name="web_downloader_limited",
     description="""
-    Downloads content from a URL, automatically truncating it to save tokens.
     Useful when you need information from a web page but want to avoid
     exceeding token limits by downloading excessively large content.
     Input should be a single, valid URL.
     """,
     func=download_limited_content,
 )

 from langchain.tools import Tool
 def download_limited_content(url: str, max_chars: int = 10000) -> str: # Limit to ~2500 tokens
+    """Downloads text content from a URL, truncating if it exceeds max_chars."""
     try:
         with requests.get(url, stream=True, timeout=10) as response:
             response.raise_for_status()
     except Exception as e:
         return f"Error processing content from {url}: {e}"
+# Expose download_limited_content as a LangChain Tool for text-based URLs.
+# The "10,000 characters" figure quoted in the description mirrors the default
+# value of that function's max_chars parameter; keep the two in sync.
+text_downloader_limited_tool = Tool(
+    name="text_downloader_limited_tool",
     description="""
+    Downloads text content from a URL, automatically truncating it to save tokens.
     Useful when you need information from a web page but want to avoid
     exceeding token limits by downloading excessively large content.
     Input should be a single, valid URL.
+    NOTE: use this tool only for text-based-content URLs (e.g., articles, documentation, python code file).
+    The content will be truncated to approximately 10,000 characters (~2500 tokens).
+    If the content is larger, it will be cut off with a note indicating truncation.
     """,
     func=download_limited_content,
+)
+import speech_recognition as sr
+from pydub import AudioSegment
+import os
+import requests # Needed for downloading the URL content
+def transcribe_audio_from_path_or_url(audio_source: str, language: str = "en-US") -> str:
+    """
+    Transcribe speech from a local audio file or an audio URL into text.
+
+    When ``audio_source`` starts with ``http://`` or ``https://`` it is streamed
+    to a uniquely named temporary file first. The audio is then always
+    re-encoded to WAV with pydub and passed to SpeechRecognition's
+    ``recognize_google``. Every temporary file created here is removed before
+    the function returns.
+
+    Args:
+        audio_source (str): The local file path to the audio (e.g., "my_recording.mp3")
+                            OR a direct URL to an audio file (e.g., "https://example.com/audio.wav").
+        language (str, optional): The spoken language in the audio. Defaults to "en-US".
+                                 Refer to Google Speech Recognition language codes for options.
+    Returns:
+        str: The transcribed text with surrounding whitespace stripped, or an
+             informative error message if downloading, decoding or
+             transcription fails. Failures are reported through the returned
+             string rather than by raising.
+    """
+    # Local imports keep this block self-contained; both are standard library.
+    import tempfile
+    from urllib.parse import urlparse
+
+    r = sr.Recognizer()
+    # Both temp paths are bound BEFORE the try block so the cleanup in `finally`
+    # can never hit an unbound name when an early step (e.g. the download) fails.
+    temp_download_path = None
+    temp_wav_path = None
+    transcribed_text = ""
+    try:
+        # Determine if the input is a URL or a local path
+        if audio_source.startswith(("http://", "https://")):
+            # Take the extension from the URL *path* only, so query strings,
+            # fragments and the host name (".com") are never mistaken for one.
+            ext = os.path.splitext(urlparse(audio_source).path)[1] or ".mp3"
+            # The context manager releases the streamed connection on all paths.
+            with requests.get(audio_source, stream=True, timeout=30) as response:
+                response.raise_for_status()  # Raise an exception for bad status codes
+                # mkstemp yields a unique name, so concurrent calls cannot
+                # overwrite each other's download or clobber a user file.
+                fd, temp_download_path = tempfile.mkstemp(suffix=ext)
+                with os.fdopen(fd, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            current_audio_path = temp_download_path
+        else:
+            # It's a local file path
+            current_audio_path = audio_source
+        # Always re-encode to WAV, the format handed to sr.AudioFile below.
+        audio = AudioSegment.from_file(current_audio_path)
+        fd, temp_wav_path = tempfile.mkstemp(suffix=".wav")
+        os.close(fd)  # pydub reopens the file by path
+        audio.export(temp_wav_path, format="wav")
+        # Transcribe the audio
+        with sr.AudioFile(temp_wav_path) as source:
+            audio_listened = r.record(source)
+            try:
+                transcribed_text = r.recognize_google(audio_listened, language=language)
+            except sr.UnknownValueError:
+                return "Could not understand audio (speech not clear or too short)."
+            except sr.RequestError as e:
+                return f"Could not request results from Google Speech Recognition service; {e}"
+    except FileNotFoundError:
+        return f"Error: Audio file not found at '{audio_source}'."
+    except requests.exceptions.RequestException as e:
+        return f"Error downloading audio from URL '{audio_source}': {e}"
+    except Exception as e:
+        return f"An unexpected error occurred during audio processing or transcription: {e}"
+    finally:
+        # Best-effort cleanup: a failed delete must not replace the result
+        # (or error message) that is already being returned.
+        for leftover in (temp_download_path, temp_wav_path):
+            if leftover:
+                try:
+                    os.remove(leftover)
+                except OSError:
+                    pass
+    return transcribed_text.strip()
+# Expose transcribe_audio_from_path_or_url as a LangChain Tool so an agent can
+# turn an audio file (local path or URL) into text.
+# NOTE(review): langchain's Tool appears to hand a single string to `func`, in
+# which case `language` always keeps its default of "en-US" and the 'language'
+# hint in the description below cannot be acted on - confirm against the
+# LangChain version in use.
+from langchain.tools import Tool
+audio_transcriber_tool = Tool(
+    name="audio_transcriber_tool",
+    description=(
+        "Converts an audio file (local path or URL) to a text transcript. "
+        "This tool is useful for extracting spoken information from audio recordings. "
+        "Input should be either a local file path (e.g., 'path/to/audio.mp3') "
+        "or a direct URL to an audio file (e.g., 'https://example.com/speech.wav'). "
+        "Optionally, provide the 'language' parameter (e.g., 'en-US', 'es-ES') for better accuracy. "
+        "Returns the transcribed text or an error message if transcription fails."
+    ),
+    func=transcribe_audio_from_path_or_url,
 )