Spaces:
Sleeping
Sleeping
Commit ·
3c67a24
1
Parent(s): 0fa772a
update transcribe_audio tool
Browse files
agent.py
CHANGED
|
@@ -7,19 +7,9 @@ from typing import List, TypedDict, Annotated, Optional
|
|
| 7 |
|
| 8 |
from langchain.tools import tool
|
| 9 |
from langchain_community.document_loaders import (
|
| 10 |
-
CSVLoader,
|
| 11 |
-
YoutubeLoader,
|
| 12 |
-
PyPDFLoader
|
| 13 |
)
|
| 14 |
-
from langchain_community.document_loaders
|
| 15 |
-
YoutubeAudioLoader,
|
| 16 |
-
)
|
| 17 |
-
from langchain_community.document_loaders.generic import GenericLoader
|
| 18 |
-
from langchain_community.document_loaders.parsers.audio import (
|
| 19 |
-
OpenAIWhisperParser,
|
| 20 |
-
OpenAIWhisperParserLocal,
|
| 21 |
-
)
|
| 22 |
-
|
| 23 |
from langchain.chat_models import init_chat_model
|
| 24 |
from langchain.agents import initialize_agent, AgentType
|
| 25 |
from langchain_community.retrievers import BM25Retriever
|
|
@@ -124,16 +114,6 @@ def read_spreadsheet(spreadsheet_path: str) -> str:
|
|
| 124 |
except Exception as e:
|
| 125 |
return f"Error reading spreadsheet: {e}"
|
| 126 |
|
| 127 |
-
@tool
|
| 128 |
-
def transcribe_audio(audio_path: str) -> str:
|
| 129 |
-
"""Transcribe audio file (e.g., MP3) using Whisper."""
|
| 130 |
-
try:
|
| 131 |
-
docs = AudioLoader(audio_path).load()
|
| 132 |
-
transcripts = WhisperLoader().load(docs)
|
| 133 |
-
return "\n".join(doc.page_content for doc in transcripts)
|
| 134 |
-
except Exception as e:
|
| 135 |
-
return f"Error transcribing audio: {e}"
|
| 136 |
-
|
| 137 |
@tool
|
| 138 |
def youtube_transcript_tool(video_url: str) -> str:
|
| 139 |
"""Download the transcript of a YouTube video using LangChain YoutubeLoader."""
|
|
@@ -155,7 +135,17 @@ def youtube_transcript_api(video_url_or_id: str) -> str:
|
|
| 155 |
except Exception as e:
|
| 156 |
return f"Error fetching transcript via API: {e}"
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
#o3_mini = init_chat_model("openai:o3-mini", temperature=0)
|
| 161 |
#claude_sonnet = init_chat_model("anthropic:claude-3-5-sonnet-latest", temperature=0)
|
|
@@ -202,8 +192,8 @@ class MyAgent:
|
|
| 202 |
loader = PyPDFLoader(path)
|
| 203 |
self.docs.extend(loader.load())
|
| 204 |
elif ext in [".mp3", ".wav"]:
|
| 205 |
-
|
| 206 |
-
self.docs.extend(
|
| 207 |
elif "youtube" in path:
|
| 208 |
loader = YoutubeLoader.from_youtube_url(path)
|
| 209 |
self.docs.extend(loader.load())
|
|
@@ -280,8 +270,5 @@ class MyAgent:
|
|
| 280 |
return state
|
| 281 |
|
| 282 |
|
| 283 |
-
|
| 284 |
-
if __name__ == "__main__":
|
| 285 |
-
import fire
|
| 286 |
-
fire.Fire(MyAgent)
|
| 287 |
|
|
|
|
| 7 |
|
| 8 |
from langchain.tools import tool
|
| 9 |
from langchain_community.document_loaders import (
|
| 10 |
+
CSVLoader, PyPDFLoader, YoutubeLoader
|
|
|
|
|
|
|
| 11 |
)
|
| 12 |
+
from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
from langchain.chat_models import init_chat_model
|
| 14 |
from langchain.agents import initialize_agent, AgentType
|
| 15 |
from langchain_community.retrievers import BM25Retriever
|
|
|
|
| 114 |
except Exception as e:
|
| 115 |
return f"Error reading spreadsheet: {e}"
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
@tool
|
| 118 |
def youtube_transcript_tool(video_url: str) -> str:
|
| 119 |
"""Download the transcript of a YouTube video using LangChain YoutubeLoader."""
|
|
|
|
| 135 |
except Exception as e:
|
| 136 |
return f"Error fetching transcript via API: {e}"
|
| 137 |
|
| 138 |
+
@tool
|
| 139 |
+
def transcribe_audio(audio_path: str) -> str:
|
| 140 |
+
"""Transcribe audio file (e.g., MP3) using AssemblyAI."""
|
| 141 |
+
try:
|
| 142 |
|
| 143 |
+
loader = AssemblyAIAudioTranscriptLoader(file_path=audio_path)
|
| 144 |
+
docs = loader.load()
|
| 145 |
+
return "\n".join(doc.page_content for doc in docs)
|
| 146 |
+
except Exception as e:
|
| 147 |
+
return f"Error transcribing audio: {e}"
|
| 148 |
+
|
| 149 |
|
| 150 |
#o3_mini = init_chat_model("openai:o3-mini", temperature=0)
|
| 151 |
#claude_sonnet = init_chat_model("anthropic:claude-3-5-sonnet-latest", temperature=0)
|
|
|
|
| 192 |
loader = PyPDFLoader(path)
|
| 193 |
self.docs.extend(loader.load())
|
| 194 |
elif ext in [".mp3", ".wav"]:
|
| 195 |
+
loader = AssemblyAIAudioTranscriptLoader(file_path=path)
|
| 196 |
+
self.docs.extend(loader.load())
|
| 197 |
elif "youtube" in path:
|
| 198 |
loader = YoutubeLoader.from_youtube_url(path)
|
| 199 |
self.docs.extend(loader.load())
|
|
|
|
| 270 |
return state
|
| 271 |
|
| 272 |
|
| 273 |
+
|
|
|
|
|
|
|
|
|
|
| 274 |
|