File size: 5,546 Bytes
b4cd776
c285622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4cd776
c285622
 
 
b4cd776
c285622
b4cd776
c285622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4cd776
c285622
 
 
 
 
 
 
b4cd776
 
c285622
 
 
b4cd776
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c285622
 
 
 
 
 
 
 
 
 
 
b97774a
c285622
 
 
 
 
 
 
 
 
b97774a
 
 
 
 
c285622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4cd776
 
c285622
 
b4cd776
c285622
 
b4cd776
 
c285622
 
b4cd776
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import os
import shlex
import tempfile
from pathlib import Path
from typing import Annotated

import yt_dlp
from dotenv import load_dotenv
from e2b_code_interpreter import Sandbox
from langchain_core.messages import ToolMessage
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langchain_core.tools.base import InjectedToolCallId
from langgraph.types import Command
from typing_extensions import Annotated

load_dotenv()

@tool
def execute_code(code: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
    """
    Execute code in a e2b_code_interpreter sandbox and return the results.
    Args:
        code: The code to execute. Should be Python code without the triple backticks.
    """
    # `tool_call_id` and `config` are supplied by the framework, not by the
    # model; the function returns a Command that appends the formatted report
    # to "external_information" and answers the tool call with a ToolMessage.
    result = _execute_code_in_sandbox(code, os.getenv("E2B_API_KEY"))

    # stdout/stderr arrive as sequences of text chunks. Join them so the report
    # contains the actual text rather than the repr of a Python list.
    stdout_text = "".join(result["stdout"])
    stderr_text = "".join(result["stderr"])

    # An exception raised by the executed code may be reported separately from
    # stderr; surface it when the helper result carries one so failures are
    # visible in the "Errors" section instead of being silently dropped.
    error = result.get("error")
    if error is not None:
        error_text = str(getattr(error, "traceback", None) or error)
        stderr_text = "\n".join(part for part in (stderr_text, error_text) if part)

    formatted_result = f"""
## Code
```python
{code}
```
## Output
```
{stdout_text}
```
## Errors
```
{stderr_text}
```
"""

    external_information = f"{config.get('external_information', '')}\n---\n# Code Execution Results \n{formatted_result}"
    return Command(
        update={
            "external_information": external_information,
            "messages": [ToolMessage(content=formatted_result, tool_call_id=tool_call_id)]
        }
    )
    
def _execute_code_in_sandbox(code: str, api_key: str):
    """Run ``code`` in a fresh E2B sandbox and collect its output.

    Args:
        code: Source code handed to the sandbox's ``run_code``.
        api_key: E2B API key. When falsy, the sandbox is created without an
            explicit key (the previous behavior), leaving key lookup to the SDK.

    Returns:
        A dict with:
            ``stdout`` / ``stderr``: taken from ``execution.logs``.
            ``files``: the listing of ``/`` in the sandbox after the run.
            ``error``: ``execution.error`` (additional key; existing callers
                that only read the other keys are unaffected).
    """
    # Honor the key the caller passed instead of silently ignoring it.
    sbx = Sandbox(api_key=api_key) if api_key else Sandbox()
    try:
        execution = sbx.run_code(code)
        files = sbx.files.list("/")
    finally:
        # Release the remote sandbox rather than leaving it alive until it
        # times out on its own.
        sbx.kill()

    return {
        "stdout": execution.logs.stdout,
        "stderr": execution.logs.stderr,
        "files": files,
        "error": execution.error,
    }

@tool
def execute_python_file_url(file_url: str, tool_call_id: Annotated[str, InjectedToolCallId], config: RunnableConfig) -> Command:
    """
    Download a python file from a given URL, execute it and get the result
    Args:
        file_url: The URL of the file to download.
    Returns:
        The standard output produced by running the file, or an error message if the file couldn't be downloaded or executed
    """
    # `tool_call_id` and `config` are supplied by the framework, not by the
    # model; the result is returned as a Command that extends
    # "external_information" and answers the tool call with a ToolMessage.
    file_name = "code.py"
    try:
        sbx = Sandbox()
        try:
            # Quote the URL: it is untrusted input, and an unquoted "&", ";" or
            # "?" (common in query strings) would be interpreted by the shell.
            download = sbx.commands.run(
                f"wget -O {file_name} {shlex.quote(file_url)} && cat {file_name}"
            )
        finally:
            # The download sandbox is only needed for this one command.
            sbx.kill()

        # The downloaded source is executed in its own sandbox by the helper.
        result_code = _execute_code_in_sandbox(download.stdout, os.getenv("E2B_API_KEY"))
        final_result = "".join(result_code["stdout"])
    except Exception as e:
        # Tool boundary: report the failure to the caller as text, as the
        # docstring promises, instead of propagating the exception.
        final_result = f"Error executing python file from URL: {str(e)}"

    return Command(
        update={
            "external_information": f"{config.get('external_information', '')}\n---\n# result {final_result}",
            "messages": [ToolMessage(content=final_result, tool_call_id=tool_call_id)]
        }
    )
        
@tool
def get_youtube_transcript(url: str, tool_call_id: Annotated[str, InjectedToolCallId] = None, config: RunnableConfig = None) -> Command | str:
    """
    This tool extracts the transcript text from YouTube videos, returns the transcript as a string.
    Args:
        url: The YouTube video URL.
    Returns:
        The transcript as a string, or an error message if the transcript couldn't be obtained
    """
    # `tool_call_id` and `config` are supplied by the framework, not by the
    # model; the transcript (or error text) is returned inside a Command.
    subtitle_content = ""

    try:
        # The temporary directory is removed when the block exits, including
        # any nested entries; cleanup problems are ignored (best effort).
        with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
            ydl_opts = {
                'writesubtitles': True,
                'writeautomaticsub': True,
                'subtitleslangs': ['en'],
                'skip_download': True,
                # Write into the temporary directory through yt-dlp's own
                # output path option instead of os.chdir(), which would change
                # the working directory of the whole process.
                'paths': {'home': temp_dir},
                'outtmpl': 'subtitle',
                'quiet': True,
                'no_warnings': False,
                'ignoreerrors': True,
                'geo_bypass': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.extract_info(url, download=True)

            download_dir = Path(temp_dir)
            subtitle_files = list(download_dir.glob("*.vtt")) + list(download_dir.glob("*.srt"))

            if subtitle_files:
                with open(subtitle_files[0], 'r', encoding='utf-8') as f:
                    raw_subtitles = f.read()

                # Keep only the spoken text: drop blank lines, numeric cue
                # indices, timing lines ("-->") and the WEBVTT header line.
                cleaned_lines = [
                    line
                    for line in raw_subtitles.split('\n')
                    if line.strip()
                    and not line.strip().isdigit()
                    and '-->' not in line
                    and not line.startswith('WEBVTT')
                ]
                subtitle_content = '\n '.join(cleaned_lines)
            else:
                subtitle_content = "Error: No subtitles found for this video."

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        subtitle_content = f"Error retrieving YouTube transcript: {str(e)}"

    # `config` defaults to None, so guard before reading from it.
    previous_information = (config or {}).get('external_information', '')
    external_information = f"{previous_information}\n---\n# Youtube transcript \n{subtitle_content}"
    return Command(
            update={
                "external_information": external_information,
                "messages": [ToolMessage(content=subtitle_content, tool_call_id=tool_call_id)]
            }
        )

""" if __name__ == "__main__":
    # Simple test: print "Hello World"
    url = "https://agents-course-unit4-scoring.hf.space/files/f918266a-b3e0-4914-865d-4faa564f1aef" 
 
    # Build a minimal RunnableConfig with no external information
    config = RunnableConfig(**{"external_information": ""})
    input = f"{url}"
    # Execute the test code
    # Call the underlying function to bypass the BaseTool wrapper
    cmd: Command = execute_python_file_url.func(
        input,
        "test-call",
        config,
    ) """