File size: 2,916 Bytes
602a16c
b712b2b
602a16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Custom tools for the GAIA Agent
"""
import requests
from smolagents import tool
from markdownify import markdownify as md


@tool
def smart_visit(url: str) -> str:
    """
    Visits a webpage and returns its content converted to Markdown.
    Essential for Wikipedia, documentation, or any web content.

    Args:
        url: The URL of the page to visit.

    Returns:
        str: Webpage content in Markdown format (max 25000 chars)
    """
    # NOTE(review): the docstring above is presumably what the @tool decorator
    # turns into the tool description and argument schema, so keep its
    # Args/Returns sections in sync with the signature.

    # Browser-like headers; sent with every request made by this tool.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.google.com/'
    }
    try:
        response = requests.get(url, headers=headers, timeout=25)
        response.raise_for_status()

        # requests assumes ISO-8859-1 for text/* responses whose Content-Type
        # header carries no charset, which garbles UTF-8 pages. When the
        # server declared no charset, trust the encoding detected from the
        # body instead.
        content_type = response.headers.get('Content-Type', '').lower()
        if 'text' in content_type and 'charset' not in content_type:
            response.encoding = response.apparent_encoding

        content = md(response.text)
        # Cap the result so a single page cannot flood the caller.
        return content[:25000]
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising, so
        # the caller always receives a string.
        return f"Error visiting {url}: {e}"


@tool
def get_youtube_info(video_url: str) -> str:
    """
    Extracts the video ID from a YouTube URL and returns a strategy for finding
    transcripts or related information about the video online.
    Supports watch URLs (youtube.com/watch?v=ID), short links (youtu.be/ID) and
    /shorts/, /embed/, /live/ and /v/ paths. It does not download or play the video.

    Args:
        video_url: YouTube video URL (e.g., https://www.youtube.com/watch?v=VIDEO_ID)

    Returns:
        str: The video ID and a transcript search strategy, or an error message
    """
    try:
        # Imported locally (as wikipedia_search does) so the tool body carries
        # its own dependencies.
        from urllib.parse import parse_qs, urlparse

        url = video_url.strip()
        lowered = url.lower()
        if "youtube.com" not in lowered and "youtu.be" not in lowered:
            return "Invalid YouTube URL"

        # Structured parse first: it is immune to look-alike parameters such
        # as "?rev=1&v=ID" and ignores "#fragment" suffixes.
        try:
            # urlparse only recognises the host when a scheme is present.
            parsed = urlparse(url if "://" in url else f"https://{url}")
            host = parsed.hostname or ""
            segments = [seg for seg in parsed.path.split("/") if seg]
            query_ids = parse_qs(parsed.query).get("v", [])
        except ValueError:
            # Malformed URL: fall through to the plain string fallback below.
            host, segments, query_ids = "", [], []

        video_id = ""
        if host == "youtu.be":
            # Short link: the ID is the first path segment.
            if segments:
                video_id = segments[0]
        elif host == "youtube.com" or host.endswith(".youtube.com"):
            if query_ids:
                video_id = query_ids[0]
            elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                video_id = segments[1]

        if not video_id:
            # Fallback for inputs where the YouTube link is not the host
            # itself (for example a redirect URL wrapping a YouTube link):
            # keep the original substring-based extraction.
            if "youtube.com" in lowered:
                video_id = url.split("v=")[1].split("&")[0] if "v=" in url else ""
            else:
                video_id = url.split("/")[-1].split("?")[0]

        if not video_id:
            return "Could not extract video ID"

        # The message text (including its whitespace) is part of the tool's
        # output and is reproduced exactly.
        return (
            f"Video ID: {video_id}\n"
            "        \n"
            "STRATEGY TO ANSWER: \n"
            f"1. Search for '{video_id}' + keywords from the question on DuckDuckGo\n"
            "2. Look for transcripts, comments, or discussion forums about this video\n"
            f"3. The video URL is: {video_url}\n"
            "\n"
            "Note: Direct video playback is not available. Search online for transcripts or summaries."
        )

    except Exception as e:
        # Tool boundary: always answer with a string rather than raising.
        return f"Error processing YouTube video: {str(e)}"


@tool
def wikipedia_search(query: str) -> str:
    """
    Searches Wikipedia for a query and returns the page content in Markdown format.

    Args:
        query: The search term or topic to look up on Wikipedia

    Returns:
        str: The Wikipedia page content in Markdown format, or an error message
    """
    try:
        from urllib.parse import quote_plus

        # Form-encode the query so spaces and punctuation are safe in the URL.
        encoded_query = quote_plus(query)
        target = (
            "https://en.wikipedia.org/w/index.php"
            f"?search={encoded_query}&title=Special%3ASearch&go=Go"
        )
        # smart_visit is wrapped by @tool, so its underlying function is
        # reached through .forward; it fetches the page and returns Markdown.
        page_markdown = smart_visit.forward(target)
    except Exception as err:
        return f"Error searching Wikipedia: {err}"
    return page_markdown