File size: 5,969 Bytes
c80f0ef
 
 
1829c17
5c62ce3
1829c17
c80f0ef
 
1829c17
 
 
 
 
 
 
 
 
 
c95149f
1829c17
 
 
 
 
 
 
 
c95149f
1829c17
 
 
 
 
 
 
c95149f
1829c17
 
 
 
 
 
 
 
 
 
c95149f
1829c17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c95149f
 
5c4367e
c80f0ef
654b523
c80f0ef
 
 
 
 
 
 
 
 
 
5c4367e
c80f0ef
 
 
 
 
c95149f
 
 
 
 
 
 
c80f0ef
 
5c62ce3
 
c80f0ef
 
bc7a752
 
 
 
 
 
 
 
 
 
 
 
 
 
c80f0ef
5c62ce3
 
 
c80f0ef
 
5c62ce3
c80f0ef
5c62ce3
c80f0ef
5c62ce3
 
c80f0ef
5c62ce3
 
 
 
 
 
 
 
c80f0ef
5c4367e
 
c80f0ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from __future__ import annotations
import re
from langchain_core.tools import tool
from tavily import TavilyClient  # type:ignore
from youtube_transcript_api import YouTubeTranscriptApi #type:ignore
from config import TAVILY_API_KEY


# ── Web Search (Tavily) ───────────────────────────────────────
def run_web_search(
    query: str,
    api_key: str = "",
    *,
    search_depth: str = "advanced",
    topic: str = "general",
    max_results: int = 5,
    include_answer: bool = True,
) -> str:
    """
    Run a web search using the Tavily API and format the response as text.

    `api_key` is the user's own Tavily key (BYOK). Falls back to the shared
    server key (`TAVILY_API_KEY`) if the user didn't provide one. If no key is
    available at all, web search is treated as disabled.

    Args:
        query: Search query forwarded to `TavilyClient.search`.
        api_key: Caller-supplied Tavily key; an empty value selects the
            shared server key.
        search_depth: Passed through unchanged to `TavilyClient.search`.
        topic: Passed through unchanged to `TavilyClient.search`.
        max_results: Passed through unchanged to `TavilyClient.search`.
        include_answer: Passed through unchanged to `TavilyClient.search`.

    Returns:
        A formatted block: an optional "Answer: ..." entry followed by one
        card per result (title, relevance score when numeric, URL, content
        snippet). A human-readable message is returned instead when no key
        is configured, the request fails, or the response contains nothing
        usable; those situations are reported as text rather than raised.
    """
    key = api_key or TAVILY_API_KEY
    if not key:
        return (
            "Web search is unavailable: no Tavily API key configured. "
            "Add a Tavily API key in Settings to enable web search."
        )

    try:
        client = TavilyClient(api_key=key)
        resp = client.search(
            query,
            search_depth=search_depth,
            topic=topic,
            max_results=max_results,
            include_answer=include_answer,
            include_raw_content=False,
            chunks_per_source=3,
        )
    except Exception as e:
        # Any client/network failure is turned into a message for the caller
        # instead of propagating.
        print(f"[TAVILY SEARCH ERROR] {e}", flush=True)
        return f"Web search is temporarily unavailable. (Error: {e})"

    # Normalise the response once. Anything that is not a dict is treated as
    # an empty payload, and malformed `results` (None, or entries that are not
    # dicts) are skipped so the formatting below cannot raise.
    payload = resp if isinstance(resp, dict) else {}
    answer = payload.get("answer")
    raw_results = payload.get("results")
    if isinstance(raw_results, (list, tuple)):
        results = [r for r in raw_results if isinstance(r, dict)]
    else:
        results = []

    if not results and not answer:
        return "No search results found."

    blocks = []
    if answer:
        blocks.append(f"Answer: {answer}\n")

    for r in results:
        title = r.get("title", "No Title")
        url = r.get("url", "")
        score = r.get("score", "")
        content = r.get("content", "")
        # Only numeric scores are shown; a missing or non-numeric score is omitted.
        score_str = f" (relevance: {score:.2f})" if isinstance(score, (int, float)) else ""
        blocks.append(f"Title: {title}{score_str}\nURL: {url}\nSnippet: {content}\n")

    return "\n".join(blocks)


# Agent-facing wrapper around run_web_search. No api_key is forwarded, so the
# call uses run_web_search's default (empty) key and therefore relies on the
# module-level TAVILY_API_KEY fallback.
# NOTE(review): with LangChain's @tool the docstring below is presumably what
# the model sees as the tool description -- treat it as runtime text and edit
# with care. TODO confirm against the LangChain version in use.
@tool
def web_search(query: str) -> str:
    """
    Search the internet for current information. Use when the student asks
    about recent events, specific facts, or anything not covered by the NCERT
    curriculum context. Input: a concise search query string.
    """
    return run_web_search(query)


# ── YouTube Transcript ────────────────────────────────────────
def _extract_video_id(url_or_id: str) -> str | None:
    """
    Extract the 11-character YouTube video ID from a URL or bare ID.

    The input is searched (not fully matched) for an ID that follows one of
    the markers `v=`, `youtu.be/`, `embed/`, `shorts/` or `live/`, so the URL
    may be embedded in longer text. If none is found, the whole stripped
    input is accepted when it is itself exactly 11 ID characters.

    Args:
        url_or_id: A YouTube URL, text containing one, or a bare video ID.

    Returns:
        The video ID, or None when nothing matches or the input is not a
        string.
    """
    if not isinstance(url_or_id, str):
        # Avoids an AttributeError on `.strip()` for None or other non-string input.
        return None

    candidate = url_or_id.strip()
    patterns = (
        # ID following a known URL marker, anywhere in the text.
        r"(?:v=|youtu\.be/|embed/|shorts/|live/)([A-Za-z0-9_-]{11})",
        # The entire input is a bare ID.
        r"^([A-Za-z0-9_-]{11})$",
    )
    for pat in patterns:
        m = re.search(pat, candidate)
        if m:
            return m.group(1)
    return None


# Agent-facing tool; all work is delegated to fetch_yt_transcript, which
# returns either the transcript text or a string that starts with
# "TRANSCRIPT_UNAVAILABLE:".
# NOTE(review): with LangChain's @tool the docstring below is presumably what
# the model sees as the tool description -- treat it as runtime text.
# TODO confirm against the LangChain version in use.
@tool
def yt_transcript(youtube_url: str) -> str:
    """
    Fetch the full transcript of a YouTube video.
    """
    return fetch_yt_transcript(youtube_url)


def fetch_yt_transcript(youtube_url: str) -> str:
    """
    Programmatic helper to fetch the transcript of a YouTube video URL or ID.

    The video ID is extracted with `_extract_video_id`, then the transcript
    is requested for the languages "en", "hi", "en-IN" and "en-US".

    Args:
        youtube_url: A YouTube URL, text containing one, or a bare video ID.

    Returns:
        The transcript segments joined with single spaces. On any failure
        (no video ID, fetch error, empty transcript) a string starting with
        "TRANSCRIPT_UNAVAILABLE:" is returned instead; fetch errors are not
        raised to the caller.
    """

    def _text_of(seg) -> str:
        # Segments are dicts in the legacy API and snippet objects with a
        # `.text` attribute in the newer one; missing text becomes "".
        text = seg.get("text") if isinstance(seg, dict) else getattr(seg, "text", None)
        return text or ""

    video_id = _extract_video_id(youtube_url)
    if not video_id:
        # str() keeps the log line safe even if a non-string slipped through.
        print(f"[YT TRANSCRIPT] No video ID found in input: {str(youtube_url)[:80]}", flush=True)
        return "TRANSCRIPT_UNAVAILABLE: Could not extract a valid YouTube video ID from the message."

    langs = ["en", "hi", "en-IN", "en-US"]
    # Only the library call and the iteration over its result live in the
    # `try`, so an unrelated failure (e.g. while logging) is never reported
    # as a missing transcript.
    try:
        # youtube-transcript-api ≥ 1.0 replaced the static `get_transcript`
        # with an instance method `.fetch()`. Support both APIs.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            fetched = YouTubeTranscriptApi.get_transcript(video_id, languages=langs)  # type:ignore[attr-defined]
        else:
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=langs)
        transcript = " ".join(_text_of(seg) for seg in fetched)
    except Exception as exc:
        print(f"[YT TRANSCRIPT EXCEPTION] video_id={video_id} | error={exc}", flush=True)
        # The failure is classified from the message text.
        err = str(exc).lower()
        if "disabled" in err or "no transcript" in err or "no element" in err:
            return (
                "TRANSCRIPT_UNAVAILABLE: This video has no transcript available "
                "(subtitles are disabled or no captions exist for this video)."
            )
        if "too many requests" in err or "429" in err:
            return (
                "TRANSCRIPT_UNAVAILABLE: YouTube is rate-limiting transcript requests right now. "
                "Please try again in a few minutes."
            )
        return (
            f"TRANSCRIPT_UNAVAILABLE: Could not retrieve transcript. Reason: {exc}"
        )

    if not transcript.strip():
        print(f"[YT TRANSCRIPT] Empty transcript for video_id={video_id}", flush=True)
        return "TRANSCRIPT_UNAVAILABLE: Transcript is empty for this video."

    # ASCII-only log line: the previous separator was mis-encoded and could
    # fail to print on consoles with a narrow output encoding.
    print(f"[YT TRANSCRIPT] OK - {len(transcript)} chars for video_id={video_id}", flush=True)
    return transcript


# ── Exported list ─────────────────────────────────────────────
# The two @tool-decorated callables defined in this module, collected in one
# list for importers (presumably handed to the agent elsewhere -- the consumer
# is not visible in this file).
TOOLS = [web_search, yt_transcript]