Spaces:

Moai633
/

StemGraph_AI

Running

App Files Files Community

StemGraph_AI / tools.py

Krishna111111

round-robin removed; static models; fix; ux changes..

bc7a752 7 days ago

Raw

History Blame Contribute Delete

5.97 kB

	from __future__ import annotations
	import re
	from langchain_core.tools import tool
	from tavily import TavilyClient # type:ignore
	from youtube_transcript_api import YouTubeTranscriptApi #type:ignore
	from config import TAVILY_API_KEY


	# ── Web Search (Tavily) ───────────────────────────────────────
	def run_web_search(
	    query: str,
	    api_key: str = "",
	    *,
	    search_depth: str = "advanced",
	    topic: str = "general",
	    max_results: int = 5,
	    include_answer: bool = True,
	) -> str:
	    """
	    Run a web search using the Tavily API and format the response as text.

	    `api_key` is the user's own Tavily key (BYOK). Falls back to the shared
	    server key (`TAVILY_API_KEY`) if the user didn't provide one. If no key
	    is available at all, web search is treated as disabled.

	    Args:
	        query: Search query forwarded to `TavilyClient.search`.
	        api_key: Optional per-user Tavily key; empty means "use the shared key".
	        search_depth: Forwarded unchanged to `TavilyClient.search`.
	        topic: Forwarded unchanged to `TavilyClient.search`.
	        max_results: Forwarded unchanged to `TavilyClient.search`.
	        include_answer: Forwarded unchanged to `TavilyClient.search`.

	    Returns:
	        A formatted block: an optional "Answer: ..." line followed by one
	        card per result (title, optional relevance score, URL, snippet).
	        When no key is configured, the client call fails, or the response
	        has neither results nor an answer, a short explanatory message is
	        returned instead; client errors are not propagated to the caller.
	    """
	    key = api_key or TAVILY_API_KEY
	    if not key:
	        return (
	            "Web search is unavailable: no Tavily API key configured. "
	            "Add a Tavily API key in Settings to enable web search."
	        )

	    try:
	        client = TavilyClient(api_key=key)
	        resp = client.search(
	            query,
	            search_depth=search_depth,
	            topic=topic,
	            max_results=max_results,
	            include_answer=include_answer,
	            include_raw_content=False,
	            chunks_per_source=3,
	        )
	    except Exception as e:
	        # Boundary handler: any client failure becomes a message for the
	        # caller instead of an exception.
	        print(f"[TAVILY SEARCH ERROR] {e}", flush=True)
	        return f"Web search is temporarily unavailable. (Error: {e})"

	    # Anything that is not a dict is treated as an empty response.
	    payload = resp if isinstance(resp, dict) else {}
	    answer = payload.get("answer")

	    # Guard the formatting loop: tolerate `"results": None`, a non-list
	    # value, and entries that are not dicts, so nothing raises past the
	    # try/except above.
	    raw_results = payload.get("results") or []
	    if not isinstance(raw_results, (list, tuple)):
	        raw_results = []
	    results = [r for r in raw_results if isinstance(r, dict)]

	    if not results and not answer:
	        return "No search results found."

	    blocks = []
	    if answer:
	        blocks.append(f"Answer: {answer}\n")

	    for r in results:
	        title = r.get("title", "No Title")
	        url = r.get("url", "")
	        score = r.get("score", "")
	        content = r.get("content", "")
	        # Only numeric scores are rendered; a missing score adds nothing.
	        score_str = f" (relevance: {score:.2f})" if isinstance(score, (int, float)) else ""
	        blocks.append(f"Title: {title}{score_str}\nURL: {url}\nSnippet: {content}\n")

	    return "\n".join(blocks)


	@tool
	def web_search(query: str) -> str:
	    """
	    Search the internet for current information. Use when the student asks
	    about recent events, specific facts, or anything not covered by the NCERT
	    curriculum context. Input: a concise search query string.
	    """
	    # NOTE(review): the docstring above is kept word-for-word — presumably
	    # `@tool` exposes it as the tool description; confirm before editing it.
	    # No api_key is passed here, so run_web_search falls back to the shared
	    # TAVILY_API_KEY.
	    return run_web_search(query)


	# ── YouTube Transcript ────────────────────────────────────────
	def _extract_video_id(url_or_id: str) -> str \| None:
	"""Extract YouTube 11-character video ID from URL or bare ID."""
	patterns = [
	r"(?:v=\|youtu\.be/\|embed/\|shorts/)([A-Za-z0-9_-]{11})",
	r"^([A-Za-z0-9_-]{11})$",
	]
	for pat in patterns:
	m = re.search(pat, url_or_id.strip())
	if m:
	return m.group(1)
	return None


	@tool
	def yt_transcript(youtube_url: str) -> str:
	    """
	    Fetch the full transcript of a YouTube video.
	    """
	    # NOTE(review): the docstring above is kept word-for-word — presumably
	    # `@tool` exposes it as the tool description; confirm before editing it.
	    # Thin wrapper: all extraction and error handling lives in
	    # fetch_yt_transcript.
	    return fetch_yt_transcript(youtube_url)


	def fetch_yt_transcript(youtube_url: str) -> str:
	    """
	    Programmatic helper to fetch the transcript of a YouTube video URL or ID.

	    Args:
	        youtube_url: A YouTube URL, a bare video ID, or text containing one
	            (whatever `_extract_video_id` accepts).

	    Returns:
	        The transcript segments joined by single spaces on success.
	        Otherwise a message starting with "TRANSCRIPT_UNAVAILABLE:" that
	        says why (no video ID found, empty transcript, captions missing,
	        rate limiting, or any other fetch error). Fetch errors are caught
	        and reported this way rather than raised.
	    """
	    video_id = _extract_video_id(youtube_url)
	    if not video_id:
	        # str() keeps the log line safe even if a non-string slipped through.
	        print(f"[YT TRANSCRIPT] No video ID found in input: {str(youtube_url)[:80]}", flush=True)
	        return "TRANSCRIPT_UNAVAILABLE: Could not extract a valid YouTube video ID from the message."

	    langs = ["en", "hi", "en-IN", "en-US"]
	    try:
	        # youtube-transcript-api ≥ 1.0 replaced the static `get_transcript`
	        # with an instance method `.fetch()`. Support both APIs.
	        if hasattr(YouTubeTranscriptApi, "get_transcript"):
	            fetched = YouTubeTranscriptApi.get_transcript(video_id, languages=langs)  # type:ignore[attr-defined]
	            transcript = " ".join(seg["text"] for seg in fetched)
	        else:
	            fetched = YouTubeTranscriptApi().fetch(video_id, languages=langs)
	            # FetchedTranscript yields snippet objects with a `.text` attribute
	            # (older dict form `seg["text"]` is handled as a fallback).
	            transcript = " ".join(
	                getattr(seg, "text", None) or (seg.get("text", "") if isinstance(seg, dict) else "")
	                for seg in fetched
	            )
	    except Exception as exc:
	        # Boundary handler: classify the failure from its message text so the
	        # caller always receives a TRANSCRIPT_UNAVAILABLE string.
	        print(f"[YT TRANSCRIPT EXCEPTION] video_id={video_id} | error={exc}", flush=True)
	        err = str(exc).lower()
	        if "disabled" in err or "no transcript" in err or "no element" in err:
	            return (
	                "TRANSCRIPT_UNAVAILABLE: This video has no transcript available "
	                "(subtitles are disabled or no captions exist for this video)."
	            )
	        if "too many requests" in err or "429" in err:
	            return (
	                "TRANSCRIPT_UNAVAILABLE: YouTube is rate-limiting transcript requests right now. "
	                "Please try again in a few minutes."
	            )
	        return (
	            f"TRANSCRIPT_UNAVAILABLE: Could not retrieve transcript. Reason: {exc}"
	        )

	    # Success path kept outside the try so only the fetch/join can be
	    # misreported as a retrieval error.
	    if not transcript.strip():
	        print(f"[YT TRANSCRIPT] Empty transcript for video_id={video_id}", flush=True)
	        return "TRANSCRIPT_UNAVAILABLE: Transcript is empty for this video."
	    print(f"[YT TRANSCRIPT] OK — {len(transcript)} chars for video_id={video_id}", flush=True)
	    return transcript


	# ── Exported list ─────────────────────────────────────────────
	# The two @tool-decorated callables defined in this module, collected in one
	# list. NOTE(review): presumably imported by the agent setup elsewhere to
	# register the tools — confirm against the caller.
	TOOLS = [web_search, yt_transcript]