frkhan commited on
Commit
4e3f7be
·
1 Parent(s): 2329760

-- Added mcp tool to extract youtube transcript.

Browse files

-- Modified the system prompt to improve accuracy of the LLM.

-- Used the ChatOpenAI package to consume the NVIDIA NIM API.

app.py CHANGED
@@ -40,7 +40,7 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
40
  print(f"Error instantiating agent: {e}")
41
  return f"Error initializing agent: {e}", None
42
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
43
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
44
  print(agent_code)
45
 
46
  # 2. Fetch Questions
 
40
  print(f"Error instantiating agent: {e}")
41
  return f"Error initializing agent: {e}", None
42
  # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
43
+ agent_code = f"https://huggingface.co/spaces/frkhan/hf-agent-course-final-assignment/tree/main"
44
  print(agent_code)
45
 
46
  # 2. Fetch Questions
langchain_agent.py CHANGED
@@ -1,8 +1,10 @@
1
  import asyncio
 
2
  from langchain.agents import create_agent
3
  from langchain_google_genai import ChatGoogleGenerativeAI
4
  from langchain_mcp_adapters.client import MultiServerMCPClient
5
  from langchain.chat_models import init_chat_model
 
6
  from dotenv import load_dotenv
7
  from langfuse import observe
8
  from langfuse.langchain import CallbackHandler
@@ -17,18 +19,22 @@ class LangChainAgent:
17
  You are a general AI assistant. I will ask you a question. Report your thoughtsYou are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
18
 
19
  You have access to browser tool like playwright through MCP. Use it to search the web when needed.
 
 
20
 
21
  -- Always try to use google or any popular search engine first. If required visit the pages shown in the search result to find proper information.
22
 
23
  -- Some websites are built using javascript and builds / loads the content completely on browser / client side. In such a case you may have to wait until the page is fully loaded to get the content you want. Use the browser tool to navigate, click, scroll, and interact with the web page as needed to find the information required to answer the question.
24
 
25
- If you are asked about youtube videos, try to get the information from the video description or comments section. If you are asked something from the video, try to find the subtitle / closed caption of the video. Then you can examine the text conent to find proper answer. You don't need to be able to watch the video of process the video to know the conent of the video if subtitle / closed caption is available.
 
 
26
 
27
  Sometimes LLM inference APIs may hit rate limit. In such a case, try to understand the response from the API. Often it contains information about when you can retry or what to do next.
28
 
29
  """
30
 
31
- # Initialize Langfuse client
32
  # This block sets up the Langfuse callback handler for LangChain.
33
  # It initializes the Langfuse client and creates a CallbackHandler instance
34
  # only if the required API keys are available. The handler is then added to
@@ -49,7 +55,15 @@ class LangChainAgent:
49
  "transport": "stdio",
50
  "command": "npx",
51
  "args": [
52
- "@playwright/mcp@latest"
 
 
 
 
 
 
 
 
53
  ]
54
  }
55
  })
@@ -57,10 +71,27 @@ class LangChainAgent:
57
  tools = await client.get_tools()
58
  print(tools)
59
 
60
- model_name = "gemini-2.0-flash"
61
- model_provider = "google_genai" #google_genai
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- model = init_chat_model(model_name, model_provider=model_provider)
 
 
 
 
64
 
65
  agent = create_agent(model, tools)
66
 
 
1
  import asyncio
2
+ import os
3
  from langchain.agents import create_agent
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
  from langchain_mcp_adapters.client import MultiServerMCPClient
6
  from langchain.chat_models import init_chat_model
7
+ from langchain_openai import ChatOpenAI
8
  from dotenv import load_dotenv
9
  from langfuse import observe
10
  from langfuse.langchain import CallbackHandler
 
19
  You are a general AI assistant. I will ask you a question. Report your thoughtsYou are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
20
 
21
  You have access to browser tool like playwright through MCP. Use it to search the web when needed.
22
+
23
+ -- MUST TO FOLLOW RULE: ALAWAY WAIT UNTIL THE PAGE IS FULLY LOADED BEFORE EXTRACTING INFORMATION FROM THE PAGE. --
24
 
25
  -- Always try to use google or any popular search engine first. If required visit the pages shown in the search result to find proper information.
26
 
27
  -- Some websites are built using javascript and builds / loads the content completely on browser / client side. In such a case you may have to wait until the page is fully loaded to get the content you want. Use the browser tool to navigate, click, scroll, and interact with the web page as needed to find the information required to answer the question.
28
 
29
+ If you are asked about youtube videos:
30
+ - Try to get the information from the video description or comments section.
31
+ - If you are asked any information from the video, try to get the transcript of the video using the youtube-transcript tool available to you. Use the video URL to get the transcript.
32
 
33
  Sometimes LLM inference APIs may hit rate limit. In such a case, try to understand the response from the API. Often it contains information about when you can retry or what to do next.
34
 
35
  """
36
 
37
+ # Initialize Langfuse client
38
  # This block sets up the Langfuse callback handler for LangChain.
39
  # It initializes the Langfuse client and creates a CallbackHandler instance
40
  # only if the required API keys are available. The handler is then added to
 
55
  "transport": "stdio",
56
  "command": "npx",
57
  "args": [
58
+ "@playwright/mcp@latest",
59
+ # "--headless"
60
+ ]
61
+ },
62
+ "youtube_transcript_mcp":{
63
+ "transport": "stdio",
64
+ "command": "python",
65
+ "args": [
66
+ "mcp-servers/youtube-transcript.py"
67
  ]
68
  }
69
  })
 
71
  tools = await client.get_tools()
72
  print(tools)
73
 
74
+ # model_name = "gemini-2.0-flash"
75
+ # model_provider = "google_genai" #google_genai
76
+
77
+ # model = init_chat_model(model_name, model_provider=model_provider)
78
+
79
+
80
+ # model_name = "deepseek-ai/deepseek-v3.1"
81
+ # model_name = "deepseek-ai/deepseek-v3.1-terminus"
82
+ # model_name = "minimaxai/minimax-m2"
83
+ # model_name = "mistralai/mistral-nemotron"
84
+ # model_name = "qwen/qwen3-next-80b-a3b-instruct"
85
+ model_name = "qwen/qwen3-next-80b-a3b-thinking"
86
+ # model_name = "moonshotai/kimi-k2-instruct-0905"
87
+ # model_provider = "nvidia"
88
+
89
 
90
+ model = ChatOpenAI(
91
+ model=model_name,
92
+ openai_api_key=os.getenv("NVIDIA_API_KEY"),
93
+ openai_api_base="https://integrate.api.nvidia.com/v1"
94
+ )
95
 
96
  agent = create_agent(model, tools)
97
 
mcp-servers/youtube-transcript.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mcp.server.fastmcp import FastMCP
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+
4
+
5
# MCP server instance. Name fixed: the original read "youtube-trascript" (typo);
# the server name is display metadata only — the client config in
# langchain_agent.py keys on "youtube_transcript_mcp", so no caller breaks.
mcp = FastMCP("youtube-transcript")
6
+
7
+
8
def _extract_video_id(video_url: str) -> str | None:
    """Return the YouTube video id embedded in *video_url*, or None.

    Generalizes the original ``v=`` / ``youtu.be/`` handling to also accept
    ``/shorts/``, ``/embed/`` and ``/live/`` URLs, and strips any trailing
    query/fragment/path junk from the id.
    """
    for marker in ("v=", "youtu.be/", "shorts/", "embed/", "live/"):
        if marker in video_url:
            tail = video_url.split(marker, 1)[1]
            # The id ends at the first query, fragment, or path separator.
            for sep in ("&", "?", "#", "/"):
                tail = tail.split(sep, 1)[0]
            return tail
    return None


@mcp.tool()
def get_youtube_transcript(video_url: str) -> str:
    """Fetches the transcript of a YouTube video given its URL.

    Returns the transcript text (one caption snippet per line) on success,
    or a human-readable error string on failure (unrecognized URL, missing
    captions, network errors, ...). Errors are returned rather than raised
    so the MCP client always receives a usable string.
    """
    video_id = _extract_video_id(video_url)
    if video_id is None:
        print(f"DEBUG: Invalid YouTube URL format for: {video_url}")
        return "Invalid YouTube URL format."

    try:
        # fetch() returns a FetchedTranscript; its `snippets` attribute holds
        # the individual caption segments, each exposing a `text` attribute.
        ytt_api = YouTubeTranscriptApi()
        fetched_transcript_object = ytt_api.fetch(video_id)
        return "\n".join(segment.text for segment in fetched_transcript_object.snippets)
    except Exception as e:
        # Broad catch is deliberate: any library/network failure is reported
        # back to the LLM as text instead of crashing the MCP server.
        return f"Error fetching transcript: {str(e)}"
37
+
38
+
39
+
40
if __name__ == "__main__":
    # Serve over stdio so the langchain MCP client can spawn this script as a
    # subprocess. (Use transport="streamable-http" to expose it over HTTP instead.)
    mcp.run(transport="stdio")
requirements.txt CHANGED
@@ -11,3 +11,7 @@ langchain-google-genai==3.0.0
11
  # langchain-nvidia-ai-endpoints==0.3.19
12
  langchain==1.0.3
13
  openai==2.6.1
 
 
 
 
 
11
  # langchain-nvidia-ai-endpoints==0.3.19
12
  langchain==1.0.3
13
  openai==2.6.1
14
+ langchain-deepseek==1.0.0
15
+ langchain-openai==1.0.2
16
+ youtube-transcript-api==1.2.3
17
+