frkhan commited on
Commit
4e3f7be
·
1 Parent(s): 2329760

-- Added mcp tool to extract youtube transcript.

Browse files

-- Modified the system prompt to improve accuracy of the LLM.

-- Used the ChatOpenAI package to consume the NVIDIA NIM API.

app.py CHANGED
@@ -40,7 +40,7 @@ async def run_and_submit_all( profile: gr.OAuthProfile | None):
40
  print(f"Error instantiating agent: {e}")
41
  return f"Error initializing agent: {e}", None
42
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
43
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
44
  print(agent_code)
45
 
46
  # 2. Fetch Questions
 
40
  print(f"Error instantiating agent: {e}")
41
  return f"Error initializing agent: {e}", None
42
  # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
43
+ agent_code = f"https://huggingface.co/spaces/frkhan/hf-agent-course-final-assignment/tree/main"
44
  print(agent_code)
45
 
46
  # 2. Fetch Questions
langchain_agent.py CHANGED
@@ -1,8 +1,10 @@
1
  import asyncio
 
2
  from langchain.agents import create_agent
3
  from langchain_google_genai import ChatGoogleGenerativeAI
4
  from langchain_mcp_adapters.client import MultiServerMCPClient
5
  from langchain.chat_models import init_chat_model
 
6
  from dotenv import load_dotenv
7
  from langfuse import observe
8
  from langfuse.langchain import CallbackHandler
@@ -17,18 +19,22 @@ class LangChainAgent:
17
  You are a general AI assistant. I will ask you a question. Report your thoughtsYou are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
18
 
19
  You have access to browser tool like playwright through MCP. Use it to search the web when needed.
 
 
20
 
21
  -- Always try to use google or any popular search engine first. If required visit the pages shown in the search result to find proper information.
22
 
23
  -- Some websites are built using javascript and builds / loads the content completely on browser / client side. In such a case you may have to wait until the page is fully loaded to get the content you want. Use the browser tool to navigate, click, scroll, and interact with the web page as needed to find the information required to answer the question.
24
 
25
- If you are asked about youtube videos, try to get the information from the video description or comments section. If you are asked something from the video, try to find the subtitle / closed caption of the video. Then you can examine the text conent to find proper answer. You don't need to be able to watch the video of process the video to know the conent of the video if subtitle / closed caption is available.
 
 
26
 
27
  Sometimes LLM inference APIs may hit rate limit. In such a case, try to understand the response from the API. Often it contains information about when you can retry or what to do next.
28
 
29
  """
30
 
31
- # Initialize Langfuse client
32
  # This block sets up the Langfuse callback handler for LangChain.
33
  # It initializes the Langfuse client and creates a CallbackHandler instance
34
  # only if the required API keys are available. The handler is then added to
@@ -49,7 +55,15 @@ class LangChainAgent:
49
  "transport": "stdio",
50
  "command": "npx",
51
  "args": [
52
- "@playwright/mcp@latest"
 
 
 
 
 
 
 
 
53
  ]
54
  }
55
  })
@@ -57,10 +71,27 @@ class LangChainAgent:
57
  tools = await client.get_tools()
58
  print(tools)
59
 
60
- model_name = "gemini-2.0-flash"
61
- model_provider = "google_genai" #google_genai
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
- model = init_chat_model(model_name, model_provider=model_provider)
 
 
 
 
64
 
65
  agent = create_agent(model, tools)
66
 
 
1
  import asyncio
2
+ import os
3
  from langchain.agents import create_agent
4
  from langchain_google_genai import ChatGoogleGenerativeAI
5
  from langchain_mcp_adapters.client import MultiServerMCPClient
6
  from langchain.chat_models import init_chat_model
7
+ from langchain_openai import ChatOpenAI
8
  from dotenv import load_dotenv
9
  from langfuse import observe
10
  from langfuse.langchain import CallbackHandler
 
19
  You are a general AI assistant. I will ask you a question. Report your thoughtsYou are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
20
 
21
  You have access to browser tool like playwright through MCP. Use it to search the web when needed.
22
+
23
+ -- MUST TO FOLLOW RULE: ALAWAY WAIT UNTIL THE PAGE IS FULLY LOADED BEFORE EXTRACTING INFORMATION FROM THE PAGE. --
24
 
25
  -- Always try to use google or any popular search engine first. If required visit the pages shown in the search result to find proper information.
26
 
27
  -- Some websites are built using javascript and builds / loads the content completely on browser / client side. In such a case you may have to wait until the page is fully loaded to get the content you want. Use the browser tool to navigate, click, scroll, and interact with the web page as needed to find the information required to answer the question.
28
 
29
+ If you are asked about youtube videos:
30
+ - Try to get the information from the video description or comments section.
31
+ - If you are asked any information from the video, try to get the transcript of the video using the youtube-transcript tool available to you. Use the video URL to get the transcript.
32
 
33
  Sometimes LLM inference APIs may hit rate limit. In such a case, try to understand the response from the API. Often it contains information about when you can retry or what to do next.
34
 
35
  """
36
 
37
+ # Initialize Langfuse client
38
  # This block sets up the Langfuse callback handler for LangChain.
39
  # It initializes the Langfuse client and creates a CallbackHandler instance
40
  # only if the required API keys are available. The handler is then added to
 
55
  "transport": "stdio",
56
  "command": "npx",
57
  "args": [
58
+ "@playwright/mcp@latest",
59
+ # "--headless"
60
+ ]
61
+ },
62
+ "youtube_transcript_mcp":{
63
+ "transport": "stdio",
64
+ "command": "python",
65
+ "args": [
66
+ "mcp-servers/youtube-transcript.py"
67
  ]
68
  }
69
  })
 
71
  tools = await client.get_tools()
72
  print(tools)
73
 
74
+ # model_name = "gemini-2.0-flash"
75
+ # model_provider = "google_genai" #google_genai
76
+
77
+ # model = init_chat_model(model_name, model_provider=model_provider)
78
+
79
+
80
+ # model_name = "deepseek-ai/deepseek-v3.1"
81
+ # model_name = "deepseek-ai/deepseek-v3.1-terminus"
82
+ # model_name = "minimaxai/minimax-m2"
83
+ # model_name = "mistralai/mistral-nemotron"
84
+ # model_name = "qwen/qwen3-next-80b-a3b-instruct"
85
+ model_name = "qwen/qwen3-next-80b-a3b-thinking"
86
+ # model_name = "moonshotai/kimi-k2-instruct-0905"
87
+ # model_provider = "nvidia"
88
+
89
 
90
+ model = ChatOpenAI(
91
+ model=model_name,
92
+ openai_api_key=os.getenv("NVIDIA_API_KEY"),
93
+ openai_api_base="https://integrate.api.nvidia.com/v1"
94
+ )
95
 
96
  agent = create_agent(model, tools)
97
 
mcp-servers/youtube-transcript.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mcp.server.fastmcp import FastMCP
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+
4
+
5
# MCP server instance. Name fixed: the original read "youtube-trascript" (typo);
# the server name is display metadata only — the client config in
# langchain_agent.py keys on "youtube_transcript_mcp", so no caller breaks.
mcp = FastMCP("youtube-transcript")
6
+
7
+
8
def _extract_video_id(video_url: str) -> str | None:
    """Return the YouTube video id embedded in *video_url*, or None.

    Generalizes the original ``v=`` / ``youtu.be/`` handling to also accept
    ``/shorts/``, ``/embed/`` and ``/live/`` URLs, and strips any trailing
    query/fragment/path junk from the id.
    """
    for marker in ("v=", "youtu.be/", "shorts/", "embed/", "live/"):
        if marker in video_url:
            tail = video_url.split(marker, 1)[1]
            # The id ends at the first query, fragment, or path separator.
            for sep in ("&", "?", "#", "/"):
                tail = tail.split(sep, 1)[0]
            return tail
    return None


@mcp.tool()
def get_youtube_transcript(video_url: str) -> str:
    """Fetches the transcript of a YouTube video given its URL.

    Returns the transcript text (one caption snippet per line) on success,
    or a human-readable error string on failure (unrecognized URL, missing
    captions, network errors, ...). Errors are returned rather than raised
    so the MCP client always receives a usable string.
    """
    video_id = _extract_video_id(video_url)
    if video_id is None:
        print(f"DEBUG: Invalid YouTube URL format for: {video_url}")
        return "Invalid YouTube URL format."

    try:
        # fetch() returns a FetchedTranscript; its `snippets` attribute holds
        # the individual caption segments, each exposing a `text` attribute.
        ytt_api = YouTubeTranscriptApi()
        fetched_transcript_object = ytt_api.fetch(video_id)
        return "\n".join(segment.text for segment in fetched_transcript_object.snippets)
    except Exception as e:
        # Broad catch is deliberate: any library/network failure is reported
        # back to the LLM as text instead of crashing the MCP server.
        return f"Error fetching transcript: {str(e)}"
37
+
38
+
39
+
40
if __name__ == "__main__":
    # Serve over stdio so the langchain MCP client can spawn this script as a
    # subprocess. (Use transport="streamable-http" to expose it over HTTP instead.)
    mcp.run(transport="stdio")
requirements.txt CHANGED
@@ -11,3 +11,7 @@ langchain-google-genai==3.0.0
11
  # langchain-nvidia-ai-endpoints==0.3.19
12
  langchain==1.0.3
13
  openai==2.6.1
 
 
 
 
 
11
  # langchain-nvidia-ai-endpoints==0.3.19
12
  langchain==1.0.3
13
  openai==2.6.1
14
+ langchain-deepseek==1.0.0
15
+ langchain-openai==1.0.2
16
+ youtube-transcript-api==1.2.3
17
+