Bshraman committed
Commit 08583a4 · verified · 1 Parent(s): f5b5628

Upload 16 files

agent.py CHANGED
@@ -1,224 +1,77 @@
-"""LangGraph Agent"""
-import os
-from dotenv import load_dotenv
-from langgraph.graph import START, StateGraph, MessagesState
-from langgraph.prebuilt import tools_condition
-from langgraph.prebuilt import ToolNode
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_groq import ChatGroq
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
-from langchain_community.tools.tavily_search import TavilySearchResults
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_community.document_loaders import ArxivLoader
-from langchain_community.vectorstores import SupabaseVectorStore
-from langchain_core.messages import SystemMessage, HumanMessage
-from langchain_core.tools import tool
-from langchain.tools.retriever import create_retriever_tool
-from supabase.client import Client, create_client
-from langchain_community.tools import DuckDuckGoSearchRun
-
-load_dotenv()
-
-@tool
-def search_answer(question: str) -> str:
-    """
-    This function uses the DuckDuckGoSearchRun tool to perform a search.
-    """
-    search = DuckDuckGoSearchRun()
-    return search.invoke(question)
-
-@tool
-def multiply(a: int, b: int) -> int:
-    """Multiply two numbers.
-    Args:
-        a: first int
-        b: second int
-    """
-    return a * b
-
-@tool
-def add(a: int, b: int) -> int:
-    """Add two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a + b
-
-@tool
-def subtract(a: int, b: int) -> int:
-    """Subtract two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a - b
-
-@tool
-def divide(a: int, b: int) -> int:
-    """Divide two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    if b == 0:
-        raise ValueError("Cannot divide by zero.")
-    return a / b
-
-@tool
-def modulus(a: int, b: int) -> int:
-    """Get the modulus of two numbers.
-
-    Args:
-        a: first int
-        b: second int
-    """
-    return a % b
-
-@tool
-def wiki_search(query: str) -> str:
-    """Search Wikipedia for a query and return maximum 2 results.
-
-    Args:
-        query: The search query."""
-    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"wiki_results": formatted_search_docs}
-
-@tool
-def web_search(query: str) -> str:
-    """Search Tavily for a query and return maximum 3 results.
-
-    Args:
-        query: The search query."""
-    search_docs = TavilySearchResults(max_results=3).invoke(query=query)
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"web_results": formatted_search_docs}
-
-@tool
-def arvix_search(query: str) -> str:
-    """Search Arxiv for a query and return maximum 3 result.
-
-    Args:
-        query: The search query."""
-    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
-            for doc in search_docs
-        ])
-    return {"arvix_results": formatted_search_docs}
-
-# load the system prompt from the file
-with open("system_prompt.txt", "r", encoding="utf-8") as f:
-    system_prompt = f.read()
-
-# System message
-sys_msg = SystemMessage(content=system_prompt)
-
-# build a retriever
-embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # dim=768
-supabase: Client = create_client(
-    os.environ.get("SUPABASE_URL"),
-    os.environ.get("SUPABASE_SERVICE_KEY"))
-vector_store = SupabaseVectorStore(
-    client=supabase,
-    embedding=embeddings,
-    table_name="documents",
-    query_name="match_documents_langchain",
-)
-create_retriever_tool = create_retriever_tool(
-    retriever=vector_store.as_retriever(),
-    name="Question Search",
-    description="A tool to retrieve similar questions from a vector store.",
-)
-
-tools = [
-    multiply,
-    add,
-    subtract,
-    divide,
-    modulus,
-    wiki_search,
-    web_search,
-    arvix_search,
-    search_answer,
-]
-
-# Build graph function
-def build_graph(provider: str = "groq"):
-    """Build the graph"""
-    # Load environment variables from .env file
-    if provider == "google":
-        # Google Gemini
-        llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
-    elif provider == "groq":
-        # Groq https://console.groq.com/docs/models
-        llm = ChatGroq(model="qwen-qwq-32b", temperature=0)  # optional: qwen-qwq-32b gemma2-9b-it
-    elif provider == "huggingface":
-        # TODO: Add huggingface endpoint
-        llm = ChatHuggingFace(
-            llm=HuggingFaceEndpoint(
-                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
-                temperature=0,
-            ),
-        )
-    else:
-        raise ValueError("Invalid provider. Choose 'google', 'groq' or 'huggingface'.")
-    # Bind tools to LLM
-    llm_with_tools = llm.bind_tools(tools)
-
-    # Node
-    def assistant(state: MessagesState):
-        """Assistant node"""
-        return {"messages": [llm_with_tools.invoke(state["messages"])]}
-
-    def retriever(state: MessagesState):
-        """Retriever node"""
-        similar_question = vector_store.similarity_search(state["messages"][0].content)
-        example_msg = HumanMessage(
-            content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}",
-        )
-        return {"messages": [sys_msg] + state["messages"] + [example_msg]}
-
-    builder = StateGraph(MessagesState)
-    builder.add_node("retriever", retriever)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.add_edge(START, "retriever")
-    builder.add_edge("retriever", "assistant")
-    builder.add_conditional_edges(
-        "assistant",
-        tools_condition,
-    )
-    builder.add_edge("tools", "assistant")
-
-    # Compile graph
-    return builder.compile()
-
-# test
-if __name__ == "__main__":
-    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
-    # Build the graph
-    graph = build_graph(provider="huggingface")
-    # Run the graph
-    messages = [HumanMessage(content=question)]
-    messages = graph.invoke({"messages": messages})
-    for m in messages["messages"]:
-        m.pretty_print()
+# agent.py
+import re
+import ast
+
+from langchain_core.messages import HumanMessage
+
+from configs.config import Config
+from configs.registry import TOOL_REGISTRY
+
+# Define variables
+env = Config()
+llm = env.LOCAL_LLM
+
+tools_registry = TOOL_REGISTRY
+
+def generate_prompt(query: str) -> str:
+    tool_list = "\n".join(
+        f"- {name}: {meta['description']}" for name, meta in tools_registry.items()
+    )
+
+    return f"""
+You are a smart assistant that decides which tool to use based on user queries.
+
+User Query: "{query}"
+
+Available tools:
+{tool_list}
+
+Respond in this format:
+Tool: [tool_name]
+Tool Input: [Python dict of parameters]
+"""
+
+def parse_tool_selection(response: str) -> tuple[str, dict]:
+    tool_match = re.search(r"Tool:\s*(\w+)", response)
+    input_match = re.search(r"Tool Input:\s*(\{.*\})", response)
+
+    if not tool_match or not input_match:
+        raise ValueError("Failed to parse tool selection.")
+
+    tool_name = tool_match.group(1)
+    tool_input = ast.literal_eval(input_match.group(1))
+    return tool_name, tool_input
+
+def main(query: str):
+    user_query = query.strip()
+
+    # 1. Generate selection prompt
+    prompt = generate_prompt(user_query)
+    response = llm.invoke([HumanMessage(content=prompt)])
+
+    # 2. Parse tool selection
+    try:
+        tool_name, tool_input = parse_tool_selection(response.content)
+    except Exception as e:
+        print("Error parsing tool selection:", e)
+        print("LLM response was:", response.content)
+        return
+
+    # 3. Run selected tool
+    tool_entry = tools_registry.get(tool_name)
+    if not tool_entry:
+        print(f"Tool '{tool_name}' not found.")
+        return
+
+    tool = tool_entry["tool"]
+
+    try:
+        result = tool.invoke(tool_input)
+    except Exception as e:
+        print(f"Error running tool '{tool_name}': {e}")
+        return
+
+    print("Final Answer:", result.content)
+
+if __name__ == "__main__":
+    query = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of English Wikipedia."
+    main(query)
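
A quick sanity check for the parser (a sketch, not part of the commit): parse_tool_selection expects exactly the "Tool:" / "Tool Input:" layout that generate_prompt requests, and ast.literal_eval only accepts Python literals, so a well-formed response round-trips like this:

sample = 'Tool: wikipedia_search_tool\nTool Input: {"query": "Mercedes Sosa discography"}'
name, params = parse_tool_selection(sample)
assert name == "wikipedia_search_tool"
assert params == {"query": "Mercedes Sosa discography"}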
configs/__init__.py ADDED
File without changes
configs/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (170 Bytes)
configs/__pycache__/config.cpython-313.pyc ADDED
Binary file (3.42 kB)
configs/__pycache__/registry.cpython-313.pyc ADDED
Binary file (396 Bytes)
configs/config.py ADDED
@@ -0,0 +1,76 @@
+import os
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+from langchain_ollama import ChatOllama, OllamaEmbeddings
+
+# Load environment variables from .env file
+load_dotenv()
+
+class Config:
+    LLM_PROVIDER = "ollama"
+    if LLM_PROVIDER == "ollama":
+        # Ollama configuration
+        OLLAMA_BASE_URL = "http://localhost:11434"
+        # LOCAL_LLM_MODEL = "deepseek-r1:8b"
+        # LOCAL_LLM_MODEL = "deepseek-r1:7b"
+        LOCAL_LLM_MODEL = "llama3.2"
+        LOCAL_LLM = ChatOllama(model=LOCAL_LLM_MODEL,
+                               base_url=OLLAMA_BASE_URL,
+                               temperature=0.5)
+        EMBED_MODEL = OllamaEmbeddings(model="nomic-embed-text")
+
+    elif LLM_PROVIDER == "openai":
+        OPENAI_API_KEY: str = os.getenv("OPENAI_API_KEY", "")
+        LLM_MODEL_NAME: str = os.getenv("LLM_MODEL", "gpt-3.5-turbo")
+        LLM_VIDEO_MODEL_NAME: str = os.getenv("LLM_VIDEO_MODEL", "gpt-4o-mini")
+
+        LLM = ChatOpenAI(model=LLM_MODEL_NAME, openai_api_key=OPENAI_API_KEY)
+        EMBED_MODEL = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
+
+    FILE = None
+
+    WIKI_DEFAULT_PROMPTS = {
+        "system": (
+            "You are an intelligent assistant with access to Wikipedia search results related to the user's query.\n"
+            "Use only the information provided in the search results to answer the question accurately.\n"
+            "Carefully analyze the query to determine what the user is asking.\n"
+            "Respond clearly and concisely, avoiding speculation or information not found in the provided content.\n"
+            "If the answer is not present in the search results, state that explicitly."
+        ),
+        "user": "{query}"
+    }
+
+    SYSTEM_MSG = """
+    You are a general-purpose AI assistant.
+
+    When I ask you a question:
+    - Think step by step to determine the answer.
+    - List your reasoning steps clearly.
+    - If additional information is required to answer the question, use the 'wiki' tool by providing the directive: tool_call: [wiki].
+    - Provide your final output using one of the following formats:
+      - FINAL ANSWER: [YOUR FINAL ANSWER]
+      - tool_call: [TOOL_NAME]
+
+    Only one of these should appear in your final output: either FINAL ANSWER or tool_call.
+
+    If you are unsure or need more information, always use the 'wiki' tool.
+
+    Final answer formatting rules:
+    - If the answer is a number:
+      - Do NOT use commas (e.g., write 1000 not 1,000).
+      - Do NOT include units like "$" or "%" unless explicitly requested.
+    - If the answer is a string:
+      - Do NOT use articles (e.g., "a", "an", "the").
+      - Do NOT use abbreviations (e.g., write "New York" instead of "NY").
+      - Write digits as plain text (e.g., "four" instead of "4") unless stated otherwise.
+    - If the answer is a comma-separated list:
+      - Follow the same rules above for each item depending on whether it's a number or a string.
+
+    Do NOT include any additional arguments in tool calls.
+
+    Available tools:
+    - wikipedia_search_tool: Search Wikipedia.
+    """
+
+config = Config()
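
A minimal smoke test for the config (a sketch, assuming an Ollama server is running at http://localhost:11434 with the llama3.2 model already pulled):

from configs.config import Config

env = Config()
print(env.LOCAL_LLM.invoke("Reply with one word: ready").content)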
configs/registry.py ADDED
@@ -0,0 +1,9 @@
+from tools.wiki import wikipedia_search_tool
+
+TOOL_REGISTRY = {
+    "wikipedia_search_tool": {
+        "tool": wikipedia_search_tool,
+        "description": "Searches Wikipedia for information. Use when user asks about a topic, person, or event.",
+    },
+}
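
Additional tools would be registered with the same shape. For example, a hypothetical youtube_search_tool wrapper (not in this commit; see the sketch after tools/youtube.py below) could be added as:

TOOL_REGISTRY["youtube_search_tool"] = {
    "tool": youtube_search_tool,  # hypothetical @tool wrapper, not part of this commit
    "description": "Answers questions about a YouTube video linked in the query.",
}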
tools/__pycache__/call_llm.cpython-313.pyc ADDED
Binary file (1.72 kB)
tools/__pycache__/formatter.cpython-313.pyc ADDED
Binary file (782 Bytes)
tools/__pycache__/llm_helper.cpython-313.pyc ADDED
Binary file (1.12 kB)
tools/__pycache__/test.cpython-313.pyc ADDED
Binary file (874 Bytes)
tools/__pycache__/wiki.cpython-313.pyc ADDED
Binary file (5.28 kB)
tools/wiki.py ADDED
@@ -0,0 +1,112 @@
+import sys
+import os
+
+from langchain_core.tools import tool
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+from configs.config import Config
+
+env = Config()
+
+def generate_search_string(query: str) -> str:
+    """
+    Generate an optimal Wikipedia search string from the given query.
+
+    Args:
+        query (str): The input query for generating the search string.
+
+    Returns:
+        str: A single continuous search string optimized for Wikipedia search.
+    """
+    if not query or not isinstance(query, str):
+        raise ValueError("Query must be a non-empty string.")
+
+    prompt = f"""
+    Generate an optimal Wikipedia search string from the query '{query}'.
+    Just return a single continuous search string without any additional text, formatting, or quotation marks.
+    Do not include any other text or explanation."""
+
+    response = env.LOCAL_LLM.invoke(prompt)
+    if not response or not response.content.strip():
+        raise ValueError("Failed to generate a valid search string.")
+
+    return response.content.strip()
+
+def document_store(query, chunk_size, chunk_overlap):
+    """Build a FAISS store from the Wikipedia pages matching the query."""
+    from langchain_community.document_loaders import WikipediaLoader
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain.schema.document import Document
+    from langchain_community.vectorstores.faiss import FAISS
+
+    embedding_model = env.EMBED_MODEL
+    language = "en"
+
+    search_query = generate_search_string(query)
+    if not search_query:
+        raise ValueError("Search query is empty or invalid.")
+
+    loader = WikipediaLoader(query=search_query, lang=language)
+    documents = loader.load()
+    combined_text = "".join([doc.page_content for doc in documents if doc.page_content])
+    if not combined_text:
+        raise ValueError("No text found in the loaded documents.")
+
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+    )
+
+    chunks = splitter.split_text(combined_text)
+    if not chunks:
+        raise ValueError("No chunks generated from the combined text.")
+
+    docs = [
+        Document(page_content=chunk, metadata={"source": query})
+        for chunk in chunks
+    ]
+
+    # FAISS.from_documents embeds the documents itself, so no separate
+    # embed_documents() pass is needed.
+    store = FAISS.from_documents(docs, embedding=embedding_model)
+    return store
+
+def search(query, chunk_size, chunk_overlap):
+    store = document_store(query, chunk_size, chunk_overlap)
+    results = store.similarity_search_with_score(query, k=5)
+
+    # Keep only results under the distance threshold (lower score = more similar)
+    filtered_results = [(doc, score) for doc, score in results if score <= 0.5]
+
+    return filtered_results
+
+@tool("wikipedia_search_tool")
+def wikipedia_search_tool(query: str, chunk_size: int = 1000, chunk_overlap: int = 200):
+    """
+    Run the Wikipedia search tool with the given query and parameters.
+    """
+    print("----- Wiki Run ---")
+    default_prompts = env.WIKI_DEFAULT_PROMPTS
+
+    results = search(query, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+    # results holds (Document, score) tuples; pass the best document's text
+    context = results[0][0].page_content if results else "No relevant documents found."
+
+    llm_input = [
+        {"role": "system", "content": default_prompts["system"]},
+        {"role": "user", "content": default_prompts["user"].format(query=query)},
+        {"role": "user", "content": context},
+    ]
+
+    return env.LOCAL_LLM.invoke(llm_input)
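
Because wikipedia_search_tool is a LangChain @tool, it can also be invoked directly with a dict of its parameters (a usage sketch, assuming the Ollama models from configs/config.py are available):

from tools.wiki import wikipedia_search_tool

answer = wikipedia_search_tool.invoke(
    {"query": "Mercedes Sosa studio albums", "chunk_size": 1000, "chunk_overlap": 200}
)
print(answer.content)  # the tool returns the model's AIMessage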
tools/youtube.py ADDED
@@ -0,0 +1,103 @@
+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+
+class YoutubeSearchTool:
+    def __init__(self, query: str, chunk_size: int = 1000, chunk_overlap: int = 200):
+        from configs.config import Config
+        from utils.generate_search_string import generate_search_string
+
+        env = Config()
+        self.generate_search_string = generate_search_string
+        self.embedding_model = env.EMBED_MODEL
+        self.query = query
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.language = "en"
+
+    def extract_youtube_link(self) -> str:
+        """Extract a YouTube video URL from the query, if present."""
+        import re
+
+        youtube_url_pattern = r"https?://www\.youtube\.com/watch\?v=[\w-]+"
+        match = re.search(youtube_url_pattern, self.query)
+
+        return match.group(0) if match else None
+
+    def video_loader(self):
+        """Load a YouTube video transcript and split it into chunks."""
+        from langchain_community.document_loaders import YoutubeLoader
+        from langchain_community.document_loaders.youtube import TranscriptFormat
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+        url = self.extract_youtube_link()
+
+        loader = YoutubeLoader.from_youtube_url(
+            url,
+            add_video_info=True,
+            transcript_format=TranscriptFormat.CHUNKS,
+            chunk_size_seconds=30,
+            language=self.language,
+        )
+
+        documents = loader.load()
+        combined_text = "".join([doc.page_content for doc in documents])
+
+        # Split into chunks
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=self.chunk_size,
+            chunk_overlap=self.chunk_overlap,
+        )
+        chunks = splitter.split_text(combined_text)
+
+        return chunks
+
+    def build_vector_store(self):
+        """Create a vector store from the video chunks."""
+        from langchain_community.vectorstores import FAISS
+        from langchain.schema import Document
+
+        chunks = self.video_loader()
+        if not chunks:
+            return None
+
+        docs = [
+            Document(page_content=chunk, metadata={"source": self.query})
+            for chunk in chunks
+        ]
+        return FAISS.from_documents(docs, embedding=self.embedding_model)
+
+    def run(self, query: str):
+        """Run the YouTube search tool with the given query."""
+        print("----- YouTube Search Tool Run ---")
+
+        store = self.build_vector_store()
+        if store is None:
+            return "No relevant video chunks found."
+
+        results = store.similarity_search_with_score(query, k=1)
+
+        if not results:
+            return "No relevant video chunks found in the vector store."
+
+        return [
+            {
+                "content": doc.page_content,
+                "score": score,
+                "source": doc.metadata.get("source", "Unknown")
+            }
+            for doc, score in results
+        ]
+
+if __name__ == "__main__":
+    # Example usage
+    query = "Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec. What does Teal'c say in response to the question 'Isn't that hot?'"
+
+    youtube_tool = YoutubeSearchTool(query=query)
+    print(youtube_tool.run(query))
+    print(f"Video URL: {youtube_tool.extract_youtube_link()}")
utils/__pycache__/call_llm.cpython-313.pyc ADDED
Binary file (2.13 kB)
utils/call_llm.py ADDED
@@ -0,0 +1,50 @@
+from langchain_ollama import ChatOllama
+from langchain_core.messages import SystemMessage
+
+def llm(
+    model_name: str,
+    messages: list[dict],
+    temperature: float = 0.1,
+    max_tokens: int = 1024,
+    **kwargs,
+) -> str:
+    """
+    Call the Ollama chat model and return the generated response content.
+    """
+    try:
+        print("[NODE] ----- Calling Ollama Chat -----")
+
+        # Construct the prompt with explicit separation for SystemMessage
+        prompt_parts = []
+        for message in messages:
+            if isinstance(message, dict):
+                prompt_parts.append(f"{message['role'].capitalize()}: {message['content']}")
+            elif isinstance(message, SystemMessage):
+                prompt_parts.append(f"System: {message.content}")
+            else:
+                prompt_parts.append(message)
+
+        prompt = "\n\n".join(prompt_parts)  # extra separation for clarity
+
+        print(f"Constructed Prompt:\n{prompt}")
+
+        chat = ChatOllama(
+            model=model_name,
+            temperature=temperature,
+            num_predict=max_tokens,  # ChatOllama's token cap parameter is num_predict
+            **kwargs,
+        )
+
+        response = chat.invoke(prompt)
+        print("----- Ollama Chat response -----")
+        print(response.content)
+
+        if not response or not response.content:
+            print("No content returned from the Ollama Chat model.")
+            return "No content generated."
+
+        return response.content
+
+    except Exception as e:
+        print(f"An error occurred while calling the Ollama Chat model: {e}")
+        raise
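
Callers pass plain role/content dicts, which llm flattens into a single prompt string (a usage sketch, assuming the llama3.2 model is pulled locally):

from utils.call_llm import llm

answer = llm(
    model_name="llama3.2",
    messages=[
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Name the capital of Argentina."},
    ],
)
print(answer)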