Agents_DeepSearch

Sleeping

App Files Community

rgp230 committed on Aug 21, 2025

Commit

ba78ba8

1 Parent(s): 9905f36

fix(retrained_distilbert): Fix for tensor memory leaks

Browse files

Files changed (8) hide show

requirements.txt +1 -1
src/graph/__pycache__/state_vector_nodes.cpython-312.pyc +0 -0
src/graph/state_vector_nodes.py +14 -34
src/streamlit_app.py +4 -2
src/train_bert/topic_classifier_model/model.safetensors +1 -1
src/train_bert/topic_classifier_model/tokenizer.json +1 -8
src/train_bert/topic_classifier_model/training_args.bin +1 -1
src/train_bert/train_classifier.py +2 -1

requirements.txt CHANGED Viewed

@@ -10,7 +10,7 @@ langgraph-prebuilt
 langchain-tavily
 semanticscholar
 streamlit
-transformers[torch]
 langchain_openai
 langchain_google_genai
 torch

 langchain-tavily
 semanticscholar
 streamlit
+transformers==4.55.3
 langchain_openai
 langchain_google_genai
 torch

src/graph/__pycache__/state_vector_nodes.cpython-312.pyc CHANGED Viewed

Binary files a/src/graph/__pycache__/state_vector_nodes.cpython-312.pyc and b/src/graph/__pycache__/state_vector_nodes.cpython-312.pyc differ

src/graph/state_vector_nodes.py CHANGED Viewed

@@ -12,7 +12,7 @@ import re
 from langchain_openai import ChatOpenAI
 from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
 from langchain_community.utilities.semanticscholar import SemanticScholarAPIWrapper
-from langchain_community.tools.tavily_search import TavilySearchResults
 import pandas as pd
 import torch.nn.functional as F
 import os
@@ -34,7 +34,7 @@ class question_model:
             ])
         for topic, keywords in state['topic_kw'].items():
             state['messages'].append(SystemMessage(content=f"For the UN SDG Goal: {topic}\n. \
-                                                Use the following keywords : {', '.join(keywords)}. Generate questions related to the topic in the country of {state['country']} using these keywords."))
         state['messages'].append(AIMessage(content="Based on the provided information, here is an enhanced list of the question: \n"))
         return state
@@ -54,17 +54,7 @@ class question_model:
         #print(predict_input)
         with torch.no_grad():
             logits = self.distilbert_model(**predict_input).logits
-            #print(logits)
-            #output = self.distilbert_model(predict_input.numpy())[0]
-            #print(output)
-            #numpy_output=output.numpy()
-            #torch_output=torch.from_numpy(numpy_output)
-            #prediction_value = torch.argmax(torch_output, dim=1).numpy()  # All answers
             prob_value=F.softmax(logits, dim=1).cpu().numpy()[0]
-            #prob_value = F.softmax(output, dim=1).cpu().numpy()[0]
-            #prediction_value = tf.argmax(output, axis=1).numpy()#All answers
-            #prob_value=tf.nn.softmax(output).numpy()[0]#Probability of TF output
             Topic_Bool=prob_value>0.4
             Topics=[]
             Keywords={}
@@ -124,7 +114,6 @@ class research_model:
         # Bind the tool to the LLM
         self.llm_with_tools = self.llm.bind_tools(self.tools)
         os.environ['TAVILY_API_KEY']=tavily_api_key
-        #self.tavily_api_key=tavily_api_key
     def direct_semantic_scholar_query(self,query: str):
@@ -143,13 +132,14 @@ class research_model:
     def direct_tavily_search(self,query: str):
         """Direct invocation of TavilySearchResults without agent"""
         # Create the tool directly
-        tavily = TavilySearchResults()
-        result = tavily.invoke(query, max_results=5, include_answer=True, include_snippet=True, include_source=True)
-        response=""
-        for r in result:
-            response +="Found a webpage: %s at %s" %(r['title'], r['url'])
-            response +="Summary of the page: %s" %r['content']
-            response +="Relevance score: %s" %r['score']
         return response
     def data_analysis(self,state:StateVector):
         df_analyst=pd.read_csv(self.local_analysis_file)
@@ -186,8 +176,7 @@ class research_model:
                     #AIMessage(content="Using publications on Semantic Scholar and my own reference data, I will answer the questions related to the Sustainable Development Goal: %s." % topic),
                     SystemMessage(content=f"Search for recent papers on {kw_string} in {country}."),
-                    SystemMessage(content=f"Search for recent news on {kw_string} in {country}."),
-                    SystemMessage(content=f"Search the internet for webpages on {kw_string} in {country}."),
                     #HumanMessage(content="Please provide a comprehensive answer to the questions based on the information gathered from the tools.")
                 ]
             state['messages'] = messages
@@ -210,9 +199,9 @@ class research_model:
         )
         self.tools=[semantic_scholar_tool,self.direct_tavily_search]
         # Bind the tool to the LLM
-        llm_with_tools = self.llm.bind_tools(tools)
-        return llm_with_tools,tools
     def tool_calling_llm(self,state:StateVector):
@@ -250,17 +239,8 @@ class research_model:
         initial_system_message.content+="\n Assess if the resources indicate a general positive or negative trend and grade progress\
             from 0-10 where 0 is very negative and 10 is very positive.\n"
         initial_system_message.content+="\n Provide detailed answers to the questions and a list of references used."
         state["messages"].append(initial_system_message)
-        '''
-        llm = ChatOpenAI(
-            temperature=0.4,
-            model_name="gpt-4o",
-            openai_api_key=openai_api_key
-        )
-        '''
-        #llm=ChatGoogleGenerativeAI(model='gemini-2.5-pro',google_api_key=google_api_key,temperature=0.3)
-        #print(state["messages"][-1].content)
         airesponse = self.llm.invoke(state["messages"][-1].content)
         # For simplicity, we just return the messages as they are
         return {"messages": [airesponse]}

 from langchain_openai import ChatOpenAI
 from langchain_community.tools.semanticscholar.tool import SemanticScholarQueryRun
 from langchain_community.utilities.semanticscholar import SemanticScholarAPIWrapper
+from langchain_tavily import TavilySearch
 import pandas as pd
 import torch.nn.functional as F
 import os
             ])
         for topic, keywords in state['topic_kw'].items():
             state['messages'].append(SystemMessage(content=f"For the UN SDG Goal: {topic}\n. \
+                                                Use the following keywords : {', '.join(keywords)}. Generate questions related to the topic in the country of {state['country']} using these keywords.\n"))
         state['messages'].append(AIMessage(content="Based on the provided information, here is an enhanced list of the question: \n"))
         return state
         #print(predict_input)
         with torch.no_grad():
             logits = self.distilbert_model(**predict_input).logits
             prob_value=F.softmax(logits, dim=1).cpu().numpy()[0]
             Topic_Bool=prob_value>0.4
             Topics=[]
             Keywords={}
         # Bind the tool to the LLM
         self.llm_with_tools = self.llm.bind_tools(self.tools)
         os.environ['TAVILY_API_KEY']=tavily_api_key
     def direct_semantic_scholar_query(self,query: str):
     def direct_tavily_search(self,query: str):
         """Direct invocation of TavilySearchResults without agent"""
         # Create the tool directly
+        tavily = TavilySearch(max_results=5, include_answer=True, include_snippet=True, include_source=True)
+        result = tavily.invoke(query)
+        answer=result['answer']
+        response="Summary Answer for all webpages: {answer} \n"
+        for r in result['results']:
+            response +="Found a webpage: %s at %s \n" %(r['title'], r['url'])
+            response +="Summary of the page: %s \n" %r['content']
+            response +="Relevance score: %s\n" %r['score']
         return response
     def data_analysis(self,state:StateVector):
         df_analyst=pd.read_csv(self.local_analysis_file)
                     #AIMessage(content="Using publications on Semantic Scholar and my own reference data, I will answer the questions related to the Sustainable Development Goal: %s." % topic),
                     SystemMessage(content=f"Search for recent papers on {kw_string} in {country}."),
+                    SystemMessage(content=f"Search the internet for webpages or news on {kw_string} in {country}."),
                     #HumanMessage(content="Please provide a comprehensive answer to the questions based on the information gathered from the tools.")
                 ]
             state['messages'] = messages
         )
         self.tools=[semantic_scholar_tool,self.direct_tavily_search]
         # Bind the tool to the LLM
+        llm_with_tools = self.llm.bind_tools(self.tools)
+        return llm_with_tools,self.tools
     def tool_calling_llm(self,state:StateVector):
         initial_system_message.content+="\n Assess if the resources indicate a general positive or negative trend and grade progress\
             from 0-10 where 0 is very negative and 10 is very positive.\n"
         initial_system_message.content+="\n Provide detailed answers to the questions and a list of references used."
+        print(initial_system_message.content)
         state["messages"].append(initial_system_message)
         airesponse = self.llm.invoke(state["messages"][-1].content)
         # For simplicity, we just return the messages as they are
         return {"messages": [airesponse]}

src/streamlit_app.py CHANGED Viewed

@@ -12,7 +12,9 @@ from state.state import StateVector
 from graph.state_vector_nodes import question_model,research_model
 from graph.graph_builder import BuildGraphOptions
 import re
 class StreamlitConfigUI:
     """
@@ -97,7 +99,7 @@ if __name__=='__main__':
     LLM_Selection=ModelSelection(user_input)
     if user_input["GENAI_API_KEY"]:llm=LLM_Selection.setup_llm_model()
     loaded_tokenizer = AutoTokenizer.from_pretrained('src/train_bert/topic_classifier_model')
-    loaded_model = AutoModelForSequenceClassification.from_pretrained('src/train_bert/topic_classifier_model', device_map='cpu')
     df_keys=pd.read_csv('src/train_bert/training_data/Keyword_Patterns.csv')
     if not user_input:

 from graph.state_vector_nodes import question_model,research_model
 from graph.graph_builder import BuildGraphOptions
 import re
+import os
+import torch
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
 class StreamlitConfigUI:
     """
     LLM_Selection=ModelSelection(user_input)
     if user_input["GENAI_API_KEY"]:llm=LLM_Selection.setup_llm_model()
     loaded_tokenizer = AutoTokenizer.from_pretrained('src/train_bert/topic_classifier_model')
+    loaded_model = AutoModelForSequenceClassification.from_pretrained('src/train_bert/topic_classifier_model',device_map='cpu')
     df_keys=pd.read_csv('src/train_bert/training_data/Keyword_Patterns.csv')
     if not user_input:

src/train_bert/topic_classifier_model/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:130c8e5deecc14277276719c7c0e24f836eb2ef7ac7558d591ebbdca195bf536
 size 267878708

 version https://git-lfs.github.com/spec/v1
+oid sha256:082360600edda6a2ffcf6bc3d16ff21db50214111947c4b970a8fd2c55c10210
 size 267878708

src/train_bert/topic_classifier_model/tokenizer.json CHANGED Viewed

@@ -6,14 +6,7 @@
     "strategy": "LongestFirst",
     "stride": 0
   },
-  "padding": {
-    "strategy": "BatchLongest",
-    "direction": "Right",
-    "pad_to_multiple_of": null,
-    "pad_id": 0,
-    "pad_type_id": 0,
-    "pad_token": "[PAD]"
-  },
   "added_tokens": [
     {
       "id": 0,

     "strategy": "LongestFirst",
     "stride": 0
   },
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,

src/train_bert/topic_classifier_model/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:065a3a69c45d460583f6e974780e25e39e359f94094ab3d0d2b20048ff15fb8a
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:78224673c2ba1baa79f14e017c2b6ebe9556c46b018f6441021d05569da25691
 size 5777

src/train_bert/train_classifier.py CHANGED Viewed

@@ -57,6 +57,7 @@ def buildtraining(train_df, test_df,save_directory='topic_classifier_model'):
         num_labels=len(labels),label2id=label2id,id2label=id2label)
     trainer = Trainer(
         model=model,
         args=training_args,
         train_dataset=tokenized_train,
         eval_dataset=tokenized_test,
@@ -109,4 +110,4 @@ if __name__ == '__main__':
     buildtraining(train_df, test_df)
     prediction_metrics(test_df)

         num_labels=len(labels),label2id=label2id,id2label=id2label)
     trainer = Trainer(
         model=model,
+        device_map='cpu',
         args=training_args,
         train_dataset=tokenized_train,
         eval_dataset=tokenized_test,
     buildtraining(train_df, test_df)
     prediction_metrics(test_df)