rgp230 committed on
Commit
f232eef
·
1 Parent(s): 34f7bc7

fix(remove_tf): Unblock container build by removing tf dependency

Browse files
requirements.txt CHANGED
@@ -9,8 +9,6 @@ langchain_huggingface
9
  langgraph-prebuilt
10
  streamlit
11
  transformers[torch]
12
- tensorflow
13
- tf-keras
14
  langchain_openai
15
  langchain_google_genai
16
  torch
 
9
  langgraph-prebuilt
10
  streamlit
11
  transformers[torch]
 
 
12
  langchain_openai
13
  langchain_google_genai
14
  torch
src/graph/__pycache__/state_vector_nodes.cpython-312.pyc CHANGED
Binary files a/src/graph/__pycache__/state_vector_nodes.cpython-312.pyc and b/src/graph/__pycache__/state_vector_nodes.cpython-312.pyc differ
 
src/graph/state_vector_nodes.py CHANGED
@@ -16,7 +16,7 @@ from langchain_community.tools.tavily_search import TavilySearchResults
16
  import pandas as pd
17
  import torch.nn.functional as F
18
 
19
-
20
  class question_model:
21
  def __init__(self,loaded_tokenizer,loaded_model, llm, df_keys):
22
  #self.state=StateVector
@@ -45,43 +45,49 @@ class question_model:
45
  #print(state)
46
  if not state.get('seed_question') or len(state.get('seed_question').strip())<3:
47
  raise ValueError("Seed question is not set in the state vector.")
48
- predict_input = self.tokenizer.encode(
 
49
  text=state.get('seed_question').lower(),
50
  truncation=True,
51
  padding=True,
52
  return_tensors="pt")
53
- output = self.distilbert_model(predict_input.numpy())[0]
54
- numpy_output=output.numpy()
55
- torch_output=torch.from_numpy(numpy_output)
56
- prediction_value = torch.argmax(torch_output, dim=1).numpy() # All answers
57
- prob_value=F.softmax(torch_output).numpy()[0]
 
 
 
 
 
58
 
59
- #prob_value = F.softmax(output, dim=1).cpu().numpy()[0]
60
- #prediction_value = tf.argmax(output, axis=1).numpy()#All answers
61
- #prob_value=tf.nn.softmax(output).numpy()[0]#Probability of TF output
62
- Topic_Bool=prob_value>0.4
63
- Topics=[]
64
- Keywords={}
65
- for index, key in enumerate(sdg_goals):
66
- if not Topic_Bool[index]:continue
67
- #print(sdg_goals[key])
68
- Topics.append((index+1,sdg_goals[key]))
69
- #print(Topics)
70
- for i,t in Topics:
71
- kw_patterns=self.df_keys[self.df_keys['topic_num']==i]['keywords'].values[0].split(',')
72
- Keywords[t] = re.findall(r'%s' %("|".join(kw_patterns)),state['seed_question'])
73
- if not Keywords[t]:
74
- Keywords[t] = kw_patterns
75
- state['messages'].append(AIMessage(content="Will add keywords for the topic: %s \n" % t ))
76
- state['topic'] = Topics
77
- state['topic_kw'] = Keywords
78
- if not state.get('country'):
79
- state['messages'].append(AIMessage(content="Country is not set. Please provide a country. \n"))
80
- return state
81
- elif not state.get('topic'):
82
- state['messages'].append(AIMessage(content="Missing topic please ask a question about the 17 Sustainable Development Goals. Graph will terminate. \n"))
83
- state['messages'].append(AIMessage(content="Topics are: %s and keywords found: %s.\n Proceeding to prompt creation. \n" \
84
- %(", ".join(Keywords.keys()), ", ".join([kw for kws in Keywords.values() for kw in kws]))))
85
  return state
86
 
87
  def should_continue(self, state:StateVector) -> str:
 
16
  import pandas as pd
17
  import torch.nn.functional as F
18
 
19
+ torch.classes.__path__ = []
20
  class question_model:
21
  def __init__(self,loaded_tokenizer,loaded_model, llm, df_keys):
22
  #self.state=StateVector
 
45
  #print(state)
46
  if not state.get('seed_question') or len(state.get('seed_question').strip())<3:
47
  raise ValueError("Seed question is not set in the state vector.")
48
+ #print(state.get('seed_question').lower())
49
+ predict_input = self.tokenizer(
50
  text=state.get('seed_question').lower(),
51
  truncation=True,
52
  padding=True,
53
  return_tensors="pt")
54
+ #print(predict_input)
55
+ with torch.no_grad():
56
+ logits = self.distilbert_model(**predict_input).logits
57
+ #print(logits)
58
+ #output = self.distilbert_model(predict_input.numpy())[0]
59
+ #print(output)
60
+ #numpy_output=output.numpy()
61
+ #torch_output=torch.from_numpy(numpy_output)
62
+ #prediction_value = torch.argmax(torch_output, dim=1).numpy() # All answers
63
+ prob_value=F.softmax(logits, dim=1).cpu().numpy()[0]
64
 
65
+ #prob_value = F.softmax(output, dim=1).cpu().numpy()[0]
66
+ #prediction_value = tf.argmax(output, axis=1).numpy()#All answers
67
+ #prob_value=tf.nn.softmax(output).numpy()[0]#Probability of TF output
68
+ Topic_Bool=prob_value>0.4
69
+ Topics=[]
70
+ Keywords={}
71
+ for index, key in enumerate(sdg_goals):
72
+ if not Topic_Bool[index]:continue
73
+ #print(sdg_goals[key])
74
+ Topics.append((index+1,sdg_goals[key]))
75
+ #print(Topics)
76
+ for i,t in Topics:
77
+ kw_patterns=self.df_keys[self.df_keys['topic_num']==i]['keywords'].values[0].split(',')
78
+ Keywords[t] = re.findall(r'%s' %("|".join(kw_patterns)),state['seed_question'])
79
+ if not Keywords[t]:
80
+ Keywords[t] = kw_patterns
81
+ state['messages'].append(AIMessage(content="Will add keywords for the topic: %s \n" % t ))
82
+ state['topic'] = Topics
83
+ state['topic_kw'] = Keywords
84
+ if not state.get('country'):
85
+ state['messages'].append(AIMessage(content="Country is not set. Please provide a country. \n"))
86
+ return state
87
+ elif not state.get('topic'):
88
+ state['messages'].append(AIMessage(content="Missing topic please ask a question about the 17 Sustainable Development Goals. Graph will terminate. \n"))
89
+ state['messages'].append(AIMessage(content="Topics are: %s and keywords found: %s.\n Proceeding to prompt creation. \n" \
90
+ %(", ".join(Keywords.keys()), ", ".join([kw for kws in Keywords.values() for kw in kws]))))
91
  return state
92
 
93
  def should_continue(self, state:StateVector) -> str:
src/state/__pycache__/state.cpython-312.pyc CHANGED
Binary files a/src/state/__pycache__/state.cpython-312.pyc and b/src/state/__pycache__/state.cpython-312.pyc differ
 
src/streamlit_app.py CHANGED
@@ -2,10 +2,9 @@ import configparser
2
  import altair as alt
3
  import streamlit as st
4
  from typing import List, Optional
5
- from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification
6
  from langchain_core.messages import AnyMessage, AIMessage,SystemMessage, HumanMessage,AIMessageChunk
7
 
8
-
9
  from streamlitui.constants import unsdg_countries
10
  from llm.llm_setup import ModelSelection
11
  import pandas as pd
@@ -94,8 +93,8 @@ if __name__=='__main__':
94
  user_input=ui.load_streamlit_ui()
95
  LLM_Selection=ModelSelection(user_input)
96
  if user_input["GENAI_API_KEY"]:llm=LLM_Selection.setup_llm_model()
97
- loaded_tokenizer = DistilBertTokenizerFast.from_pretrained('src/train_bert/topic_classifier_model')
98
- loaded_model = TFDistilBertForSequenceClassification.from_pretrained('src/train_bert/topic_classifier_model')
99
  df_keys=pd.read_csv('src/train_bert/training_data/Keyword_Patterns.csv')
100
 
101
  if not user_input:
 
2
  import altair as alt
3
  import streamlit as st
4
  from typing import List, Optional
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
  from langchain_core.messages import AnyMessage, AIMessage,SystemMessage, HumanMessage,AIMessageChunk
7
 
 
8
  from streamlitui.constants import unsdg_countries
9
  from llm.llm_setup import ModelSelection
10
  import pandas as pd
 
93
  user_input=ui.load_streamlit_ui()
94
  LLM_Selection=ModelSelection(user_input)
95
  if user_input["GENAI_API_KEY"]:llm=LLM_Selection.setup_llm_model()
96
+ loaded_tokenizer = AutoTokenizer.from_pretrained('src/train_bert/topic_classifier_model')
97
+ loaded_model = AutoModelForSequenceClassification.from_pretrained('src/train_bert/topic_classifier_model')
98
  df_keys=pd.read_csv('src/train_bert/training_data/Keyword_Patterns.csv')
99
 
100
  if not user_input: