Samuel Thomas committed
Commit · 17a32cd
1 Parent(s): f2a058a
remove cuda

tools.py CHANGED
@@ -807,9 +807,9 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
     new_state = state.copy()  # Start with a copy of the input state
 
     try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
 
         print(f"Invoking LLM with {len(messages_for_llm)} messages.")
         # This is where you call your actual LLM

@@ -846,8 +846,8 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
         error_message_content = f"LLM failed due to Out of Memory: {str(e)}."
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
             gc.collect()
         except Exception as cleanup_e:
             print(f"Emergency OOM cleanup failed: {cleanup_e}")

@@ -859,9 +859,10 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         new_state["done"] = True  # Mark as done to prevent loops on LLM failure
     finally:
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+            pass
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
+            #    print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
         except Exception:
             pass  # Avoid error in cleanup hiding the main error
 
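Aside on the third hunk: the new side grows from 9 to 10 lines because commenting out every statement in the inner try: block would leave an empty suite, which Python rejects as a SyntaxError; the added pass is the placeholder the parser requires. A minimal sketch of the rule (hypothetical snippet, not from tools.py):

    # SyntaxError: the try suite is empty once its body is all comments
    # try:
    #     # torch.cuda.empty_cache()
    # except Exception:
    #     pass

    # Valid: `pass` is the no-op statement that keeps the block non-empty
    try:
        pass
        # torch.cuda.empty_cache()  # disabled cleanup, kept for reference
    except Exception:
        pass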
@@ -1289,9 +1290,9 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache before processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
     except:
         pass
 

@@ -1374,8 +1375,8 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache after processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
     except:
         pass
 
@@ -1671,8 +1672,8 @@ def run_agent(myagent, state: AgentState):
         #del hf_pipe
         #del model_vqa
         #del processor_vqa
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+        #torch.cuda.empty_cache()
+        #torch.cuda.ipc_collect()
         gc.collect()
         print("Released GPU memory after FINAL ANSWER.")
         # Re-initialize for the next run
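Since the same guarded cleanup recurs before and after the LLM call, around tool processing, and after FINAL ANSWER, an alternative to commenting it out at every call site is to centralize it behind a flag. A minimal sketch, assuming only the public torch.cuda API; the maybe_clear_cuda_cache helper and ENABLE_CUDA_CLEANUP flag are hypothetical, not part of tools.py:

    import gc

    ENABLE_CUDA_CLEANUP = False  # hypothetical flag; this commit effectively turns the cleanup off

    def maybe_clear_cuda_cache(label: str = "") -> None:
        """Reclaim Python garbage, plus GPU cache when enabled (hypothetical helper)."""
        gc.collect()  # always reclaim unreachable Python objects
        if not ENABLE_CUDA_CLEANUP:
            return
        try:
            import torch  # lazy import so CPU-only environments skip torch entirely
            if torch.cuda.is_available():
                torch.cuda.empty_cache()  # return cached blocks to the CUDA driver
                torch.cuda.ipc_collect()  # reclaim memory held by ended IPC consumers
                print(f"🧹 {label} CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
        except Exception as cleanup_e:
            print(f"CUDA cleanup failed: {cleanup_e}")  # never let cleanup mask the original error

Each call site would then reduce to a single maybe_clear_cuda_cache("Pre-LLM") style call, and flipping the flag restores the behavior removed here without touching six separate hunks.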