Spaces:
Runtime error
Runtime error
Update RockPaperScissor/services/LLM_service.py
Browse files
RockPaperScissor/services/LLM_service.py
CHANGED
|
@@ -1,21 +1,12 @@
|
|
|
|
|
| 1 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 2 |
import torch
|
| 3 |
from typing import List, Dict, Optional
|
| 4 |
import re
|
| 5 |
import os
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
import spaces
|
| 10 |
-
from spaces import GPU
|
| 11 |
-
print("[LLMService] spaces.GPU imported successfully")
|
| 12 |
-
SPACES_AVAILABLE = True
|
| 13 |
-
except ImportError:
|
| 14 |
-
print("[LLMService] spaces.GPU not available, using fallback")
|
| 15 |
-
SPACES_AVAILABLE = False
|
| 16 |
-
def GPU(f):
|
| 17 |
-
return f
|
| 18 |
-
spaces = None
|
| 19 |
|
| 20 |
# Global model variables for GPU functions
|
| 21 |
_model = None
|
|
@@ -91,6 +82,9 @@ def generate_text_with_gpu(prompt_text: str, max_tokens: int = 150):
|
|
| 91 |
print("[LLMService] GPU generation completed.")
|
| 92 |
return response
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
class LLMService:
|
| 95 |
def __init__(self):
|
| 96 |
# Use a reliable model that works well with Zero GPU
|
|
@@ -107,7 +101,7 @@ class LLMService:
|
|
| 107 |
# Model will be loaded on-demand in GPU context
|
| 108 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 109 |
print(f"[LLMService] Target device: {self.device}")
|
| 110 |
-
print(
|
| 111 |
|
| 112 |
def generate_with_model(self, prompt_text: str, max_tokens: int = 150):
|
| 113 |
"""Use the top-level GPU function"""
|
|
@@ -258,4 +252,5 @@ Your advice:
|
|
| 258 |
print(f"[LLMService] Error during cleanup: {e}")
|
| 259 |
|
| 260 |
# Create a singleton instance
|
| 261 |
-
llm_service_instance = LLMService()
|
|
|
|
|
|
| 1 |
+
import spaces # Import unconditionally at the top
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
import torch
|
| 4 |
from typing import List, Dict, Optional
|
| 5 |
import re
|
| 6 |
import os
|
| 7 |
|
| 8 |
+
print("[LLMService] Starting LLM service initialization...")
|
| 9 |
+
print("[LLMService] Spaces imported successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Global model variables for GPU functions
|
| 12 |
_model = None
|
|
|
|
| 82 |
print("[LLMService] GPU generation completed.")
|
| 83 |
return response
|
| 84 |
|
| 85 |
+
# Print confirmation that GPU function is registered
|
| 86 |
+
print(f"[LLMService] GPU function registered: {generate_text_with_gpu.__name__}")
|
| 87 |
+
|
| 88 |
class LLMService:
|
| 89 |
def __init__(self):
|
| 90 |
# Use a reliable model that works well with Zero GPU
|
|
|
|
| 101 |
# Model will be loaded on-demand in GPU context
|
| 102 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 103 |
print(f"[LLMService] Target device: {self.device}")
|
| 104 |
+
print("[LLMService] Service initialization completed")
|
| 105 |
|
| 106 |
def generate_with_model(self, prompt_text: str, max_tokens: int = 150):
|
| 107 |
"""Use the top-level GPU function"""
|
|
|
|
| 252 |
print(f"[LLMService] Error during cleanup: {e}")
|
| 253 |
|
| 254 |
# Create a singleton instance
|
| 255 |
+
llm_service_instance = LLMService()
|
| 256 |
+
print("[LLMService] Service instance created successfully")
|