afr2903 committed on
Commit
35e47c9
·
1 Parent(s): d980649

[fix]: Fixes for ZeroGPU deployment

Browse files
Files changed (3) hide show
  1. app.py +4 -1
  2. requirements.txt +21 -19
  3. requirements_cpu.txt +21 -0
app.py CHANGED
@@ -12,6 +12,7 @@ import random
12
  from types import SimpleNamespace
13
 
14
  import gradio as gr
 
15
 
16
  # Add paths for imports
17
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -67,6 +68,7 @@ tasks = Tasks()
67
  # Lazy-loaded fine-tuned model
68
  _finetuned_llm = None
69
 
 
70
  def get_finetuned_model():
71
  """Lazy load the fine-tuned model using llama-cpp-python"""
72
  global _finetuned_llm
@@ -78,7 +80,7 @@ def get_finetuned_model():
78
  repo_id="diegohc/rbrgs-finetuning",
79
  filename="q4/unsloth.Q4_K_M.gguf",
80
  n_ctx=4096,
81
- n_gpu_layers=0, # CPU only for HF Spaces (set higher for GPU)
82
  verbose=False
83
  )
84
  print("Fine-tuned model loaded successfully!")
@@ -87,6 +89,7 @@ def get_finetuned_model():
87
  raise
88
  return _finetuned_llm
89
 
 
90
  def inference_finetuned(command: str) -> list:
91
  """
92
  Run inference on the fine-tuned model.
 
12
  from types import SimpleNamespace
13
 
14
  import gradio as gr
15
+ from spaces import GPU
16
 
17
  # Add paths for imports
18
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
68
  # Lazy-loaded fine-tuned model
69
  _finetuned_llm = None
70
 
71
+ @GPU(duration=60) # Reserve GPU for 60 seconds
72
  def get_finetuned_model():
73
  """Lazy load the fine-tuned model using llama-cpp-python"""
74
  global _finetuned_llm
 
80
  repo_id="diegohc/rbrgs-finetuning",
81
  filename="q4/unsloth.Q4_K_M.gguf",
82
  n_ctx=4096,
83
+ n_gpu_layers=-1, # Use all GPU layers with ZeroGPU
84
  verbose=False
85
  )
86
  print("Fine-tuned model loaded successfully!")
 
89
  raise
90
  return _finetuned_llm
91
 
92
+ @GPU(duration=30) # Reserve GPU for 30 seconds per inference
93
  def inference_finetuned(command: str) -> list:
94
  """
95
  Run inference on the fine-tuned model.
requirements.txt CHANGED
@@ -1,19 +1,21 @@
1
- # FRIDA Command Interpreter - HuggingFace Space Requirements
2
-
3
- # BAML for structured LLM outputs
4
- baml-py==0.89.0
5
-
6
- # Embeddings and vector storage (for grounding stage)
7
- chromadb>=1.0.0
8
- sentence-transformers>=4.0.0
9
-
10
- # Local fine-tuned model inference
11
- llama-cpp-python>=0.3.0
12
-
13
- # Utilities
14
- pydantic>=2.0.0
15
- python-dotenv>=1.0.0
16
- termcolor>=2.0.0
17
-
18
- # Note: gradio is pre-installed on HuggingFace Spaces
19
- # Required environment variable: OPENROUTER_API_KEY
 
 
 
1
+ # FRIDA Command Interpreter - HuggingFace Space Requirements (ZeroGPU Optimized)
2
+
3
+ # BAML for structured LLM outputs
4
+ baml-py==0.89.0
5
+
6
+ # Embeddings and vector storage (for grounding stage)
7
+ chromadb>=1.0.0
8
+ sentence-transformers>=4.0.0
9
+
10
+ # Local fine-tuned model inference - CUDA pre-built wheels for ZeroGPU
11
+ # This uses pre-built CUDA 12.4 wheels to avoid compilation
12
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
13
+ llama-cpp-python==0.3.16
14
+
15
+ # Utilities
16
+ pydantic>=2.0.0
17
+ python-dotenv>=1.0.0
18
+ termcolor>=2.0.0
19
+
20
+ # Note: gradio is pre-installed on HuggingFace Spaces
21
+ # Optional environment variable: OPENROUTER_API_KEY (for API-based models)
requirements_cpu.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FRIDA Command Interpreter - HuggingFace Space Requirements
2
+
3
+ # BAML for structured LLM outputs
4
+ baml-py==0.89.0
5
+
6
+ # Embeddings and vector storage (for grounding stage)
7
+ chromadb>=1.0.0
8
+ sentence-transformers>=4.0.0
9
+
10
+ # Local fine-tuned model inference
11
+ # Use pre-built wheels to avoid compilation timeout
12
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
13
+ llama-cpp-python==0.3.16
14
+
15
+ # Utilities
16
+ pydantic>=2.0.0
17
+ python-dotenv>=1.0.0
18
+ termcolor>=2.0.0
19
+
20
+ # Note: gradio is pre-installed on HuggingFace Spaces
21
+ # Required environment variable: OPENROUTER_API_KEY