TinyLlama-API

Running

Drag2121 committed on Sep 17, 2024

Commit

f98b266

1 Parent(s): 4e963c4

gemma

Files changed (2) hide show

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 app = FastAPI()
-MODEL_NAME = 'phi3:mini'
 @lru_cache()
 def get_llm():

 logger = logging.getLogger(__name__)
 app = FastAPI()
+MODEL_NAME = 'gemma2:2b'
 @lru_cache()
 def get_llm():

start.sh CHANGED Viewed

@@ -9,8 +9,8 @@ export CUDA_VISIBLE_DEVICES=0  # Use the first GPU if available
 ollama serve &  # Use GPU 0 if available
 # Pull the model if not already present
-if ! ollama list | grep -q "phi3:mini"; then
-    ollama pull phi3:mini
 fi
 # Wait for Ollama to start up (use a more robust check)

 ollama serve &  # Use GPU 0 if available
 # Pull the model if not already present
+if ! ollama list | grep -q "gemma2:2b"; then
+    ollama pull gemma2:2b
 fi
 # Wait for Ollama to start up (use a more robust check)