# StarCoder2-3B / main.py
# Commit c07b229 by AjinkyaPagare:
# fix: python3.10, disable flash-attn, reduce memory for HF Spaces CPU
import os
import sys
import subprocess
import uvicorn
# Import the runtime settings from the app package. If that fails, abort
# early with a readable message instead of a raw traceback.
try:
    from app.config import MODEL_PATH, FASTAPI_PORT, LLAMA_PORT, LLAMA_SERVER_PATH, MODEL_NAME
except ImportError as exc:
    # Diagnostics go to stderr so they are not mixed into regular output.
    # The underlying error is shown as well: an ImportError is also raised
    # when app.config exists but one of the names (or one of its own
    # imports) is missing, and the generic hint alone would hide that.
    print(
        "Error: Could not import app.config. Ensure you are running run.py from its own folder.",
        file=sys.stderr,
    )
    print(f"Cause: {exc}", file=sys.stderr)
    sys.exit(1)
def check_setup() -> None:
    """Ensure the model file and llama-server binary exist, running setup.py if not.

    Checks whether ``MODEL_PATH`` and ``LLAMA_SERVER_PATH`` exist on disk.
    When both are present the function returns immediately. Otherwise it
    prints a warning that lists each missing file and then runs ``setup.py``
    with the current Python interpreter.

    Raises:
        subprocess.CalledProcessError: If ``setup.py`` exits with a non-zero
            status (the subprocess is run with ``check=True``).
    """
    model_exists = os.path.exists(MODEL_PATH)
    bin_exists = os.path.exists(LLAMA_SERVER_PATH)
    if model_exists and bin_exists:
        # Nothing to do: both required files are already in place.
        return

    print("=======================================================")
    print("WARNING: Required files are missing!")
    if not model_exists:
        print(f"- Model file not found at: {MODEL_PATH}")
    if not bin_exists:
        print(f"- llama-server.exe binary not found at: {LLAMA_SERVER_PATH}")
    print("\nRunning setup.py automatically to download the optimized model and llama.cpp binaries...")
    print("=======================================================\n")

    # Prefer the setup.py that sits next to this script, so the launcher
    # also works when started from another working directory. If no such
    # file exists, fall back to the original cwd-relative lookup.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    setup_script = os.path.join(script_dir, "setup.py")
    if not os.path.isfile(setup_script):
        setup_script = "setup.py"

    # check=True turns a failed setup into an exception instead of letting
    # the launcher continue silently.
    subprocess.run([sys.executable, setup_script], check=True)
if __name__ == "__main__":
    # Make sure the model and backend binary are available before starting.
    check_setup()

    # Startup banner summarising where the services can be reached.
    banner = (
        "\n=======================================================",
        "Starting IDE Code Completion Engine...",
        f"Model: {MODEL_NAME}",
        f"FastAPI Host: http://127.0.0.1:{FASTAPI_PORT}",
        f"Swagger UI Docs: http://127.0.0.1:{FASTAPI_PORT}/docs",
        f"llama-server Backend Port: {LLAMA_PORT}",
        "=======================================================\n",
    )
    for banner_line in banner:
        print(banner_line)

    try:
        # The bind address comes from FASTAPI_HOST, defaulting to all
        # interfaces; the app is served without auto-reload.
        bind_host = os.environ.get("FASTAPI_HOST", "0.0.0.0")
        uvicorn.run("app.main:app", host=bind_host, port=FASTAPI_PORT, reload=False)
    except KeyboardInterrupt:
        # Ctrl+C: report the shutdown and exit with a success status.
        print("\nShutdown signal received. Terminating servers gracefully...")
        sys.exit(0)