Dmitry Beresnev committed on
Commit
e80973f
·
1 Parent(s): 84bb7ea

fix dockerfile

Browse files
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -58,25 +58,43 @@ def start_llama_server(model_id: str) -> subprocess.Popen:
58
  ]
59
 
60
  print(f"Starting llama-server with model: {model_id}")
 
 
61
  process = subprocess.Popen(
62
  cmd,
63
  stdout=subprocess.PIPE,
64
- stderr=subprocess.PIPE,
65
- preexec_fn=os.setsid if os.name != 'nt' else None
 
 
66
  )
67
 
68
- # Wait for server to be ready
69
- max_retries = 60
70
  for i in range(max_retries):
 
 
 
 
 
 
 
71
  try:
72
- response = requests.get(f"{LLAMA_SERVER_URL}/health", timeout=1)
73
- if response.status_code == 200:
 
74
  print(f"llama-server ready after {i+1} seconds")
75
  return process
76
- except:
77
- time.sleep(1)
 
 
 
 
 
 
78
 
79
- raise RuntimeError("llama-server failed to start")
80
 
81
 
82
  def stop_llama_server():
 
58
  ]
59
 
60
  print(f"Starting llama-server with model: {model_id}")
61
+ print("This may take 2-3 minutes to download and load the model...")
62
+
63
  process = subprocess.Popen(
64
  cmd,
65
  stdout=subprocess.PIPE,
66
+ stderr=subprocess.STDOUT,
67
+ preexec_fn=os.setsid if os.name != 'nt' else None,
68
+ text=True,
69
+ bufsize=1
70
  )
71
 
72
+ # Wait for server to be ready (increased timeout for model download)
73
+ max_retries = 300 # 5 minutes
74
  for i in range(max_retries):
75
+ # Check if process died
76
+ if process.poll() is not None:
77
+ stdout, _ = process.communicate()
78
+ print(f"llama-server exited with code {process.returncode}")
79
+ print(f"Output: {stdout}")
80
+ raise RuntimeError("llama-server process died")
81
+
82
  try:
83
+ # Try root endpoint instead of /health
84
+ response = requests.get(f"{LLAMA_SERVER_URL}/", timeout=2)
85
+ if response.status_code in [200, 404]: # 404 is ok, means server is up
86
  print(f"llama-server ready after {i+1} seconds")
87
  return process
88
+ except requests.exceptions.ConnectionError:
89
+ # Server not ready yet
90
+ pass
91
+ except Exception as e:
92
+ # Other errors, keep waiting
93
+ pass
94
+
95
+ time.sleep(1)
96
 
97
+ raise RuntimeError("llama-server failed to start within 5 minutes")
98
 
99
 
100
  def stop_llama_server():