Spaces:

ResearchEngineering
/

AGI

Running

Dmitry Beresnev committed on Dec 3, 2025

Commit

e80973f

1 Parent(s): 84bb7ea

fix dockerfile

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,25 +58,43 @@ def start_llama_server(model_id: str) -> subprocess.Popen:
     ]
     print(f"Starting llama-server with model: {model_id}")
     process = subprocess.Popen(
         cmd,
         stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        preexec_fn=os.setsid if os.name != 'nt' else None
     )
-    # Wait for server to be ready
-    max_retries = 60
     for i in range(max_retries):
         try:
-            response = requests.get(f"{LLAMA_SERVER_URL}/health", timeout=1)
-            if response.status_code == 200:
                 print(f"llama-server ready after {i+1} seconds")
                 return process
-        except:
-            time.sleep(1)
-    raise RuntimeError("llama-server failed to start")
 def stop_llama_server():

     ]
     print(f"Starting llama-server with model: {model_id}")
+    print("This may take 2-3 minutes to download and load the model...")
     process = subprocess.Popen(
         cmd,
         stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        preexec_fn=os.setsid if os.name != 'nt' else None,
+        text=True,
+        bufsize=1
     )
+    # Wait for server to be ready (increased timeout for model download)
+    max_retries = 300  # 5 minutes
     for i in range(max_retries):
+        # Check if process died
+        if process.poll() is not None:
+            stdout, _ = process.communicate()
+            print(f"llama-server exited with code {process.returncode}")
+            print(f"Output: {stdout}")
+            raise RuntimeError("llama-server process died")
         try:
+            # Try root endpoint instead of /health
+            response = requests.get(f"{LLAMA_SERVER_URL}/", timeout=2)
+            if response.status_code in [200, 404]:  # 404 is ok, means server is up
                 print(f"llama-server ready after {i+1} seconds")
                 return process
+        except requests.exceptions.ConnectionError:
+            # Server not ready yet
+            pass
+        except Exception as e:
+            # Other errors, keep waiting
+            pass
+        time.sleep(1)
+    raise RuntimeError("llama-server failed to start within 5 minutes")
 def stop_llama_server():