Spaces:

TeamGenKI
/

LLMServer

Paused

App Files Community

AurelioAguirre committed on Nov 7, 2024

Commit

5de8cee

1 Parent(s): 10d4b3b

Fixing model download issue v9

Browse files

Files changed (3) hide show

Dockerfile +12 -18
test-client/__init__.py +0 -0
test-client/client.py +0 -103

Dockerfile CHANGED Viewed

@@ -24,38 +24,32 @@ COPY requirements.txt .
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy the rest of the application
-COPY . .
 # Create checkpoints directory with proper permissions
-RUN mkdir -p /app/checkpoints && \
-    chmod 777 /app/checkpoints
 # The token will be passed during build time
 ARG HF_TOKEN
 ENV HF_TOKEN=${HF_TOKEN}
-# Download both models using litgpt
-# Only proceed if HF_TOKEN is provided
 RUN if [ -n "$HF_TOKEN" ]; then \
-        python -c "from huggingface_hub import login; from litgpt.cli import download; login('${HF_TOKEN}'); \
-        download('meta-llama/Llama-2-3b-chat-hf', '/app/checkpoints'); \
-        download('mistralai/Mistral-7B-Instruct-v0.3', '/app/checkpoints')"; \
     else \
         echo "No Hugging Face token provided. Models will need to be downloaded separately."; \
     fi
 # Set environment variables
 ENV LLM_ENGINE_HOST=0.0.0.0
-ENV LLM_ENGINE_PORT=8001
-# Update MODEL_PATH for the new model
-ENV MODEL_PATH=/app/checkpoints/mistralai/Mistral-7B-Instruct-v0.3
-# Expose both ports:
-# 8001 for FastAPI
-# 7860 for Hugging Face Spaces
-EXPOSE 8001 7860
 # Command to run the application
 CMD ["python", "main/main.py"]

 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 # Create checkpoints directory with proper permissions
+RUN mkdir -p /app/main/checkpoints && \
+    chmod 777 /app/main/checkpoints
 # The token will be passed during build time
 ARG HF_TOKEN
 ENV HF_TOKEN=${HF_TOKEN}
+# Download model using litgpt command line with correct checkpoint path
 RUN if [ -n "$HF_TOKEN" ]; then \
+        litgpt download mistralai/Mistral-7B-Instruct-v0.3 --access_token ${HF_TOKEN} --checkpoint_dir /app/main/checkpoints; \
     else \
         echo "No Hugging Face token provided. Models will need to be downloaded separately."; \
+        exit 1; \
     fi
+# Copy the rest of the application
+COPY . .
 # Set environment variables
 ENV LLM_ENGINE_HOST=0.0.0.0
+ENV LLM_ENGINE_PORT=7860
+ENV MODEL_PATH=/app/main/checkpoints/mistralai/Mistral-7B-Instruct-v0.3
+# Expose port 7860 for Hugging Face Spaces
+EXPOSE 7860
 # Command to run the application
 CMD ["python", "main/main.py"]

test-client/__init__.py DELETED Viewed

File without changes

test-client/client.py DELETED Viewed

@@ -1,103 +0,0 @@
-import logging
-import requests
-from typing import Optional, Dict, Any
-import json
-class LLMEngineClient:
-    def __init__(self, base_url: str, timeout: int = 10):
-        # Remove /api suffix and ensure proper formatting
-        self.base_url = base_url.rstrip('/')
-        self.timeout = timeout
-        self.logger = logging.getLogger(__name__)
-        # Set up logging
-        logging.basicConfig(
-            level=logging.INFO,
-            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-        )
-        self.logger.info(f"Initialized client with base URL: {self.base_url}")
-    def _make_request(self, method: str, endpoint: str, data: Optional[Dict] = None) -> Dict[str, Any]:
-        """Make HTTP request with detailed error handling"""
-        url = f"{self.base_url}/{endpoint.lstrip('/')}"
-        self.logger.info(f"Making {method} request to: {url}")
-        try:
-            headers = {
-                'Accept': 'application/json',
-                'Content-Type': 'application/json' if data else 'application/json'
-            }
-            response = requests.request(
-                method=method,
-                url=url,
-                json=data if data else None,
-                timeout=self.timeout,
-                headers=headers
-            )
-            # Log response details for debugging
-            self.logger.debug(f"Response status code: {response.status_code}")
-            self.logger.debug(f"Response headers: {response.headers}")
-            self.logger.debug(f"Response content: {response.text[:500]}")
-            # Check if the response is HTML
-            content_type = response.headers.get('content-type', '')
-            if 'text/html' in content_type:
-                self.logger.error(f"Received HTML response. URL might be incorrect or service might be down.")
-                self.logger.error(f"Attempted URL: {url}")
-                raise ValueError(f"Server returned HTML instead of JSON. Please check if the URL {url} is correct.")
-            response.raise_for_status()
-            return response.json()
-        except requests.exceptions.ConnectionError as e:
-            self.logger.error(f"Failed to connect to {url}: {str(e)}")
-            raise ConnectionError(f"Could not connect to LLM Engine at {url}. Is the service running?")
-        except requests.exceptions.Timeout as e:
-            self.logger.error(f"Request to {url} timed out after {self.timeout}s")
-            raise TimeoutError(f"Request timed out after {self.timeout} seconds")
-        except requests.exceptions.RequestException as e:
-            self.logger.error(f"Request failed: {str(e)}")
-            raise
-    def check_health(self) -> Dict[str, Any]:
-        """Check if the service is running and get health status"""
-        return self._make_request('GET', 'health')
-    def initialize_model(self, config: Dict[str, Any]) -> Dict[str, Any]:
-        """Initialize the model with given configuration"""
-        return self._make_request('POST', 'initialize', data=config)
-    def generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]:
-        """Generate text using the initialized model"""
-        return self._make_request('POST', 'generate', data=request)
-def test_connection():
-    """Test the connection to the LLM Engine"""
-    # When running on Spaces, we need to use the gradio-provided URL
-    base_url = "https://teamgenki-llm-engine.hf.space"
-    client = LLMEngineClient(base_url)
-    try:
-        # Try each endpoint
-        client.logger.info("Testing root endpoint...")
-        root_response = client._make_request('GET', '')
-        client.logger.info(f"Root endpoint response: {root_response}")
-        client.logger.info("Testing health endpoint...")
-        health_status = client.check_health()
-        client.logger.info(f"Health endpoint response: {health_status}")
-        return True
-    except Exception as e:
-        client.logger.error(f"Connection test failed: {str(e)}")
-        return False
-if __name__ == "__main__":
-    test_connection()