Spaces:

TeamGenKI
/

LLMServer

Paused

App Files Files Community

AurelioAguirre committed on Nov 7, 2024

Commit

10d4b3b

1 Parent(s): 840a4e4

Connection with api. Added test-client

Browse files

Files changed (3) hide show

main/main.py +30 -25
test-client/__init__.py +0 -0
test-client/client.py +103 -0

main/main.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import Optional, Union
 import torch
@@ -13,15 +14,21 @@ from huggingface_hub.hf_api import HfApi
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Initialize FastAPI with root path for Spaces
 app = FastAPI(
     title="LLM Engine Service",
-    # This is crucial for Hugging Face Spaces
-    root_path="/",
-    # Add OpenAPI configs
-    openapi_url="/api/openapi.json",
-    docs_url="/api/docs",
-    redoc_url="/api/redoc"
 )
 # Global variable to store the LLM instance
@@ -49,14 +56,13 @@ class GenerateRequest(BaseModel):
 @app.get("/")
 async def root():
     """Root endpoint to verify service is running"""
-    space_url = "https://teamgenki-llm-engine.hf.space"
     return {
         "status": "running",
         "service": "LLM Engine",
         "endpoints": {
-            "initialize": f"{space_url}/initialize",
-            "generate": f"{space_url}/generate",
-            "health": f"{space_url}/health"
         }
     }
@@ -132,9 +138,6 @@ async def generate(request: GenerateRequest):
     try:
         if request.stream:
-            # For streaming responses, we need to handle differently
-            # This is a placeholder as the actual streaming implementation
-            # would need to use StreamingResponse from FastAPI
             raise HTTPException(
                 status_code=400,
                 detail="Streaming is not currently supported through the API"
@@ -190,23 +193,25 @@ async def health_check():
 def main():
     # Load environment variables or configuration here
     host = os.getenv("LLM_ENGINE_HOST", "0.0.0.0")
-    port = int(os.getenv("LLM_ENGINE_PORT", "7860"))  # Changed to 7860 for Spaces
-    # Log the service URLs
-    space_url = "https://teamgenki-llm-engine.hf.space"
-    logger.info(f"Service will be available at: {space_url}")
-    logger.info(f"API endpoints:")
-    logger.info(f"  Initialize: {space_url}/initialize")
-    logger.info(f"  Generate: {space_url}/generate")
-    logger.info(f"  Health: {space_url}/health")
     # Start the server
     uvicorn.run(
         app,
         host=host,
         port=port,
-        log_level="info",
-        reload=False
     )
 if __name__ == "__main__":

 from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import Optional, Union
 import torch
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Initialize FastAPI with simplified configuration
 app = FastAPI(
     title="LLM Engine Service",
+    docs_url="/docs",
+    redoc_url="/redoc",
+    openapi_url="/openapi.json"
+)
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
 )
 # Global variable to store the LLM instance
 @app.get("/")
 async def root():
     """Root endpoint to verify service is running"""
     return {
         "status": "running",
         "service": "LLM Engine",
         "endpoints": {
+            "initialize": "/initialize",
+            "generate": "/generate",
+            "health": "/health"
         }
     }
     try:
         if request.stream:
             raise HTTPException(
                 status_code=400,
                 detail="Streaming is not currently supported through the API"
 def main():
     # Load environment variables or configuration here
     host = os.getenv("LLM_ENGINE_HOST", "0.0.0.0")
+    port = int(os.getenv("LLM_ENGINE_PORT", "7860"))  # Default to 7860 for Spaces
+    # Log startup information
+    logger.info(f"Starting LLM Engine service on {host}:{port}")
+    logger.info("Available endpoints:")
+    logger.info("  - /")
+    logger.info("  - /health")
+    logger.info("  - /initialize")
+    logger.info("  - /generate")
+    logger.info("  - /docs")
+    logger.info("  - /redoc")
+    logger.info("  - /openapi.json")
     # Start the server
     uvicorn.run(
         app,
         host=host,
         port=port,
+        log_level="info"
     )
 if __name__ == "__main__":

test-client/__init__.py ADDED Viewed

File without changes

test-client/client.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import logging
+import requests
+from typing import Optional, Dict, Any
+import json
+class LLMEngineClient:
+    def __init__(self, base_url: str, timeout: int = 10):
+        # Strip trailing slashes so endpoint paths can be joined cleanly
+        self.base_url = base_url.rstrip('/')
+        self.timeout = timeout
+        self.logger = logging.getLogger(__name__)
+        # Set up logging
+        logging.basicConfig(
+            level=logging.INFO,
+            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+        self.logger.info(f"Initialized client with base URL: {self.base_url}")
+    def _make_request(self, method: str, endpoint: str, data: Optional[Dict] = None) -> Dict[str, Any]:
+        """Make HTTP request with detailed error handling"""
+        url = f"{self.base_url}/{endpoint.lstrip('/')}"
+        self.logger.info(f"Making {method} request to: {url}")
+        try:
+            headers = {
+                'Accept': 'application/json',
+                'Content-Type': 'application/json' if data else 'application/json'
+            }
+            response = requests.request(
+                method=method,
+                url=url,
+                json=data if data else None,
+                timeout=self.timeout,
+                headers=headers
+            )
+            # Log response details for debugging
+            self.logger.debug(f"Response status code: {response.status_code}")
+            self.logger.debug(f"Response headers: {response.headers}")
+            self.logger.debug(f"Response content: {response.text[:500]}")
+            # Check if the response is HTML
+            content_type = response.headers.get('content-type', '')
+            if 'text/html' in content_type:
+                self.logger.error(f"Received HTML response. URL might be incorrect or service might be down.")
+                self.logger.error(f"Attempted URL: {url}")
+                raise ValueError(f"Server returned HTML instead of JSON. Please check if the URL {url} is correct.")
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.ConnectionError as e:
+            self.logger.error(f"Failed to connect to {url}: {str(e)}")
+            raise ConnectionError(f"Could not connect to LLM Engine at {url}. Is the service running?")
+        except requests.exceptions.Timeout as e:
+            self.logger.error(f"Request to {url} timed out after {self.timeout}s")
+            raise TimeoutError(f"Request timed out after {self.timeout} seconds")
+        except requests.exceptions.RequestException as e:
+            self.logger.error(f"Request failed: {str(e)}")
+            raise
+    def check_health(self) -> Dict[str, Any]:
+        """Check if the service is running and get health status"""
+        return self._make_request('GET', 'health')
+    def initialize_model(self, config: Dict[str, Any]) -> Dict[str, Any]:
+        """Initialize the model with given configuration"""
+        return self._make_request('POST', 'initialize', data=config)
+    def generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate text using the initialized model"""
+        return self._make_request('POST', 'generate', data=request)
+def test_connection():
+    """Test the connection to the LLM Engine"""
+    # When running on Spaces, we need to use the Space's public *.hf.space URL
+    base_url = "https://teamgenki-llm-engine.hf.space"
+    client = LLMEngineClient(base_url)
+    try:
+        # Try each endpoint
+        client.logger.info("Testing root endpoint...")
+        root_response = client._make_request('GET', '')
+        client.logger.info(f"Root endpoint response: {root_response}")
+        client.logger.info("Testing health endpoint...")
+        health_status = client.check_health()
+        client.logger.info(f"Health endpoint response: {health_status}")
+        return True
+    except Exception as e:
+        client.logger.error(f"Connection test failed: {str(e)}")
+        return False
+if __name__ == "__main__":
+    test_connection()