Spaces:

TeamGenKI
/

LLMServer

Paused

App Files Community

AurelioAguirre committed on Dec 6, 2024

Commit

cbb7bf7

1 Parent(s): 142c3be

This should work then

Browse files

Files changed (4) hide show

Dockerfile +3 -3
main/app.py +3 -107
main/test_locally.py +56 -0
main/utils/validation.py +34 -1

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.9
 RUN useradd -m -u 1000 user
 USER user
@@ -14,5 +14,5 @@ COPY --chown=user main/ /app/main
 EXPOSE 7860
-# Use same command as working version
-CMD ["python", "-m", "main.app"]

+FROM python:3.10
 RUN useradd -m -u 1000 user
 USER user
 EXPOSE 7860
+# We run the app object in the app.py file in the main folder.
+CMD ["uvicorn", "main.app:app", "--host", "0.0.0.0", "--port", "7860"]

main/app.py CHANGED Viewed

@@ -2,44 +2,11 @@ import yaml
 import sys
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-import uvicorn
-from .api import LLMApi
 from .routes import router, init_router
 from .utils.logging import setup_logger
-from huggingface_hub import login
-from pathlib import Path
-from dotenv import load_dotenv
-import os
-def validate_hf():
-    """
-    Validate Hugging Face authentication.
-    Checks for .env file, loads environment variables, and attempts HF login if token exists.
-    """
-    logger = setup_logger(config, "hf_validation")
-    # Check for .env file
-    env_path = Path('.env')
-    if env_path.exists():
-        logger.info("Found .env file, loading environment variables")
-        load_dotenv()
-    else:
-        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")
-    # Check for HF token
-    hf_token = os.getenv('HF_TOKEN')
-    if not hf_token:
-        logger.error("No HF_TOKEN found in environment variables")
-        return False
-    try:
-        # Attempt login
-        login(token=hf_token)
-        logger.info("Successfully authenticated with Hugging Face")
-        return True
-    except Exception as e:
-        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
-        return False
 def load_config():
     """Load configuration from yaml file"""
@@ -49,6 +16,7 @@ def load_config():
 def create_app():
     config = load_config()
     logger = setup_logger(config, "main")
     logger.info("Starting LLM API server")
     app = FastAPI(
@@ -74,76 +42,4 @@ def create_app():
     logger.info("FastAPI application created successfully")
     return app
-def test_locally():
-    """Run local tests for development and debugging"""
-    config = load_config()
-    logger = setup_logger(config, "test")
-    logger.info("Starting local tests")
-    api = LLMApi(config)
-    model_name = config["model"]["defaults"]["model_name"]
-    logger.info(f"Testing with model: {model_name}")
-    # Test download
-    logger.info("Testing model download...")
-    api.download_model(model_name)
-    logger.info("Download complete")
-    # Test initialization
-    logger.info("Initializing model...")
-    api.initialize_model(model_name)
-    logger.info("Model initialized")
-    # Test embedding
-    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
-    logger.info("Testing embedding generation...")
-    embedding = api.generate_embedding(test_text)
-    logger.info(f"Generated embedding of length: {len(embedding)}")
-    logger.info(f"First few values: {embedding[:5]}")
-    # Test generation
-    test_prompts = [
-        "Tell me what happens in a nuclear reactor.",
-    ]
-    # Test regular generation
-    logger.info("Testing regular generation:")
-    for prompt in test_prompts:
-        logger.info(f"Prompt: {prompt}")
-        response = api.generate_response(
-            prompt=prompt,
-            system_message="You are a helpful assistant."
-        )
-        logger.info(f"Response: {response}")
-    # Test streaming generation
-    logger.info("Testing streaming generation:")
-    logger.info(f"Prompt: {test_prompts[0]}")
-    for chunk in api.generate_stream(
-            prompt=test_prompts[0],
-            system_message="You are a helpful assistant."
-    ):
-        print(chunk, end="", flush=True)
-    print("\n")
-    logger.info("Local tests completed")
-app = create_app()
-if __name__ == "__main__":
-    config = load_config()
-    #validate_hf()
-    if len(sys.argv) > 1 and sys.argv[1] == "test":
-        test_locally()
-    else:
-        uvicorn.run(
-            "main.app:app",
-            host=config["server"]["host"],
-            port=config["server"]["port"],
-            log_level="trace",
-            reload=True,
-            workers=1,
-            access_log=False,
-            use_colors=True
-        )

 import sys
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .utils.logging import setup_logger
+from .utils.validation import validate_hf
 def load_config():
     """Load configuration from yaml file"""
 def create_app():
     config = load_config()
     logger = setup_logger(config, "main")
+    validate_hf(setup_logger, config)
     logger.info("Starting LLM API server")
     app = FastAPI(
     logger.info("FastAPI application created successfully")
     return app
+app = create_app()

main/test_locally.py ADDED Viewed

@@ -0,0 +1,56 @@

+def test_locally(load_config, setup_logger, LLMApi):
+    """Run local tests for development and debugging"""
+    config = load_config()
+    logger = setup_logger(config, "test")
+    logger.info("Starting local tests")
+    api = LLMApi(config)
+    model_name = config["model"]["defaults"]["model_name"]
+    logger.info(f"Testing with model: {model_name}")
+    # Test download
+    logger.info("Testing model download...")
+    api.download_model(model_name)
+    logger.info("Download complete")
+    # Test initialization
+    logger.info("Initializing model...")
+    api.initialize_model(model_name)
+    logger.info("Model initialized")
+    # Test embedding
+    test_text = "Dette er en test av embeddings generering fra en teknisk tekst om HMS rutiner på arbeidsplassen."
+    logger.info("Testing embedding generation...")
+    embedding = api.generate_embedding(test_text)
+    logger.info(f"Generated embedding of length: {len(embedding)}")
+    logger.info(f"First few values: {embedding[:5]}")
+    # Test generation
+    test_prompts = [
+        "Tell me what happens in a nuclear reactor.",
+    ]
+    # Test regular generation
+    logger.info("Testing regular generation:")
+    for prompt in test_prompts:
+        logger.info(f"Prompt: {prompt}")
+        response = api.generate_response(
+            prompt=prompt,
+            system_message="You are a helpful assistant."
+        )
+        logger.info(f"Response: {response}")
+    # Test streaming generation
+    logger.info("Testing streaming generation:")
+    logger.info(f"Prompt: {test_prompts[0]}")
+    for chunk in api.generate_stream(
+            prompt=test_prompts[0],
+            system_message="You are a helpful assistant."
+    ):
+        print(chunk, end="", flush=True)
+    print("\n")
+    logger.info("Local tests completed")

main/utils/validation.py CHANGED Viewed

@@ -1,5 +1,8 @@
 from typing import Dict, Any
 from pathlib import Path
 def validate_model_path(model_path: Path) -> bool:
     """Validate that a model path exists and contains necessary files"""
@@ -20,4 +23,34 @@ def validate_generation_params(params: Dict[str, Any]) -> Dict[str, Any]:
     if 'max_new_tokens' in validated:
         validated['max_new_tokens'] = max(1, min(4096, validated['max_new_tokens']))
-    return validated

 from typing import Dict, Any
 from pathlib import Path
+from dotenv import load_dotenv
+from huggingface_hub import login
+import os
 def validate_model_path(model_path: Path) -> bool:
     """Validate that a model path exists and contains necessary files"""
     if 'max_new_tokens' in validated:
         validated['max_new_tokens'] = max(1, min(4096, validated['max_new_tokens']))
+    return validated
+def validate_hf(setup_logger, config):
+    """
+    Validate Hugging Face authentication.
+    Checks for .env file, loads environment variables, and attempts HF login if token exists.
+    """
+    logger = setup_logger(config, "hf_validation")
+    # Check for .env file
+    env_path = Path('.env')
+    if env_path.exists():
+        logger.info("Found .env file, loading environment variables")
+        load_dotenv()
+    else:
+        logger.warning("No .env file found. Fine if you're on Huggingface, but you need one to run locally on your PC.")
+    # Check for HF token
+    hf_token = os.getenv('HF_TOKEN')
+    if not hf_token:
+        logger.error("No HF_TOKEN found in environment variables")
+        return False
+    try:
+        # Attempt login
+        login(token=hf_token)
+        logger.info("Successfully authenticated with Hugging Face")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to authenticate with Hugging Face: {str(e)}")
+        return False