Neuromind / hybrid_model.py

Upload 27 files

b0c3c60 verified 14 days ago

2.61 kB

from dataclasses import dataclass from typing import Optional import logging import torch from cpufeature import CPUFeature from petals.constants import PUBLIC_INITIAL_PEERS # Configure logging logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler("app.log"), logging.StreamHandler() ] ) logger = logging.getLogger(__name__) @dataclass class ModelInfo: repo: str adapter: Optional[str] = None MODELS = [ ModelInfo(repo="meta-llama/Llama-2-70b-chat-hf"), ModelInfo(repo="stabilityai/StableBeluga2"), ModelInfo(repo="enoch/llama-65b-hf"), ModelInfo(repo="enoch/llama-65b-hf", adapter="timdettmers/guanaco-65b"), ModelInfo(repo="bigscience/bloomz"), # Add more models here ModelInfo(repo="roda-1"), ModelInfo(repo="kubu-hai.model.h5-2", adapter="kubu-hai.model.mat-2"), ] DEFAULT_MODEL_NAME = "enoch/llama-65b-hf" INITIAL_PEERS = PUBLIC_INITIAL_PEERS # Set this to a list of multiaddrs to connect to a private swarm instead of the public one, for example: # INITIAL_PEERS = ['/ip4/10.1.2.3/tcp/31234/p2p/QmcXhze98AcgGQDDYna23s4Jho96n8wkwLJv78vxtFNq44'] DEVICE = "cuda" if torch.cuda.is_available() else "cpu" if DEVICE == "cuda": TORCH_DTYPE = "auto" elif CPUFeature["AVX512f"] and CPUFeature["OS_AVX512"]: TORCH_DTYPE = torch.bfloat16 else: TORCH_DTYPE = torch.float32 # You can use bfloat16 in this case too, but it will be slow STEP_TIMEOUT = 10 * 60 # Changed from 5 minutes to 10 minutes MAX_SESSIONS = 50 # Has effect only for API v1 (HTTP-based) logger.info("Configuration setup complete.") # Example preprocess and postprocess functions def preprocess(data): logger.debug("Preprocessing data") # Add your preprocessing steps here return data def postprocess(data): logger.debug("Postprocessing data") # Add your postprocessing steps here return data # Example model class class MyModel(torch.nn.Module): def __init__(self): super(MyModel, self).__init__() # Define your model layers here def forward(self, x): # Define the forward pass return x # Initialize model model = MyModel().to(DEVICE) # Example hybrid function def hybrid_function(data): # Preprocessing on CPU data_cpu = data.to("cpu") preprocessed_data = preprocess(data_cpu) # Move data to GPU for inference if available preprocessed_data = preprocessed_data.to(DEVICE) output = model(preprocessed_data) # Postprocessing on CPU output_cpu = output.to("cpu") result = postprocess(output_cpu) return result # Example usage data = torch.randn(100, 10).to(DEVICE) # Example data result = hybrid_function(data) logger.info("Processing complete.")