from abc import ABC, abstractmethod
from typing import Dict, Iterator, List, Optional, Tuple, Union

import torch
class EngineBase(ABC):
    """
    Base class for engine interfaces that support generation, weight updating, and memory control.

    It defines one unified API that HTTP-based engines and other engine
    implementations can share. Every method defined here has an empty body and
    returns ``None``; concrete engines are expected to override them.
    """

    # NOTE(review): the class derives from ABC, but no method is decorated with
    # @abstractmethod in this view. As written the class is therefore
    # instantiable and any method a subclass does not override is a silent
    # no-op. Confirm whether decorators are intended before adding them:
    # doing so would make subclasses that skip a method fail to instantiate.

    def generate(
        self,
        prompt: Optional[Union[List[str], str]] = None,
        sampling_params: Optional[Union[List[Dict], Dict]] = None,
        input_ids: Optional[Union[List[List[int]], List[int]]] = None,
        image_data: Optional[Union[List[str], str]] = None,
        return_logprob: Optional[Union[List[bool], bool]] = False,
        logprob_start_len: Optional[Union[List[int], int]] = None,
        top_logprobs_num: Optional[Union[List[int], int]] = None,
        token_ids_logprob: Optional[Union[List[List[int]], List[int]]] = None,
        lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None,
        custom_logit_processor: Optional[Union[List[str], str]] = None,
        return_hidden_states: Optional[bool] = None,
        stream: Optional[bool] = None,
        bootstrap_host: Optional[Union[List[str], str]] = None,
        bootstrap_port: Optional[Union[List[int], int]] = None,
        bootstrap_room: Optional[Union[List[int], int]] = None,
        data_parallel_rank: Optional[int] = None,
    ) -> Union[Dict, Iterator[Dict]]:
        """Generate outputs based on given inputs.

        All arguments are optional. Most accept either a single value or a
        list of values, as spelled out in the annotations.

        Returns:
            Per the annotation, a dict or an iterator of dicts (presumably the
            iterator form is used when ``stream`` is set; confirm against the
            concrete engines). This base implementation returns ``None``.
        """

    def flush_cache(self):
        """Flush the cache of the engine."""

    def update_weights_from_tensor(
        self,
        named_tensors: List[Tuple[str, torch.Tensor]],
        load_format: Optional[str] = None,
        flush_cache: bool = True,
    ):
        """Update model weights with in-memory tensor data.

        Args:
            named_tensors: ``(name, tensor)`` pairs holding the new weights.
            load_format: Optional format identifier; accepted values are
                defined by the concrete engine.
            flush_cache: Defaults to ``True``. Presumably requests a cache
                flush after the update; confirm against implementations.
        """

    def load_lora_adapter(self, lora_name: str, lora_path: str):
        """Load a new LoRA adapter without re-launching the engine."""

    def unload_lora_adapter(self, lora_name: str):
        """Unload a LoRA adapter without re-launching the engine."""

    def release_memory_occupation(self):
        """Release GPU memory occupation temporarily."""

    def resume_memory_occupation(self):
        """Resume GPU memory occupation that was previously released."""

    def shutdown(self):
        """Shut down the engine and clean up resources."""
Xet Storage Details
- Size:
- 2.59 kB
- Xet hash:
- ad7bcaf162075503fc73e25d3f5dff69d78034fc45333e188b6de790955a94ab
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.