Spaces:
Running on L40S
import os
import warnings

from inference.interface import PenguinVLQwen3GradioInterface
from inference.server import PenguinVLQwen3DirectClient
from inference.server.direct_client import ensure_flash_attn_installed, preload_model

# Silence a known-noisy torch.distributed deprecation warning so it does not
# flood the demo logs. The message is matched as a regex, hence the raw string
# and escaped dots.
warnings.filterwarnings(
    "ignore",
    message=r"`torch\.distributed\.reduce_op` is deprecated, please use `torch\.distributed\.ReduceOp` instead",
    category=FutureWarning,
)
def main() -> None:
    """Launch the Penguin-VL Gradio demo backed by a direct (in-process) model client.

    All configuration comes from environment variables:
      MODEL_PATH               -- HF repo id or local path (default: "tencent/Penguin-VL-8B")
      PRELOAD_MODEL_ON_STARTUP -- "1" (default) to load model weights before serving
      EXAMPLE_DIR              -- directory of example inputs shown in the UI
      GRADIO_SERVER_NAME       -- bind address for Gradio (default: "0.0.0.0")
      PORT                     -- bind port for Gradio (default: 7860)
    """
    model_path = os.getenv("MODEL_PATH", "tencent/Penguin-VL-8B")
    ensure_flash_attn_installed()

    # Optionally warm the model before the UI comes up. A preload failure is
    # deliberately non-fatal: the client can still load lazily on first request,
    # so we log and continue rather than crash the whole demo at startup.
    if os.getenv("PRELOAD_MODEL_ON_STARTUP", "1") == "1":
        try:
            preload_model(model_path)
        except Exception as exc:
            print(f"Startup model preload skipped: {exc}")

    model_client = PenguinVLQwen3DirectClient(
        model_path=model_path,
    )
    interface = PenguinVLQwen3GradioInterface(
        model_client,
        example_dir=os.getenv("EXAMPLE_DIR", "./assets/inputs"),
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("PORT", "7860")),
    )
    interface.launch()


if __name__ == "__main__":
    main()