Penguin-VL / app.py
lkeab's picture
Disable SSR and suppress spaces runtime warning
e8fc2cf verified
import os
import warnings
from inference.interface import PenguinVLQwen3GradioInterface
from inference.server import PenguinVLQwen3DirectClient
from inference.server.direct_client import ensure_flash_attn_installed, preload_model
# Install a process-wide filter that drops the FutureWarning announcing the
# deprecation of `torch.distributed.reduce_op`, so it does not clutter the
# application's startup output.
warnings.filterwarnings(
    action="ignore",
    category=FutureWarning,
    message=r"`torch\.distributed\.reduce_op` is deprecated, please use `torch\.distributed\.ReduceOp` instead",
)
def main():
    """Start the Penguin-VL Gradio application.

    All configuration is taken from environment variables:

    * ``MODEL_PATH`` -- model identifier or path handed to the preload step
      and to the client (default ``"tencent/Penguin-VL-8B"``).
    * ``PRELOAD_MODEL_ON_STARTUP`` -- the model is preloaded only when this
      is exactly ``"1"`` (which is also the default).
    * ``EXAMPLE_DIR`` -- example directory passed to the interface
      (default ``"./assets/inputs"``).
    * ``GRADIO_SERVER_NAME`` -- server name passed to the interface
      (default ``"0.0.0.0"``).
    * ``PORT`` -- server port, converted with ``int`` (default ``"7860"``).

    Raises:
        ValueError: If ``PORT`` is set to something ``int`` cannot parse.
    """
    model_path = os.getenv("MODEL_PATH", "tencent/Penguin-VL-8B")
    ensure_flash_attn_installed()

    preload_requested = os.getenv("PRELOAD_MODEL_ON_STARTUP", "1") == "1"
    if preload_requested:
        try:
            preload_model(model_path)
        except Exception as exc:
            # Preloading is best-effort: report the failure and keep
            # starting up instead of aborting the whole application.
            print(f"Startup model preload skipped: {exc}")

    client = PenguinVLQwen3DirectClient(model_path=model_path)

    # Collected after the client is built so the environment is read at the
    # same point in the startup sequence as before.
    ui_options = {
        "example_dir": os.getenv("EXAMPLE_DIR", "./assets/inputs"),
        "server_name": os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        "server_port": int(os.getenv("PORT", "7860")),
    }
    app = PenguinVLQwen3GradioInterface(client, **ui_options)
    app.launch()
# Run the application only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()