Penguin-VL-8B / app.py
lkeab's picture
Install flash-attn at startup with no-build-isolation
56ca6aa verified
import os
from inference.interface import PenguinVLQwen3GradioInterface
from inference.server import PenguinVLQwen3DirectClient
from inference.server.direct_client import ensure_flash_attn_installed
def main():
    """Start the Penguin-VL Gradio demo.

    Calls ``ensure_flash_attn_installed()`` first, then creates a
    ``PenguinVLQwen3DirectClient`` and passes it to a
    ``PenguinVLQwen3GradioInterface``, which is launched.

    Configuration is read from environment variables; the fallback is
    used when a variable is unset:

    * ``MODEL_PATH`` -- passed as ``model_path``
      (fallback ``"tencent/Penguin-VL-8B"``).
    * ``EXAMPLE_DIR`` -- passed as ``example_dir``
      (fallback ``"./assets/inputs"``).
    * ``GRADIO_SERVER_NAME`` -- passed as ``server_name``
      (fallback ``"0.0.0.0"``).
    * ``PORT`` -- converted with ``int()`` and passed as ``server_port``
      (fallback ``"7860"``).

    Raises:
        ValueError: if ``PORT`` is set to a value ``int()`` cannot parse.
    """
    env = os.environ

    # Must run before the model client is created.
    ensure_flash_attn_installed()

    client = PenguinVLQwen3DirectClient(
        model_path=env.get("MODEL_PATH", "tencent/Penguin-VL-8B"),
    )

    # Interface settings are read only after the client exists, in the
    # same order the keyword arguments were originally evaluated.
    ui_options = {
        "example_dir": env.get("EXAMPLE_DIR", "./assets/inputs"),
        "server_name": env.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        "server_port": int(env.get("PORT", "7860")),
    }

    PenguinVLQwen3GradioInterface(client, **ui_options).launch()
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()