benjamin-paine committed on
Commit
badc904
·
verified ·
1 Parent(s): cf0849b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -9
app.py CHANGED
@@ -2,22 +2,33 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  import torch.amp as amp
5
- from transformers import pipeline
6
 
7
- repo_id = "appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf"
8
- upsampler = pipeline("text-generation", repo_id, torch_dtype=torch.bfloat16)
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  @spaces.GPU
11
  def upsample(prompt):
12
- return upsampler(
13
- [{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
14
- do_sample=False,
15
- max_new_tokens=512
16
- )[0]["generated_text"][-1]["content"]
 
17
 
18
  demo = gr.Interface(
19
  title="NVIDIA Cosmos 🌌 Prompt Upsampler",
20
- description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses the HuggingFace Transformers version at bfloat16 precision.
21
 
22
  [[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
23
  fn=upsample,
 
2
  import spaces
3
  import torch
4
  import torch.amp as amp
 
5
 
6
+ from huggingface_hub import hf_hub_download
7
+ from llama_cpp import Llama
8
+
9
+ model = hf_hub_download(
10
+ "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
11
+ filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
12
+ )
13
+ llama = Llama(
14
+ model,
15
+ n_gpu_layers=-1,
16
+ n_ctx=2048,
17
+ verbose=False
18
+ )
19
 
20
  @spaces.GPU
21
  def upsample(prompt):
22
+ completion = llama.create_chat_completion(
23
+ messages=[{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
24
+ max_tokens=512,
25
+ )
26
+ response = completion["choices"][0]["message"]["content"]
27
+ return response
28
 
29
  demo = gr.Interface(
30
  title="NVIDIA Cosmos 🌌 Prompt Upsampler",
31
+ description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses llama.cpp with the Q8-0 quantized GGUF checkpoint.
32
 
33
  [[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
34
  fn=upsample,