Spaces:
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,6 @@ from models_data import MODELS, FAMILIES, get_models_by_family
|
|
| 25 |
_HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 26 |
if _HF_TOKEN:
|
| 27 |
from huggingface_hub import login
|
| 28 |
-
# Se agrega add_to_git_credential=False para evitar advertencias de sobreescritura de token
|
| 29 |
login(token=_HF_TOKEN, add_to_git_credential=False)
|
| 30 |
|
| 31 |
try:
|
|
@@ -673,7 +672,7 @@ _ZEROGPU_NOTICE = """
|
|
| 673 |
<div class="zgn-title">⚡ ZeroGPU Latency</div>
|
| 674 |
GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
|
| 675 |
<div class="zgn-divider"></div>
|
| 676 |
-
<div class="zgn-title">
|
| 677 |
Due to ZeroGPU constraints, each message is processed independently.
|
| 678 |
The model has <strong>no conversation history</strong> — it starts fresh on every reply.
|
| 679 |
</div>
|
|
@@ -824,7 +823,6 @@ def _dual_loading_html(label: str, color: str, model_name: str) -> str:
|
|
| 824 |
|
| 825 |
_MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]
|
| 826 |
|
| 827 |
-
# Corrección Gradio 6.0: css y theme movidos a launch()
|
| 828 |
with gr.Blocks(title="Gemma Explorer") as demo:
|
| 829 |
|
| 830 |
current_model_state = gr.State(value=None)
|
|
@@ -902,11 +900,11 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 902 |
|
| 903 |
gr.HTML("""
|
| 904 |
<div class="zerogpu-notice" style="margin-bottom:16px">
|
| 905 |
-
<div class="zgn-title">
|
| 906 |
Send the same prompt to two models and compare their responses.
|
| 907 |
Models are loaded and run sequentially — Model A first, then Model B.
|
| 908 |
<div class="zgn-divider"></div>
|
| 909 |
-
<div class="zgn-title">
|
| 910 |
Same ZeroGPU constraints apply — each turn is processed independently with no context history.
|
| 911 |
</div>
|
| 912 |
""")
|
|
@@ -965,7 +963,6 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 965 |
card_html_list = [card_html_components[mid] for mid in MODELS]
|
| 966 |
_n_cards = len(card_html_list)
|
| 967 |
|
| 968 |
-
# Corrección: Uso de una función auxiliar para evitar el error del generador con lambda
|
| 969 |
def make_load_fn(model_id):
|
| 970 |
def load_fn():
|
| 971 |
yield from load_model_stream(model_id, _n_cards)
|
|
@@ -1058,7 +1055,7 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1058 |
_load_weights(model_a)
|
| 1059 |
except Exception as exc:
|
| 1060 |
_purge_model()
|
| 1061 |
-
err = f"
|
| 1062 |
yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1063 |
hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
|
| 1064 |
gr.update(visible=False))
|
|
@@ -1084,7 +1081,7 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1084 |
_load_weights(model_b)
|
| 1085 |
except Exception as exc:
|
| 1086 |
_purge_model()
|
| 1087 |
-
err = f"
|
| 1088 |
yield (hist_a,
|
| 1089 |
hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1090 |
gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
|
|
@@ -1117,7 +1114,6 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1117 |
|
| 1118 |
if __name__ == "__main__":
|
| 1119 |
_debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
|
| 1120 |
-
# Corrección Gradio 6.0: css y theme ahora se pasan en launch()
|
| 1121 |
demo.launch(
|
| 1122 |
debug=_debug,
|
| 1123 |
css=CSS,
|
|
|
|
| 25 |
_HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 26 |
if _HF_TOKEN:
|
| 27 |
from huggingface_hub import login
|
|
|
|
| 28 |
login(token=_HF_TOKEN, add_to_git_credential=False)
|
| 29 |
|
| 30 |
try:
|
|
|
|
| 672 |
<div class="zgn-title">⚡ ZeroGPU Latency</div>
|
| 673 |
GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
|
| 674 |
<div class="zgn-divider"></div>
|
| 675 |
+
<div class="zgn-title">No Memory</div>
|
| 676 |
Due to ZeroGPU constraints, each message is processed independently.
|
| 677 |
The model has <strong>no conversation history</strong> — it starts fresh on every reply.
|
| 678 |
</div>
|
|
|
|
| 823 |
|
| 824 |
_MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]
|
| 825 |
|
|
|
|
| 826 |
with gr.Blocks(title="Gemma Explorer") as demo:
|
| 827 |
|
| 828 |
current_model_state = gr.State(value=None)
|
|
|
|
| 900 |
|
| 901 |
gr.HTML("""
|
| 902 |
<div class="zerogpu-notice" style="margin-bottom:16px">
|
| 903 |
+
<div class="zgn-title">Dual Chat — Side-by-Side Comparison</div>
|
| 904 |
Send the same prompt to two models and compare their responses.
|
| 905 |
Models are loaded and run sequentially — Model A first, then Model B.
|
| 906 |
<div class="zgn-divider"></div>
|
| 907 |
+
<div class="zgn-title">No Memory</div>
|
| 908 |
Same ZeroGPU constraints apply — each turn is processed independently with no context history.
|
| 909 |
</div>
|
| 910 |
""")
|
|
|
|
| 963 |
card_html_list = [card_html_components[mid] for mid in MODELS]
|
| 964 |
_n_cards = len(card_html_list)
|
| 965 |
|
|
|
|
| 966 |
def make_load_fn(model_id):
|
| 967 |
def load_fn():
|
| 968 |
yield from load_model_stream(model_id, _n_cards)
|
|
|
|
| 1055 |
_load_weights(model_a)
|
| 1056 |
except Exception as exc:
|
| 1057 |
_purge_model()
|
| 1058 |
+
err = f"Failed to load {name_a}: {exc}"
|
| 1059 |
yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1060 |
hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
|
| 1061 |
gr.update(visible=False))
|
|
|
|
| 1081 |
_load_weights(model_b)
|
| 1082 |
except Exception as exc:
|
| 1083 |
_purge_model()
|
| 1084 |
+
err = f"Failed to load {name_b}: {exc}"
|
| 1085 |
yield (hist_a,
|
| 1086 |
hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1087 |
gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
|
|
|
|
| 1114 |
|
| 1115 |
if __name__ == "__main__":
|
| 1116 |
_debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
|
|
|
|
| 1117 |
demo.launch(
|
| 1118 |
debug=_debug,
|
| 1119 |
css=CSS,
|