Spaces:
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,6 @@ from models_data import MODELS, FAMILIES, get_models_by_family
|
|
| 25 |
_HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 26 |
if _HF_TOKEN:
|
| 27 |
from huggingface_hub import login
|
| 28 |
-
# Se agrega add_to_git_credential=False para evitar advertencias de sobreescritura de token
|
| 29 |
login(token=_HF_TOKEN, add_to_git_credential=False)
|
| 30 |
|
| 31 |
try:
|
|
@@ -673,7 +672,7 @@ _ZEROGPU_NOTICE = """
|
|
| 673 |
<div class="zgn-title">⚡ ZeroGPU Latency</div>
|
| 674 |
GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
|
| 675 |
<div class="zgn-divider"></div>
|
| 676 |
-
<div class="zgn-title">
|
| 677 |
Due to ZeroGPU constraints, each message is processed independently.
|
| 678 |
The model has <strong>no conversation history</strong> — it starts fresh on every reply.
|
| 679 |
</div>
|
|
@@ -824,7 +823,6 @@ def _dual_loading_html(label: str, color: str, model_name: str) -> str:
|
|
| 824 |
|
| 825 |
_MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]
|
| 826 |
|
| 827 |
-
# Corrección Gradio 6.0: css y theme movidos a launch()
|
| 828 |
with gr.Blocks(title="Gemma Explorer") as demo:
|
| 829 |
|
| 830 |
current_model_state = gr.State(value=None)
|
|
@@ -902,11 +900,11 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 902 |
|
| 903 |
gr.HTML("""
|
| 904 |
<div class="zerogpu-notice" style="margin-bottom:16px">
|
| 905 |
-
<div class="zgn-title">
|
| 906 |
Send the same prompt to two models and compare their responses.
|
| 907 |
Models are loaded and run sequentially — Model A first, then Model B.
|
| 908 |
<div class="zgn-divider"></div>
|
| 909 |
-
<div class="zgn-title">
|
| 910 |
Same ZeroGPU constraints apply — each turn is processed independently with no context history.
|
| 911 |
</div>
|
| 912 |
""")
|
|
@@ -965,7 +963,6 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 965 |
card_html_list = [card_html_components[mid] for mid in MODELS]
|
| 966 |
_n_cards = len(card_html_list)
|
| 967 |
|
| 968 |
-
# Corrección: Uso de una función auxiliar para evitar el error del generador con lambda
|
| 969 |
def make_load_fn(model_id):
|
| 970 |
def load_fn():
|
| 971 |
yield from load_model_stream(model_id, _n_cards)
|
|
@@ -1058,7 +1055,7 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1058 |
_load_weights(model_a)
|
| 1059 |
except Exception as exc:
|
| 1060 |
_purge_model()
|
| 1061 |
-
err = f"
|
| 1062 |
yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1063 |
hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
|
| 1064 |
gr.update(visible=False))
|
|
@@ -1084,7 +1081,7 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1084 |
_load_weights(model_b)
|
| 1085 |
except Exception as exc:
|
| 1086 |
_purge_model()
|
| 1087 |
-
err = f"
|
| 1088 |
yield (hist_a,
|
| 1089 |
hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1090 |
gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
|
|
@@ -1117,7 +1114,6 @@ with gr.Blocks(title="Gemma Explorer") as demo:
|
|
| 1117 |
|
| 1118 |
if __name__ == "__main__":
|
| 1119 |
_debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
|
| 1120 |
-
# Corrección Gradio 6.0: css y theme ahora se pasan en launch()
|
| 1121 |
demo.launch(
|
| 1122 |
debug=_debug,
|
| 1123 |
css=CSS,
|
|
|
|
| 25 |
_HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 26 |
if _HF_TOKEN:
|
| 27 |
from huggingface_hub import login
|
|
|
|
| 28 |
login(token=_HF_TOKEN, add_to_git_credential=False)
|
| 29 |
|
| 30 |
try:
|
|
|
|
| 672 |
<div class="zgn-title">⚡ ZeroGPU Latency</div>
|
| 673 |
GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
|
| 674 |
<div class="zgn-divider"></div>
|
| 675 |
+
<div class="zgn-title">No Memory</div>
|
| 676 |
Due to ZeroGPU constraints, each message is processed independently.
|
| 677 |
The model has <strong>no conversation history</strong> — it starts fresh on every reply.
|
| 678 |
</div>
|
|
|
|
| 823 |
|
| 824 |
_MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]
|
| 825 |
|
|
|
|
| 826 |
with gr.Blocks(title="Gemma Explorer") as demo:
|
| 827 |
|
| 828 |
current_model_state = gr.State(value=None)
|
|
|
|
| 900 |
|
| 901 |
gr.HTML("""
|
| 902 |
<div class="zerogpu-notice" style="margin-bottom:16px">
|
| 903 |
+
<div class="zgn-title">Dual Chat — Side-by-Side Comparison</div>
|
| 904 |
Send the same prompt to two models and compare their responses.
|
| 905 |
Models are loaded and run sequentially — Model A first, then Model B.
|
| 906 |
<div class="zgn-divider"></div>
|
| 907 |
+
<div class="zgn-title">No Memory</div>
|
| 908 |
Same ZeroGPU constraints apply — each turn is processed independently with no context history.
|
| 909 |
</div>
|
| 910 |
""")
|
|
|
|
| 963 |
card_html_list = [card_html_components[mid] for mid in MODELS]
|
| 964 |
_n_cards = len(card_html_list)
|
| 965 |
|
|
|
|
| 966 |
def make_load_fn(model_id):
|
| 967 |
def load_fn():
|
| 968 |
yield from load_model_stream(model_id, _n_cards)
|
|
|
|
| 1055 |
_load_weights(model_a)
|
| 1056 |
except Exception as exc:
|
| 1057 |
_purge_model()
|
| 1058 |
+
err = f"Failed to load {name_a}: {exc}"
|
| 1059 |
yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1060 |
hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
|
| 1061 |
gr.update(visible=False))
|
|
|
|
| 1081 |
_load_weights(model_b)
|
| 1082 |
except Exception as exc:
|
| 1083 |
_purge_model()
|
| 1084 |
+
err = f"Failed to load {name_b}: {exc}"
|
| 1085 |
yield (hist_a,
|
| 1086 |
hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
|
| 1087 |
gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
|
|
|
|
| 1114 |
|
| 1115 |
if __name__ == "__main__":
|
| 1116 |
_debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
|
|
|
|
| 1117 |
demo.launch(
|
| 1118 |
debug=_debug,
|
| 1119 |
css=CSS,
|