Spaces:

VeuReu
/

schat

Running on Zero

App Files Files Community

VeuReu committed 3 days ago

Commit

73cad8e

verified ·

1 Parent(s): cb4cd52

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -74

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py — veureu/schat (Salamandra 7B Instruct · ZeroGPU) — compatible con ENGINE
 from __future__ import annotations
 import os, json
 from typing import List, Dict, Any, Optional, Tuple
@@ -40,8 +40,8 @@ def _lazy_load() -> Tuple[AutoTokenizer, AutoModelForCausalLM]:
 def _build_prompt(prompt: str, system: Optional[str]) -> str:
     """
-    Si el tokenizer posee 'chat_template', lo usamos con mensajes [system?, user].
-    Si no, hacemos un prompt plano con system arriba.
     """
     tok, _ = _lazy_load()
     messages = []
@@ -52,54 +52,56 @@ def _build_prompt(prompt: str, system: Optional[str]) -> str:
     chat_template = getattr(tok, "chat_template", None)
     if chat_template:
         return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    # Fallback sin chat template
     sys_part = (f"<<SYS>>\n{system.strip()}\n<</SYS>>\n\n" if system and system.strip() else "")
-    return sys_part + f"### Instrucción\n{prompt}\n\n### Respuesta\n"
-@spaces.GPU  # usa GPU si está disponible (ZeroGPU)
-def _generate_with_tools(
-    messages: List[Dict[str, str]],
-    tools: List[Dict[str, Any]],
-    max_new_tokens: int = 512,
-    temperature: float = 0.7,
-    top_p: float = 0.95,
-) -> Dict[str, Any]:
-    tok, model = _lazy_load()
-    tools_md = _render_tools_md(tools)
-    prompt = _compose_chat_prompt(messages, tools_md)
-    inputs = tok(prompt, return_tensors="pt").to(DEVICE)
-    with torch.inference_mode():
-        out = model.generate(
-            **inputs,
-            max_new_tokens=int(max_new_tokens),
-            temperature=float(temperature),
-            top_p=float(top_p),
-            do_sample=True if temperature > 0 else False,
-            pad_token_id=tok.eos_token_id,
-            eos_token_id=tok.eos_token_id,
-        )
-    text = tok.decode(out[0], skip_special_tokens=True).strip()
-    # Si el modelo devuelve un bloque JSON con 'tool_calls', lo intentamos extraer.
-    tool_calls: List[Dict[str, Any]] = []
-    try:
-        # busca el último {...} que contenga "tool_calls"
-        matches = list(re.finditer(r"\{.*?\"tool_calls\".*?\}", text, flags=re.S))
-        if matches:
-            block = text[matches[-1].start():matches[-1].end()]
-            obj = json.loads(block)
-            tc = obj.get("tool_calls", [])
-            if isinstance(tc, list):
-                tool_calls = tc
-    except Exception:
-        pass
-    tool_results = maybe_execute_tool_calls(tool_calls) if tool_calls else []
-    return {"text": text, "tool_calls": tool_calls, "tool_results": tool_results}
-@spaces.GPU  # usa GPU si está disponible (ZeroGPU)
 def _generate(
     prompt: str,
     system: str = "",
@@ -124,58 +126,89 @@ def _generate(
     return tok.decode(out[0], skip_special_tokens=True).strip()
 # ------------------- Gradio Endpoints -------------------
-# 1) /predict — lo que espera el ENGINE (solo 'prompt' → string)
 def predict_for_engine(prompt: str) -> str:
     return _generate(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)
-# 2) /generate — más controles (prompt + system + params)
 def generate_advanced(prompt: str, system: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
     return _generate(prompt=prompt, system=system, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
 def salamandra_chat_endpoint(prompt: str) -> Dict[str, Any]:
     global _salamandra
     if _salamandra is None:
-        _salamandra = SalamandraClient()   # usa tu clase
     try:
         text = _salamandra.chat(prompt)
     except Exception as e:
-        text = f"Error ejecutando SalamandraClient: {str(e)}"
     return {"text": text}
-def resumir_frases(frase, num_palabras):
-    num_palabras = int(num_palabras)
-    prompt = f"Instrució: Resumeix la següent frase en {num_palabras} paraules. Input: {frase}"
     result = generate_advanced(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)
     if "assistant" in result:
         clean_output = result.split("assistant", 1)[1].strip().split("\n")[0]
     else:
-        clean_output = frase
     return clean_output
-def identity_manager (frase, persona):
     prompt = f"""Instrucció: Substitueix el subjecte de la frase per la persona indicada, mantenint la resta igual.
-        Frase: {frase}
-        Substitució: {persona}
         Resposta:"""
     result = generate_advanced(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)
     if "assistant" in result:
         clean_output = result.split("assistant", 1)[1].strip().split("\n")[0]
     else:
-        clean_output = frase
     return clean_output
-def free_narration (srt_final):
     prompt = f"""Instrucció: Converteix aquesta audiodescripció en una narració lliure breu, natural i coherent.,
         input: {srt_final}
         output:
     """
     result = generate_advanced(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)
     if "assistant" in result:
         clean_output = result.split("assistant", 1)[1].strip().split("\n")[0]
     else:
-        clean_output = frase
     return clean_output
 # ------------------- HTTP (opcional, clientes puros) -------------------
@@ -217,17 +250,6 @@ with gr.Blocks(title="Salamandra 7B Instruct · ZeroGPU",css=custom_css,theme=gr
     gr.Button("Probar /predict").click(predict_for_engine, [in_prompt_engine], out_engine, api_name="predict", concurrency_limit=1)
     gr.Markdown("---")
-    gr.Markdown('<h2 style="text-align:center">Sortida del model Salamandra a partir d’una petició</h2>')
-    with gr.Row():
-        prompt = gr.Textbox(label="prompt", lines=10)
-    with gr.Row():
-        btn2 = gr.Button("Generar", variant="primary")
-    with gr.Row():
-        out2 = gr.JSON(label="Salida")
-    btn2.click(salamandra_chat_endpoint, [prompt], out2, api_name="generate_out_from_prompt", concurrency_limit=1)
-    gr.Markdown("---")
     gr.Markdown('<h2 style="text-align:center">Resumir frases</h2>')
     with gr.Row():
         with gr.Column(scale=1):
@@ -239,7 +261,7 @@ with gr.Blocks(title="Salamandra 7B Instruct · ZeroGPU",css=custom_css,theme=gr
         btn_resumir = gr.Button("Resumir", variant="primary")
     btn_resumir.click(
-        resumir_frases,
         inputs=[frase, num_paraules],
         outputs=out_resumir,
         api_name="resumir",
@@ -269,7 +291,7 @@ with gr.Blocks(title="Salamandra 7B Instruct · ZeroGPU",css=custom_css,theme=gr
     with gr.Row():
         with gr.Column(scale=1):
             srt = gr.Textbox(label="Audiodescripció", value="(AD)\nTOTS CANTANT: avui celebrem la nostra festa major\nAINA: som hi tots a ballar", lines=3)
-            btn_modificar = gr.Button("Generar audiodescripció", variant="primary")
         with gr.Column(scale=1):
             narració_lliure = gr.Textbox(label="Narració lliure", lines=18)
@@ -281,4 +303,15 @@ with gr.Blocks(title="Salamandra 7B Instruct · ZeroGPU",css=custom_css,theme=gr
         concurrency_limit=1
     )
 demo.queue(max_size=16).launch()

+# app.py — veureu/schat (Salamandra 7B Instruct · ZeroGPU) — compatible with ENGINE
 from __future__ import annotations
 import os, json
 from typing import List, Dict, Any, Optional, Tuple
 def _build_prompt(prompt: str, system: Optional[str]) -> str:
     """
+    If the tokenizer has 'chat_template', use it with messages [system?, user].
+    Otherwise, create a plain prompt with system at the top.
     """
     tok, _ = _lazy_load()
     messages = []
     chat_template = getattr(tok, "chat_template", None)
     if chat_template:
         return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    # Fallback without chat template
     sys_part = (f"<<SYS>>\n{system.strip()}\n<</SYS>>\n\n" if system and system.strip() else "")
+    return sys_part + f"### Instrucció\n{prompt}\n\n### Resposta\n"
+#@spaces.GPU  # use GPU if available (ZeroGPU)
+#def _generate_with_tools(
+#    messages: List[Dict[str, str]],
+#    tools: List[Dict[str, Any]],
+#    max_new_tokens: int = 512,
+#    temperature: float = 0.7,
+#    top_p: float = 0.95,
+#) -> Dict[str, Any]:
+#    tok, model = _lazy_load()
+#    tools_md = _render_tools_md(tools)
+#    prompt = _compose_chat_prompt(messages, tools_md)
+#    inputs = tok(prompt, return_tensors="pt").to(DEVICE)
+#    with torch.inference_mode():
+#        out = model.generate(
+#            **inputs,
+#            max_new_tokens=int(max_new_tokens),
+#            temperature=float(temperature),
+#            top_p=float(top_p),
+#            do_sample=True if temperature > 0 else False,
+#            pad_token_id=tok.eos_token_id,
+#            eos_token_id=tok.eos_token_id,
+#        )
+#    text = tok.decode(out[0], skip_special_tokens=True).strip()
+#    # If the model returns a JSON block with 'tool_calls', try to extract it
+#    tool_calls: List[Dict[str, Any]] = []
+#    try:
+#        # Search for the last {...} containing "tool_calls"
+#        matches = list(re.finditer(r"\{.*?\"tool_calls\".*?\}", text, flags=re.S))
+#        if matches:
+#            block = text[matches[-1].start():matches[-1].end()]
+#            obj = json.loads(block)
+#            tc = obj.get("tool_calls", [])
+#            if isinstance(tc, list):
+#                tool_calls = tc
+#    except Exception:
+#        pass
+#    # Execute the extracted tool calls, if any
+#    tool_results = maybe_execute_tool_calls(tool_calls) if tool_calls else []
+#    return {"text": text, "tool_calls": tool_calls, "tool_results": tool_results}
+@spaces.GPU # use GPU if available (ZeroGPU)
 def _generate(
     prompt: str,
     system: str = "",
     return tok.decode(out[0], skip_special_tokens=True).strip()
 # ------------------- Gradio Endpoints -------------------
# 1) /predict — what ENGINE expects (only 'prompt' → string)
def predict_for_engine(prompt: str) -> str:
    """Generate a reply for ``prompt`` with fixed generation settings.

    Delegates to ``_generate`` using an empty system message,
    ``max_new_tokens=512``, ``temperature=0.7`` and ``top_p=0.95``, and
    returns whatever ``_generate`` returns.
    """
    # Settings are fixed here because this endpoint only receives the prompt.
    fixed_settings = {
        "system": "",
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.95,
    }
    return _generate(prompt=prompt, **fixed_settings)
# 2) /generate — more controls (prompt + system + params)
def generate_advanced(prompt: str, system: str, max_new_tokens: int, temperature: float, top_p: float) -> str:
    """Generate a reply with caller-supplied system message and sampling parameters.

    Every argument is forwarded unchanged, by keyword, to ``_generate``;
    the return value is whatever ``_generate`` returns.
    """
    generation_args = dict(
        prompt=prompt,
        system=system,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return _generate(**generation_args)
 def salamandra_chat_endpoint(prompt: str) -> Dict[str, Any]:
     """Send ``prompt`` to the shared ``SalamandraClient`` and wrap the reply.

     The module-level ``_salamandra`` client is created on first use (when it
     is still ``None``). Exceptions raised by ``chat()`` are caught and
     reported as the returned text; exceptions raised while constructing the
     client are not caught here.

     Returns:
         A dict with a single ``"text"`` key holding either the chat reply or
         an error description.
     """
     global _salamandra
     # Lazily build the shared client; construction stays outside the try
     # block, so a failing constructor propagates to the caller.
     if _salamandra is None:
         _salamandra = SalamandraClient()
     try:
         return {"text": _salamandra.chat(prompt)}
     except Exception as exc:
         return {"text": f"Error running SalamandraClient: {exc!s}"}
def resume_sentence(sentence, num_words):
    """Ask the model to summarize ``sentence`` in ``num_words`` words.

    Parameters:
    - sentence: The sentence to summarize. It is also the fallback return
      value when the model output contains no "assistant" marker.
    - num_words: Requested summary length; coerced with ``int()``.

    Returns:
    - The first line of the text that follows the first "assistant" marker
      in the model output, or ``sentence`` when the marker is absent.
    """
    word_count = int(num_words)
    # NOTE(review): "Instrució" looks like a typo for "Instrucció" (the
    # spelling used by the other prompts in this file). It is left as-is
    # because it is part of the text sent to the model — confirm before changing.
    request = f"Instrució: Resumeix la següent frase en {word_count} paraules. Input: {sentence}"
    raw_output = generate_advanced(prompt=request, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)

    # Without the role marker there is nothing to extract: return the input.
    if "assistant" not in raw_output:
        return sentence
    # Keep only what follows the first marker, then only its first line.
    reply = raw_output.partition("assistant")[2]
    return reply.strip().partition("\n")[0]
def identity_manager(sentence, person):
    """Ask the model to replace the subject of ``sentence`` with ``person``.

    Parameters:
    - sentence: The sentence to rewrite. It is also the fallback return value
      when the model output contains no "assistant" marker.
    - person: The replacement subject, interpolated into the prompt.

    Returns:
    - The first line of the text that follows the first "assistant" marker
      in the model output, or ``sentence`` when the marker is absent.
    """
    # The continuation lines' indentation is part of the prompt text.
    request = f"""Instrucció: Substitueix el subjecte de la frase per la persona indicada, mantenint la resta igual.
        Frase: {sentence}
        Substitució: {person}
        Resposta:"""
    raw_output = generate_advanced(prompt=request, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)

    # Without the role marker there is nothing to extract: return the input.
    if "assistant" not in raw_output:
        return sentence
    # Keep only what follows the first marker, then only its first line.
    reply = raw_output.partition("assistant")[2]
    return reply.strip().partition("\n")[0]
def free_narration(srt_text):
    """Ask the model to turn an audio description into a short free narration.

    Parameters:
    - srt_text: The audio-description text, interpolated into the prompt.
      It is also the fallback return value when the model output contains
      no "assistant" marker.

    Returns:
    - The first line of the text that follows the first "assistant" marker
      in the model output, or ``srt_text`` when the marker is absent.
    """
    # Fix: the prompt previously interpolated ``srt_final``, a name left over
    # from before the parameter was renamed, so every call raised NameError.
    prompt = f"""Instrucció: Converteix aquesta audiodescripció en una narració lliure breu, natural i coherent.,
        input: {srt_text}
        output:
    """
    # Generate the free narration using the advanced generator
    result = generate_advanced(prompt=prompt, system="", max_new_tokens=512, temperature=0.7, top_p=0.95)
    # Keep only the first line after the 'assistant' role marker, if present
    if "assistant" in result:
        clean_output = result.split("assistant", 1)[1].strip().split("\n")[0]
    else:
        clean_output = srt_text  # fallback to original input
    return clean_output
 # ------------------- HTTP (optional, pure clients) -------------------
     gr.Button("Probar /predict").click(predict_for_engine, [in_prompt_engine], out_engine, api_name="predict", concurrency_limit=1)
     gr.Markdown("---")
     gr.Markdown('<h2 style="text-align:center">Resumir frases</h2>')
     with gr.Row():
         with gr.Column(scale=1):
         btn_resumir = gr.Button("Resumir", variant="primary")
     btn_resumir.click(
+        resume_sentence,
         inputs=[frase, num_paraules],
         outputs=out_resumir,
         api_name="resumir",
     with gr.Row():
         with gr.Column(scale=1):
             srt = gr.Textbox(label="Audiodescripció", value="(AD)\nTOTS CANTANT: avui celebrem la nostra festa major\nAINA: som hi tots a ballar", lines=3)
+            btn_modificar = gr.Button("Generar narració lliure", variant="primary")
         with gr.Column(scale=1):
             narració_lliure = gr.Textbox(label="Narració lliure", lines=18)
         concurrency_limit=1
     )
+    gr.Markdown('<h2 style="text-align:center">Sortida del model Salamandra a partir d’una petició</h2>')
+    with gr.Row():
+        prompt = gr.Textbox(label="prompt", lines=10)
+    with gr.Row():
+        btn2 = gr.Button("Generar", variant="primary")
+    with gr.Row():
+        out2 = gr.JSON(label="Salida")
+    btn2.click(salamandra_chat_endpoint, [prompt], out2, api_name="generate_out_from_prompt", concurrency_limit=1)
+    gr.Markdown("---")
 demo.queue(max_size=16).launch()