Spaces:

caarleexx
/

IZAaa-C

Runtime error

App Files Files Community

caarleexx commited on Nov 27, 2025

Commit

52fbcd4

verified ·

1 Parent(s): 298db72

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -146

app.py CHANGED Viewed

@@ -1,180 +1,159 @@
 import gradio as gr
 import os
-import json
-from pathlib import Path
 from datetime import datetime
-try:
-    from llama_cpp import Llama
-    LLM_AVAILABLE = True
-except Exception as e:
-    LLM_AVAILABLE = False
-    _err = str(e)
-# ==========================
-# CONFIG
-# ==========================
-MODEL_PATH = os.environ.get("MODEL_PATH", "models/gemma-2-2b-it-Q4_K_M.gguf")
-TEMP_A = 0.0     # deterministic
-TEMP_M = 0.9     # exploratory
-TOP_P = 0.95
-MAX_TOKENS = 256
-# ==========================
-# UTILITIES
-# ==========================
-def load_llm():
-    if not LLM_AVAILABLE:
-        return None
-    return Llama(
-        model_path=MODEL_PATH,
-        n_ctx=4096,
-        n_threads=8,
-        verbose=False
-    )
-llm = load_llm()
-def generate(prompt, temperature):
-    if not LLM_AVAILABLE or llm is None:
-        return f"(LLM não disponível: {_err})"
-    out = llm.create_completion(
-        prompt=prompt,
-        temperature=temperature,
-        max_tokens=MAX_TOKENS,
-        top_p=TOP_P
-    )
-    return out["choices"][0]["text"].strip()
 def divergence(a, b):
-    if not a or not b:
         return 1.0
-    wa = set(a.split())
-    wb = set(b.split())
-    inter = wa & wb
-    union = wa | wb
-    if not union:
-        return 1.0
-    return round(1 - len(inter)/len(union), 3)
-# ==========================
-# DATASET SAVE
-# ==========================
-SAVE_DIR = Path("outputs")
-SAVE_DIR.mkdir(exist_ok=True)
-def save_dataset(question, respA, respM, div_score):
-    timestamp = datetime.utcnow().isoformat()
-    base = SAVE_DIR / f"dataset_{timestamp}"
-    md_path = base.with_suffix(".md")
-    json_path = base.with_suffix(".json")
-    # Markdown
-    md = f"""
-# Registro A-M
-Gerado: {timestamp}
-## Pergunta
-{question}
-## Resposta A (determinística)
-{respA}
-## Resposta M (exploratória)
-{respM}
-### Divergência
-{div_score}
-"""
-    md_path.write_text(md, encoding="utf-8")
-    # JSON
-    data = {
         "timestamp": timestamp,
         "question": question,
-        "response_A": respA,
-        "response_M": respM,
-        "divergence": div_score
     }
-    json_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
-    return str(md_path), str(json_path)
-# ==========================
-# GRADIO UI
-# ==========================
-def run(question):
-    if not question.strip():
-        return ["", "", "", ""]
-    prompt = f"Pergunta: {question}\nResponda de forma direta:"
-    respA = generate(prompt, TEMP_A)
-    respM = generate(prompt, TEMP_M)
-    div_score = divergence(respA, respM)
-    return respA, respM, str(div_score), ""
-def save_btn(question, respA, respM, div_score):
-    if not question or not respA:
-        return "Nada para salvar."
-    md_path, json_path = save_dataset(question, respA, respM, div_score)
-    return f"Salvo!\nMD: {md_path}\nJSON: {json_path}"
-with gr.Blocks(title="A-M Divergence Generator (Gemma 2B)") as demo:
-    gr.Markdown("""
-    # 🔵 A-M Divergence Generator
-    Gera **duas respostas** usando o mesmo modelo Gemma:
-    - **A** → Determinística (Temperature 0.0)
-    - **M** → Exploratório (Temperature 0.9)
-    - Mostra divergência → Se ≈0 = iguais, se ≈1 = totalmente diferentes
-    - Permite salvar dataset `.md` + `.json` automaticamente
-    """)
-    with gr.Row():
-        question = gr.Textbox(label="Pergunta", placeholder="Digite sua pergunta aqui...", lines=3)
-    run_btn = gr.Button("Gerar A / M")
-    with gr.Row():
-        respA = gr.Textbox(label="Resposta A (determinística)")
-        respM = gr.Textbox(label="Resposta M (exploratória)")
-    div = gr.Textbox(label="Divergência (0 = igual, 1 = totalmente diferente)")
-    status = gr.Textbox(label="Status / Logs")
-    save = gr.Button("Salvar como Dataset (.md + .json)")
-    run_btn.click(
-        fn=run,
-        inputs=[question],
-        outputs=[respA, respM, div, status]
-    )
-    save.click(
-        fn=save_btn,
-        inputs=[question, respA, respM, div],
-        outputs=[status]
-    )
-demo.launch()

+# ---------------------------------------------------------
+#  app.py — Twin-Branch A–M Reasoning Demo (Gemma 3n)
+#  Carlos Rodrigues — 2025
+# ---------------------------------------------------------
+import torch
 import gradio as gr
+from huggingface_hub import snapshot_download
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import os
 from datetime import datetime
+import re
+# ---------------------------------------------------------
+# CONFIGURAÇÃO DO MODELO (troque aqui se quiser outro)
+# ---------------------------------------------------------
+MODEL_REPO = "google/gemma-3n-E2B-it-litert-lm"
+LOCAL_MODEL_DIR = snapshot_download(
+    repo_id=MODEL_REPO,
+    allow_patterns=["*.json", "*.bin", "*.model", "*.safetensors", "*.txt"],
+)
+# ---------------------------------------------------------
+# CARREGAR TOKENIZER & MODELO
+# ---------------------------------------------------------
+tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_DIR)
+model = AutoModelForCausalLM.from_pretrained(
+    LOCAL_MODEL_DIR,
+    torch_dtype=torch.float32,
+    device_map="auto"   # usa GPU se tiver
+)
+# ---------------------------------------------------------
+# FUNÇÕES AUXILIARES
+# ---------------------------------------------------------
+def clean_text(t):
+    t = t.replace("<s>", "").replace("</s>", "")
+    t = re.sub(r"\s+", " ", t)
+    return t.strip()
+def generate_answer(prompt, temperature=0.0, top_p=1.0, max_new_tokens=180):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=temperature > 0
+        )
+    decoded = tokenizer.decode(output[0])
+    return clean_text(decoded.replace(prompt, ""))
 def divergence(a, b):
+    """
+    Divergência simples: token overlap (0=igual, 1=muito diferente)
+    """
+    sa, sb = a.split(), b.split()
+    if len(sa) == 0 or len(sb) == 0:
         return 1.0
+    common = len(set(sa) & set(sb))
+    union = len(set(sa) | set(sb))
+    sim = common / union
+    return round(1 - sim, 3)
+# ---------------------------------------------------------
+# PIPELINE A–M
+# ---------------------------------------------------------
+def twin_branch_pipeline(question):
+    timestamp = datetime.utcnow().isoformat() + "Z"
+    prompt = f"Pergunta: {question}\nResponda claramente em até 120 palavras:\n"
+    # A — determinístico
+    answer_A = generate_answer(prompt, temperature=0.0, top_p=1.0)
+    # M — exploratório (descobre divergências)
+    answer_M = generate_answer(prompt, temperature=0.9, top_p=0.95)
+    # Divergência
+    div_score = divergence(answer_A, answer_M)
+    # Decisão simples
+    if div_score < 0.15:
+        decision = "Ambas respostas convergem — usar Resposta A."
+    elif div_score < 0.40:
+        decision = "Diferença moderada — sugerir resposta A, mas avisar ambiguidade."
+    elif div_score < 0.75:
+        decision = "As respostas divergem — investigar as duas interpretações."
+    else:
+        decision = "Alta divergência — perguntar ao operador para sanear intenção."
+    # Retorno estruturado
+    return {
         "timestamp": timestamp,
         "question": question,
+        "A (determinística)": answer_A,
+        "M (exploratória)": answer_M,
+        "Divergência (0=igual,1=muito diferente)": div_score,
+        "Decisão da Máquina": decision
     }
+# ---------------------------------------------------------
+# INTERFACE GRADIO
+# ---------------------------------------------------------
+def run_interface(question):
+    result = twin_branch_pipeline(question)
+    md = f"""
+# 🔎 Twin-Reasoning A–M (Gemma-3n E2B)
+**Pergunta:**
+{result['question']}
+---
+## 🟦 Resposta A (determinística)
+{result['A (determinística)']}
+## 🟧 Resposta M (exploratória)
+{result['M (exploratória)']}
+---
+### 📊 Divergência
+**{result['Divergência (0=igual,1=muito diferente)']}**
+### 🧭 Decisão Interna
+**{result['Decisão da Máquina']}**
+---
+*Gerado em:* `{result['timestamp']}`
+    """
+    return md
+# ---------------------------------------------------------
+# LANÇAR A APP
+# ---------------------------------------------------------
+demo = gr.Interface(
+    fn=run_interface,
+    inputs=gr.Textbox(label="Digite uma pergunta:", placeholder="Ex: É legítimo filmar alguém em via pública?"),
+    outputs=gr.Markdown(),
+    title="Twin-Reasoner A–M — Gemma 3n",
+    description="Simulador de Arquitetura A–M (Geração + Verificação) baseado em pesos congelados.",
+)
+if __name__ == "__main__":
+    demo.launch()