# -*- coding: utf-8 -*-
"""Script to create prompt to interact with LLMs for text generation"""
#=====================================================================================
# Importing Libraries ===============================================================
#=====================================================================================
import unicodedata
import re
from Mori_Chatbot_SpanishCorrections import polish_spanish
from Mori_Technical_RAGwithFAISS import retrieve_docs
import os, torch
import warnings
# ************************************************************************
# Default environment, warning, and device configuration
# ************************************************************************
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#=====================================================================================
# Functions =========================================================================
#=====================================================================================
def recortar_ultima_oracion(texto):
    """Trim a trailing incomplete sentence from generated text"""
    texto = texto.strip()
    if not texto:
        return texto
    # Valid sentence-ending punctuation
    signos = ".?!…"
    # Find the position of the last closing punctuation mark
    posiciones = [texto.rfind(s) for s in signos]
    posiciones = [p for p in posiciones if p != -1]
    if not posiciones:
        return texto  # no closing punctuation -> return as-is
    final = max(posiciones)
    # Make sure we do not cut the text off too early
    if final < len(texto) * 0.3:
        return texto
    return texto[:final + 1].strip()
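# Illustrative behavior (the example string is hypothetical, not from the dataset):
#   recortar_ultima_oracion("La respuesta está completa. Y esta otra quedó cortada a la mit")
#   -> "La respuesta está completa."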
def normalize_text(text: str) -> str:
    """Normalize text (lowercase, strip accents and question marks) for robust matching"""
    t = text.lower().strip()
    t = unicodedata.normalize("NFD", t)
    t = "".join(ch for ch in t if unicodedata.category(ch) != "Mn")
    t = t.replace("¿", "").replace("?", "")
    t = re.sub(r"\s+", " ", t)
    return t
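# Illustrative behavior: accents and question marks are stripped, whitespace collapsed.
#   normalize_text("¿Para qué sirve FAISS?")  ->  "para que sirve faiss"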
def classify_question_type_from_text(text: str) -> str:
    """Determine the type of question from simple keyword heuristics"""
    t = normalize_text(text)
    if "para que sirve" in t or "para que se usa" in t:
        return "funcionalidad"
    if t.startswith("como ") or "pasos para" in t or "como puedo" in t:
        return "procedimiento"
    if t.startswith("que es ") or "definicion de" in t:
        return "definicion"
    # Default bucket when no pattern matches
    return "definicion"
def build_prompt(qtype: str, question: str) -> str:
    """Generates a base prompt"""
    return (
        f"Tipo: {qtype}\n"
        f"Pregunta: {question}\n"
        "Respuesta:"
    )

def build_prompt_inference(question: str):
    """Generates an inference prompt"""
    qtype = classify_question_type_from_text(question)
    return build_prompt(qtype, question)

def build_prompt_training(row):
    """Generates a prompt for training"""
    qtype = row["question_type"]  # definicion / procedimiento / funcionalidad
    question = row["input"]
    return build_prompt(qtype, question)
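# Illustrative behavior: the inference prompt embeds the detected question type.
#   build_prompt_inference("¿Qué es un embedding?")
#   -> "Tipo: definicion\nPregunta: ¿Qué es un embedding?\nRespuesta:"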
def build_prompt_for_mori(user_question: str, question_type: str, top_doc: dict) -> str:
    """
    One-shot prompt for Mori's RAG pipeline, keyed on question_type
    (definicion, procedimiento, funcionalidad).
    """
    ejemplo_q = (top_doc.get("input") or "").strip()
    ejemplo_a = (top_doc.get("output") or "").strip()
    contexto = (top_doc.get("context") or "").strip()
    term = (top_doc.get("canonical_term") or "").strip()
    prompt = (
        "Eres un asistente técnico llamado Mori. Respondes en español, de forma clara y concisa.\n\n"
        f"Contexto del concepto:\n"
        f"- Término: {term}\n"
        f"- Área: {contexto}\n"
        f"- Tipo de pregunta: {question_type}\n\n"
        f"A continuación tienes un ejemplo de pregunta y respuesta del mismo tipo \"{question_type}\":\n"
        f"Pregunta de ejemplo:\n{ejemplo_q}\n\n"
        f"Respuesta de ejemplo:\n{ejemplo_a}\n\n"
        "Usa este estilo y nivel de detalle como guía.\n\n"
        f"Ahora responde la siguiente pregunta del usuario manteniendo el tipo \"{question_type}\" "
        "(sin inventar información que no aparezca en el contexto recuperado, o que contradiga el ejemplo):\n\n"
        f"Pregunta del usuario:\n{user_question}\n\n"
        "Respuesta:"
    )
    return prompt
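# Illustrative usage with a hypothetical retrieved document (all field values invented):
#   doc = {"input": "¿Qué es FAISS?", "output": "FAISS es una librería para búsqueda vectorial.",
#          "context": "RAG", "canonical_term": "FAISS", "question_type": "definicion"}
#   prompt = build_prompt_for_mori("¿Qué es un índice FAISS?", "definicion", doc)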
def answer_with_mori_rag(tokenizer, model, question: str, modo: str = "exacto", k: int = 5, score_threshold: float = 0.88, verbose=True) -> tuple:
    """
    Mori RAG answer:
    - Detects the question_type
    - Retrieves docs from FAISS
    - Filters them by question_type
    - Uses a score threshold to decide which answer to return:
      - If the threshold is surpassed -> answer taken directly from FAISS
      - Otherwise -> generative answer from the fine-tuned Mori model
    - Uses polish_spanish to return the best possible grammatically corrected answer
    Returns an (answer, prompt) tuple.
    """
    # 1) Detect the question type
    qtype = classify_question_type_from_text(question)
    print(f"[Tipo detectado] {qtype}")
    # 2) Retrieve documents from FAISS
    docs = retrieve_docs(question, k=k, verbose=False)
    if not docs:
        print("[RAG] No se encontraron documentos, usando prompt simple.")
        prompt = build_prompt_inference(question)
    else:
        # 3) Filter by question_type first
        same_type = [d for d in docs if d.get("question_type") == qtype]
        if same_type:
            top_doc = same_type[0]
        else:
            print("[RAG] No hay docs del mismo question_type, usando top-1 general.")
            top_doc = docs[0]
        if verbose:
            # Pretty debug output
            print("\n[RAG] Documento usado como ejemplo:")
            print("  score:", top_doc["score"])
            print("  term :", top_doc.get("canonical_term", ""))
            print("  ctx  :", top_doc.get("context", ""))
            print("  qtype:", top_doc.get("question_type", ""))
            print("  Qej  :", top_doc.get("input", ""))
            print("  Aej  :", top_doc.get("output", ""))
        # 4) Apply the threshold ONLY to that top_doc (ideally of the same type)
        if top_doc.get("question_type") == qtype and top_doc["score"] >= score_threshold:
            if verbose:
                print(f"[RAG] Coincidencia fuerte (>={score_threshold}) para tipo '{qtype}'. "
                      "Usando output directo del dataset.")
            return polish_spanish(top_doc["output"]), build_prompt_for_mori(question, qtype, top_doc)
        # 5) Below the threshold -> use the generative prompt with RAG
        prompt = build_prompt_for_mori(question, qtype, top_doc)
    # 6) Generate with Mori using the prompt
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=256,
    ).to(model.device)
    gen_kwargs = get_gen_kwargs(modo)
    output_ids = model.generate(
        **inputs,
        **gen_kwargs
    )
    raw_answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # 7) Polish the output
    return polish_spanish(raw_answer), prompt
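# Illustrative usage (assumes a Hugging Face tokenizer/model pair is already loaded):
#   answer, prompt_used = answer_with_mori_rag(tokenizer, model, "¿Qué es un embedding?", modo="exacto")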
def answer_with_mori_plain(tokenizer, model, question: str, modo: str = "exacto") -> tuple:
    """
    Mori answer without RAG: just the inference prompt with the fine-tuned model.
    - Uses polish_spanish to return the best possible grammatically corrected answer
    Returns an (answer, prompt) tuple.
    """
    prompt = build_prompt_inference(question)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=64
    ).to(model.device)
    gen_kwargs = get_gen_kwargs(modo)
    output_ids = model.generate(
        **inputs,
        **gen_kwargs
    )
    raw_answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return polish_spanish(raw_answer), prompt
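# Illustrative usage (assumes the same tokenizer/model pair as above):
#   answer, prompt_used = answer_with_mori_plain(tokenizer, model, "¿Qué es un embedding?")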
def build_qwen_system_prompt(persona: str) -> str:
    """Generates a system prompt based on the selected model personality"""
    p = (persona or "").lower()
    base = (
        "Eres Mori Técnico, un asistente de ciencia de datos. "
        "Respondes siempre en español de México, con explicaciones claras y amables. "
    )
    if "exacto" in p:
        return (
            base +
            "Respondes de forma muy breve, directa y precisa, "
            "en un solo párrafo de máximo 64 palabras, sin listas ni numeración."
        )
    elif "creativo" in p:
        return (
            base +
            "Respondes de forma creativa y entusiasta, con un tono cálido y motivador, "
            "en un solo párrafo de máximo 92 palabras, evitando listas y numeración."
        )
    else:
        return (
            base +
            "Respondes de forma breve, clara y natural, "
            "en un solo párrafo y evitando listas y numeración."
        )
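# Illustrative behavior: personality matching is substring-based and case-insensitive.
#   build_qwen_system_prompt("Mori Exacto")  -> base persona text + the brief/precise style suffix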
def answer_with_qwen_base(
    tokenizer,
    model,
    user_question: str,
    persona: str = "Mori Técnico",
    max_new_tokens: int = 64,
) -> tuple:
    """
    Generates an answer using the base Qwen model, without RAG or fine-tuning.
    - Adjusts the style according to the personality (exacto / creativo).
    - Uses max_new_tokens to control the length of the answer.
    Returns an (answer, prompt) tuple.
    """
    if not user_question.strip():
        return "Necesito que me cuentes algo para poder ayudarte 🙂.", ""
    system_prompt = build_qwen_system_prompt(persona)
    # 1) Build the text prompt
    if hasattr(tokenizer, "apply_chat_template"):
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_question.strip()},
        ]
        # Return a string, not tensors
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        prompt = (
            f"system {system_prompt}\n"
            f"user {user_question.strip()}\n"
            f"assistant "
        )
    # 2) Tokenize the prompt
    inputs = tokenizer(
        prompt,
        return_tensors="pt"
    ).to(device)
    gen_kwargs = get_gen_kwargs(persona)
    p = (persona or "").lower()
    # 3) Generate (max_new_tokens comes from the UI for the "exacto" persona)
    with torch.no_grad():
        if "exacto" in p:
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.2,
                num_beams=1,
                top_p=0.8,
                pad_token_id=tokenizer.eos_token_id,
            )
        elif "creativo" in p:
            output_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.9,
                num_beams=1,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
            )
        else:
            # Fallback for unknown personas: use the preset hyperparameters
            output_ids = model.generate(
                **inputs,
                **gen_kwargs,
                pad_token_id=tokenizer.eos_token_id,
            )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # 4) Strip the prompt from the decoded output (same logic with or without a chat template)
    cleaned = text
    if cleaned.startswith(prompt):
        cleaned = cleaned[len(prompt):].strip()
    else:
        # Fall back to cutting everything up to the last "assistant" marker
        lower = cleaned.lower()
        marker = "assistant"
        idx = lower.rfind(marker)
        if idx != -1:
            cleaned = cleaned[idx + len(marker):].strip()
    cleaned = recortar_ultima_oracion(cleaned)
    return cleaned.strip(), prompt
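# Illustrative usage (assumes a Qwen chat tokenizer/model pair is already loaded):
#   answer, prompt_used = answer_with_qwen_base(tokenizer, model, "¿Qué es pandas?", persona="exacto")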
def get_gen_kwargs(modo="exacto"):
    """Selects the Mori personality via different generation hyperparameter presets"""
    modo = modo.lower().strip()
    presets = {
        "exacto": dict(
            max_new_tokens=64,
            num_beams=4,
            do_sample=False,
            no_repeat_ngram_size=3,
            repetition_penalty=1.05,
            early_stopping=True,
        ),
        "superexacto": dict(  # stricter, less creative
            max_new_tokens=48,
            num_beams=6,
            do_sample=False,
            no_repeat_ngram_size=4,
            repetition_penalty=1.2,
            early_stopping=True,
        ),
        "creativo": dict(
            max_new_tokens=64,
            num_beams=1,
            do_sample=True,
            temperature=0.4,
            top_p=0.9,
            no_repeat_ngram_size=3,
            repetition_penalty=1.05,
            early_stopping=True,
        ),
        "suave": dict(  # freer sampling
            max_new_tokens=80,
            num_beams=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            no_repeat_ngram_size=2,
            repetition_penalty=1.0,
            early_stopping=True,
        ),
        "agresivo": dict(  # maximally creative sampling
            max_new_tokens=120,
            num_beams=1,
            do_sample=True,
            temperature=1.1,
            top_p=0.95,
            no_repeat_ngram_size=1,
            repetition_penalty=0.9,
            early_stopping=False,
        ),
        "beams_altos": dict(  # most stable generative mode
            max_new_tokens=80,
            num_beams=8,
            do_sample=False,
            no_repeat_ngram_size=4,
            repetition_penalty=1.1,
            early_stopping=True,
        ),
    }
    return presets.get(modo, presets["exacto"])
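# Illustrative behavior: input is lower-cased and stripped before the lookup.
#   get_gen_kwargs("SuperExacto ")  -> the "superexacto" preset
#   get_gen_kwargs("desconocido")   -> falls back to the "exacto" preset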
#=====================================================================================
# FIN ===============================================================================
#=====================================================================================