# Hugging Face Space app (app.py) — Gita-persona chatbot.
# (Scraped page header — runtime-error banner, commit hashes, and the
# line-number gutter — removed; it was extraction residue, not code.)
# torch for inference, gradio for the UI, `spaces` for GPU allocation on
# Hugging Face Spaces (the @spaces.GPU decorator below).
import os, torch, gradio as gr, spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Model repo to load; override via the MODEL_ID environment variable.
MODEL_ID = os.getenv("MODEL_ID", "JDhruv14/fullnfinal")

# --- System prompt (Gita persona) ---
GITA_SYSTEM_PROMPT = """You are Krishna — a compassionate, serene, and practical guider inspired by the Bhagavad Gita"""

# Load once at import time (CPU until first call; device_map will move to GPU on first run)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    # bf16 when a GPU is present; "auto" lets transformers pick a dtype on CPU.
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else "auto",
    trust_remote_code=True,
)

# Ensure pad token exists (many chat models reuse EOS as PAD)
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token = tokenizer.eos_token
def _msgs_from_history(history, system_text):
msgs = []
if system_text:
msgs.append({"role": "system", "content": system_text})
if not history:
return msgs
# Support both new "messages" format and legacy (user, assistant) tuples
if isinstance(history[0], dict) and "role" in history[0] and "content" in history[0]:
for m in history:
role, content = m.get("role"), m.get("content")
if role in ("user", "assistant", "system") and content:
msgs.append({"role": role, "content": content})
else:
for user, assistant in history:
if user:
msgs.append({"role": "user", "content": user})
if assistant:
msgs.append({"role": "assistant", "content": assistant})
return msgs
def _eos_ids(tok):
# Support ints/lists and optional <|im_end|>
ids = set()
if tok.eos_token_id is not None:
if isinstance(tok.eos_token_id, (list, tuple)):
ids.update(tok.eos_token_id)
else:
ids.add(tok.eos_token_id)
try:
im_end = tok.convert_tokens_to_ids("<|im_end|>")
if im_end is not None and im_end != tok.unk_token_id:
ids.add(im_end)
except Exception:
pass
return list(ids)
def chat_fn(message, history, system_text, temperature, top_p, max_new, min_new):
    """Generate one assistant reply for ``message`` given chat ``history``.

    Builds the chat-template prompt from the history plus the new user turn,
    samples a completion from the module-level ``model``, and returns only
    the newly generated text with special tokens stripped.
    """
    conversation = _msgs_from_history(history, system_text)
    conversation.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    enc = tokenizer([prompt], return_tensors="pt").to(model.device)

    # Fall back to EOS as pad id when the tokenizer defines no pad token.
    pad_id = tokenizer.pad_token_id
    if pad_id is None:
        pad_id = tokenizer.eos_token_id
    cfg_kwargs = dict(
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
        max_new_tokens=int(max_new),
        min_new_tokens=int(min_new),
        repetition_penalty=1.02,  # gentle nudge against loops
        no_repeat_ngram_size=3,
        pad_token_id=pad_id,
    )
    stop_ids = _eos_ids(tokenizer)
    if stop_ids:
        cfg_kwargs["eos_token_id"] = stop_ids

    with torch.no_grad():
        generated = model.generate(
            **enc, generation_config=GenerationConfig(**cfg_kwargs)
        )
    # Slice off the prompt tokens; decode only the continuation.
    continuation = generated[:, enc["input_ids"].shape[1]:]
    return tokenizer.batch_decode(continuation, skip_special_tokens=True)[0].strip()
@spaces.GPU()
def gradio_fn(message, history):
    """ChatInterface callback: answer ``message`` with the Krishna persona.

    The Gita system prompt and fixed sampling settings (temperature 0.7,
    top_p 0.95, up to 512 new tokens, no minimum) are injected here rather
    than exposed in the UI.
    """
    return chat_fn(message, history, GITA_SYSTEM_PROMPT, 0.7, 0.95, 512, 0)
# UI: the Blocks wrapper exists mainly to inject custom CSS — a fixed-width,
# left-aligned "glass" chat panel over a full-screen background image.
# NOTE(review): url("/file=bg.jpg") assumes bg.jpg ships with the Space —
# confirm the file exists, else the background silently falls back.
with gr.Blocks(css="""
:root { --chat-w: 520px; }
html, body {
height: 100%;
overflow-y: hidden; /* no page scroll */
margin: 0;
}
/* Full-screen background image with a soft dark overlay */
body {
background:
linear-gradient(0deg, rgba(0,0,0,.28), rgba(0,0,0,.28)),
url("/file=bg.jpg") center / cover no-repeat fixed; /* <- change filename if needed */
}
/* Left-aligned, narrower chat panel */
.gradio-container {
max-width: var(--chat-w);
width: var(--chat-w);
margin-left: 16px;
margin-right: auto;
padding: 20px;
font-family: sans-serif;
position: relative;
/* optional glass effect for readability */
background: rgba(0,0,0,.30);
border-radius: 16px;
backdrop-filter: blur(6px);
}
.chatbot {
height: 480px !important;
overflow-y: auto;
}
@media (max-width: 720px){
:root { --chat-w: 92vw; }
.gradio-container { margin-left: 4vw; }
}
""") as demo:
    # Header banner (raw HTML rendered through Markdown).
    gr.Markdown(
        """
<div style='text-align: left; padding: 10px;'>
<h1 style='font-size: 2.0em; margin-bottom: 0.2em;'><span style='color: #4F46E5;'>kRISHNA.ai</span></h1>
<p style='font-size: 1.0em; color: #bbb;'>5000-Years of Ancient WISDOM with Modern AI ✨</p>
</div>
""",
        elem_id="header"
    )
    # type="messages" means history arrives as role/content dicts — the
    # format _msgs_from_history's first branch handles.
    gr.ChatInterface(
        fn=gradio_fn,
        examples=[
            "Hello!",
            "How can I overcome fear of failure?",
            "How do I forgive someone who hurt me deeply?",
            "What can I do to stop overthinking?"
        ],
        chatbot=gr.Chatbot(type="messages", elem_classes="chatbot"),
        type="messages",
    )
# Entry point: start the Gradio server (blocking call).
# Fix: removed the stray trailing "|" after demo.launch() — it was scrape
# residue and made this line a syntax error.
if __name__ == "__main__":
    demo.launch()