JDhruv14 committed
Commit e8c693f · verified · 1 Parent(s): e51e513

Update app.py

Files changed (1): app.py +103 -76
app.py CHANGED
@@ -1,94 +1,121 @@
- import os, torch, gradio as gr, spaces
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
- from peft import PeftModel
-
- # ---- IDs (can override from Space Secrets) ----
- BASE_ID = os.getenv("BASE_ID", "Qwen/Qwen2.5-3B-Instruct")
- ADAPTER_ID = os.getenv("ADAPTER_ID", "JDhruv14/Gita-FT-v2-Qwen2.5-3B")
-
- # ---- Load tokenizer & base model ----
- tokenizer = AutoTokenizer.from_pretrained(BASE_ID, trust_remote_code=True)
-
- model = AutoModelForCausalLM.from_pretrained(
-     BASE_ID,
      device_map="auto",
-     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else "auto",
-     trust_remote_code=True,
  )
- # Apply LoRA adapter
- model = PeftModel.from_pretrained(model, ADAPTER_ID)
- model.eval()
-
- def _eos_ids(tok):
-     ids = {tok.eos_token_id}
-     im_end = tok.convert_tokens_to_ids("<|im_end|>")
-     if im_end is not None:
-         ids.add(im_end)
-     return list(ids)
-
- def _format_history(history, system_text):
-     msgs = []
-     if system_text:
-         msgs.append({"role": "system", "content": system_text})
-     for user, assistant in history:
-         if user:
-             msgs.append({"role": "user", "content": user})
-         if assistant:
-             msgs.append({"role": "assistant", "content": assistant})
-     return msgs
-
- @spaces.GPU(duration=120)  # keep for ZeroGPU; remove this decorator if using a normal GPU Space
- def chat_fn(message, history, system_text, temperature, top_p, max_new_tokens, min_new_tokens):
-     msgs = _format_history(history, system_text) + [{"role": "user", "content": message}]
-     prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
-     inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
-
-     gen_cfg = GenerationConfig(
-         do_sample=True,
-         temperature=float(temperature),
-         top_p=float(top_p),
-         max_new_tokens=int(max_new_tokens),
-         min_new_tokens=int(min_new_tokens),
-         repetition_penalty=1.02,
-         no_repeat_ngram_size=3,
-         eos_token_id=_eos_ids(tokenizer),
-         pad_token_id=tokenizer.eos_token_id,
-     )
      with torch.no_grad():
-         outputs = model.generate(**inputs, generation_config=gen_cfg)
-
-     # show only the assistant reply (slice off the prompt)
-     new_tokens = outputs[:, inputs["input_ids"].shape[1]:]
-     reply = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
-     return reply
-
- with gr.Blocks() as demo:
-     gr.Markdown(
-         "<h1 style='text-align:center'>Gita Assistant (Qwen2.5-3B + LoRA)</h1>"
-         "<p style='text-align:center'>Ask in English / हिंदी / ગુજરાતી. The assistant cites verses when relevant.</p>"
-     )
-
-     system_box = gr.Textbox(
-         value="Reply in the user’s language with 2–3 concrete points (200–400 words); cite Gita verses when relevant.",
-         label="System prompt",
      )
-     temperature = gr.Slider(0.1, 1.2, value=0.7, step=0.05, label="temperature")
-     top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p")
-     max_new = gr.Slider(64, 1024, value=512, step=16, label="max_new_tokens")
-     min_new = gr.Slider(0, 512, value=160, step=8, label="min_new_tokens")
-
-     gr.ChatInterface(
-         fn=chat_fn,  # def chat_fn(message, history, system_text, temperature, top_p, max_new, min_new)
-         additional_inputs=[system_box, temperature, top_p, max_new, min_new],
-         chatbot=gr.Chatbot(height=520, type="tuples"),  # keep tuple history; no behavior change
          examples=[
-             ["How do I practice Nishkama Karma at work?", system_box.value, 0.7, 0.9, 512, 160],
-             ["What does 3.19 teach about duty without attachment?", system_box.value, 0.7, 0.9, 512, 160],
-             ["How to overcome fear of failure according to the Gita?", system_box.value, 0.7, 0.9, 512, 160],
          ],
      )

  if __name__ == "__main__":
      demo.launch()
-
-

+ import torch
+ torch._dynamo.config.disable = True
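+ # note: turns off torch.compile/TorchDynamo graph capture; presumably a
+ # startup/stability workaround on ZeroGPU (an assumption, not stated here)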
+ from collections.abc import Iterator
+ from transformers import (
+     Gemma3ForConditionalGeneration,
+     TextIteratorStreamer,
+     Gemma3Processor,
+     Gemma3nForConditionalGeneration,
+ )
+ import gradio as gr
+ import os
+ import spaces
+
+ # Load environment variables
+ model_3n_id = os.getenv("MODEL_3N_ID", "JDhruv14/merged_model")
+
+ # Load model and processor
+ model_3n = Gemma3nForConditionalGeneration.from_pretrained(
+     model_3n_id,
+     dtype=torch.bfloat16,
      device_map="auto",
+     attn_implementation="eager"
  )
+ input_processor = Gemma3Processor.from_pretrained(model_3n_id)
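+ # (eager attention sidesteps flash-attention kernel requirements, and the
+ # processor supplies the chat template used below; likely why both were chosen)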
+
+ def infer_text(messages, max_new_tokens=300, temperature=1.0, top_p=0.95, top_k=64, repetition_penalty=1.1):
+     chat_template = []
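+     # "messages" arrives as Gradio tuple-style history: a list of (user, assistant) pairs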
+     for turn in messages:
+         if turn[0]:
+             chat_template.append({"role": "user", "content": [{"type": "text", "text": turn[0]}]})
+         if turn[1]:
+             chat_template.append({"role": "assistant", "content": [{"type": "text", "text": turn[1]}]})
+     chat_template.append({"role": "assistant", "content": [{"type": "text", "text": ""}]})
+
+     inputs = input_processor.apply_chat_template(
+         chat_template,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_dict=True,
+         return_tensors="pt",
+     ).to(device=model_3n.device, dtype=torch.bfloat16)
+
      with torch.no_grad():
+         output_tokens = model_3n.generate(
+             **inputs,
+             max_new_tokens=max_new_tokens,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=top_k,
+             repetition_penalty=repetition_penalty,
+             do_sample=True,
+         )
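+
+     # decode only the newly generated tokens (slice the prompt off the output)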
+     generated_text = input_processor.batch_decode(
+         output_tokens[:, inputs['input_ids'].shape[1]:], skip_special_tokens=True
+     )[0]
+     return generated_text.strip()
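+
+ # On ZeroGPU Spaces, @spaces.GPU allocates a GPU for the duration of each call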
+ @spaces.GPU()
+ def gradio_fn(message, history):
+     response = infer_text(history + [(message, None)])
+     return response
+
+ with gr.Blocks(css="""
+ .gradio-container {
+     max-width: 600px;
+     margin: auto;
+     padding: 20px;
+     font-family: sans-serif;
+     position: relative;
+ }
+ .chatbot {
+     height: 500px !important;
+     overflow-y: auto;
+ }
+ .corner {
+     position: fixed;
+     bottom: 2px;
+     z-index: 9999;
+     pointer-events: none;
+ }
+ #left { left: 2px; }
+ #right { right: 2px; }
+ .corner img {
+     height: 500px;  /* fixed height */
+     width: auto;    /* auto to keep aspect ratio */
+ }
+
+ """) as demo:
+     gr.Markdown(
+         """
+         <div style='text-align: center; padding: 10px;'>
+             <h1 style='font-size: 2.2em; margin-bottom: 0.2em;'>🤖 <span style='color: #4F46E5;'>kRISHNA.ai</span></h1>
+             <p style='font-size: 1.1em; color: #555;'>5000 Years of Ancient WISDOM with Modern AI ✨</p>
+         </div>
+         """,
+         elem_id="header"
      )
+     chat = gr.ChatInterface(
+         fn=gradio_fn,
          examples=[
+             "Hello!",
+             "How can I overcome fear of failure?",
+             "How do I forgive someone who hurt me deeply?",
+             "What can I do to stop overthinking?"
          ],
+         chatbot=gr.Chatbot(elem_classes="chatbot"),
+         theme="compact",
      )
+     gr.HTML(f"""
+     <div id="left" class="corner">
+         <img src="https://huggingface.co/spaces/p2kalita/kRISHNA.ai/resolve/main/assets/Arjun.png" alt="Arjun">
+     </div>
+     <div id="right" class="corner">
+         <img src="https://huggingface.co/spaces/p2kalita/kRISHNA.ai/resolve/main/assets/Krishna.png" alt="Krishna">
+     </div>
+     """)
+
  if __name__ == "__main__":
      demo.launch()
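
For a quick local sanity check of the new inference path, something along these lines should work, assuming the merged weights can be downloaded and there is enough memory; the history argument mirrors the (user, assistant) tuples that gradio_fn builds, and the file name is hypothetical:

    # smoke_test.py (hypothetical): calls infer_text() directly, bypassing Gradio
    from app import infer_text  # note: importing app loads the model

    history = [("Hello!", "Hello! How can I help?")]   # one completed turn
    question = "How can I overcome fear of failure?"   # next user message
    print(infer_text(history + [(question, None)], max_new_tokens=128))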