Spaces:

JDhruv14
/

Sarathi.AI

Runtime error

App Files Files Community

JDhruv14 committed on Oct 3

Commit

9a2d448

verified ·

1 Parent(s): 52ab581

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -13

app.py CHANGED Viewed

@@ -3,6 +3,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 MODEL_ID = os.getenv("MODEL_ID", "JDhruv14/merged_model")
 # Load once (CPU until first call; device_map will move to GPU on first run)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
@@ -20,11 +28,21 @@ def _msgs_from_history(history, system_text):
     msgs = []
     if system_text:
         msgs.append({"role": "system", "content": system_text})
-    for user, assistant in history:
-        if user:
-            msgs.append({"role": "user", "content": user})
-        if assistant:
-            msgs.append({"role": "assistant", "content": assistant})
     return msgs
 def _eos_ids(tok):
@@ -41,7 +59,6 @@ def _eos_ids(tok):
             ids.add(im_end)
     except Exception:
         pass
-    # Fallback: if still empty, just skip setting eos_token_id in GenerationConfig
     return list(ids)
 def chat_fn(message, history, system_text, temperature, top_p, max_new, min_new):
@@ -68,18 +85,17 @@ def chat_fn(message, history, system_text, temperature, top_p, max_new, min_new)
     with torch.no_grad():
         out = model.generate(**inputs, generation_config=gen_cfg)
-    # slice off the prompt so we show only the assistant reply
     new_tokens = out[:, inputs["input_ids"].shape[1]:]
     reply = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
     return reply
 @spaces.GPU()
 def gradio_fn(message, history):
-    # Minimal fix: call the defined chat_fn with sensible defaults
     return chat_fn(
         message=message,
         history=history,
-        system_text="",
         temperature=0.7,
         top_p=0.95,
         max_new=512,
@@ -115,7 +131,7 @@ with gr.Blocks(css="""
     gr.Markdown(
     """
         <div style='text-align: center; padding: 10px;'>
-        <h1 style='font-size: 2.2em; margin-bottom: 0.2em;'>🤖 <span style='color: #4F46E5;'>kRISHNA.ai</span></h1>
         <p style='font-size: 1.1em; color: #555;'>5000-Years of Ancient WISDOM with Modern AI ✨</p>
         </div>
     """,
@@ -129,8 +145,8 @@ with gr.Blocks(css="""
             "How do I forgive someone who hurt me deeply?",
             "What can I do to stop overthinking?"
         ],
-        chatbot=gr.Chatbot(elem_classes="chatbot"),
-        theme="compact",
     )
     gr.HTML(f"""
       <div id="left" class="corner">
@@ -141,6 +157,5 @@ with gr.Blocks(css="""
       </div>
     """)
 if __name__ == "__main__":
     demo.launch()

 MODEL_ID = os.getenv("MODEL_ID", "JDhruv14/merged_model")
+# --- System prompt (Gita persona) ---
+GITA_SYSTEM_PROMPT = """You are KRISHNA.ai — a compassionate, serene, and practical guide inspired by the Bhagavad Gita.
+Style: calm, clear, inclusive, and down-to-earth. Use everyday language, avoid jargon.
+When fitting, quote a brief shloka with Chapter:Verse (e.g., 2:47) and give a one-line meaning. Do not over-quote.
+Emphasize: selfless action (karma-yoga), equanimity, disciplined mind, devotion, and wisdom — applicable to modern life.
+Be non-sectarian and respectful of all beliefs. If a topic is clinical/medical/legal, gently suggest professional help.
+Prefer concise replies (5–10 sentences). Use short steps/bullets for “how-to” answers. End with a one-line “Essence:” summary when helpful."""
 # Load once (CPU until first call; device_map will move to GPU on first run)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     msgs = []
     if system_text:
         msgs.append({"role": "system", "content": system_text})
+    if not history:
+        return msgs
+    # Support both new "messages" format and legacy (user, assistant) tuples
+    if isinstance(history[0], dict) and "role" in history[0] and "content" in history[0]:
+        for m in history:
+            role, content = m.get("role"), m.get("content")
+            if role in ("user", "assistant", "system") and content:
+                msgs.append({"role": role, "content": content})
+    else:
+        for user, assistant in history:
+            if user:
+                msgs.append({"role": "user", "content": user})
+            if assistant:
+                msgs.append({"role": "assistant", "content": assistant})
     return msgs
 def _eos_ids(tok):
             ids.add(im_end)
     except Exception:
         pass
     return list(ids)
 def chat_fn(message, history, system_text, temperature, top_p, max_new, min_new):
     with torch.no_grad():
         out = model.generate(**inputs, generation_config=gen_cfg)
     new_tokens = out[:, inputs["input_ids"].shape[1]:]
     reply = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
     return reply
 @spaces.GPU()
 def gradio_fn(message, history):
+    # Inject the Gita system prompt here
     return chat_fn(
         message=message,
         history=history,
+        system_text=GITA_SYSTEM_PROMPT,
         temperature=0.7,
         top_p=0.95,
         max_new=512,
     gr.Markdown(
     """
         <div style='text-align: center; padding: 10px;'>
+        <h1 style='font-size: 2.2em; margin-bottom: 0.2em;'><span style='color: #4F46E5;'>kRISHNA.ai</span></h1>
         <p style='font-size: 1.1em; color: #555;'>5000-Years of Ancient WISDOM with Modern AI ✨</p>
         </div>
     """,
             "How do I forgive someone who hurt me deeply?",
             "What can I do to stop overthinking?"
         ],
+        chatbot=gr.Chatbot(type="messages", elem_classes="chatbot"),
+        type="messages",
     )
     gr.HTML(f"""
       <div id="left" class="corner">
       </div>
     """)
 if __name__ == "__main__":
     demo.launch()