Update app.py
app.py
CHANGED
@@ -6,9 +6,12 @@ import threading
 # === Model loading ===
 model_path = "SBK/sbk-llm-1"  # Using your HF model
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-model = AutoModelForCausalLM.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    device_map="auto"
+)
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model.to(device)
 
 # === System prompt / default behavior ===
 SYSTEM_PROMPT = """You are a helpful, honest, and factual assistant trained to answer only about me *Saptarshi Bhattacharya*. You were fine-tuned on factual data derived from his work, projects, skills, internships, and engineering experiences.
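Note on this hunk: `device_map="auto"` delegates weight placement to the `accelerate` integration, which is also why the old `model.to(device)` line is dropped; calling `.to()` on a device-mapped model can fail once layers are sharded or offloaded. Below is a minimal sketch of the same loading strategy with an explicit fallback, assuming `accelerate` may be absent; the `try/except` is illustrative, not code from this repo:

```python
import torch
from transformers import AutoModelForCausalLM

model_path = "SBK/sbk-llm-1"
try:
    # device_map="auto" needs the accelerate package installed; transformers
    # raises ImportError when it is missing (assumption: recent versions).
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
    )
except ImportError:
    # Single-device fallback, equivalent to the pre-change code path.
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.to("cuda" if torch.cuda.is_available() else "cpu")
```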
@@ -24,7 +27,7 @@ Your job is to help users understand what Saptarshi has done, what he's good at,
 Your goal is to represent him truthfully and make his work accessible and understandable to potential collaborators or employers, without overselling or faking.
 """
 
-BLOCKED_KEYWORDS = ["kill", "harm", "violence", "bomb", "suicide"]
+BLOCKED_KEYWORDS = ["kill", "harm", "violence", "bomb", "suicide"]
 MAX_TOKENS = 512
 
 # === Streaming generation ===
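`BLOCKED_KEYWORDS` is declared here, but the check that uses it sits in lines this diff does not show. A hedged sketch of one plausible guard; `is_blocked` is a hypothetical helper, and plain substring matching is deliberately naive:

```python
def is_blocked(text: str) -> bool:
    """Hypothetical helper -- the real check is not shown in this diff."""
    lowered = text.lower()
    # Substring matching is simple but over-eager: "kill" also matches "skill".
    return any(keyword in lowered for keyword in BLOCKED_KEYWORDS)
```

A real filter would at least match on word boundaries (for example, a compiled `re` pattern) before refusing a prompt.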
@@ -64,33 +67,55 @@ def generate_response(history, system_prompt):
         yield partial_message
 
 # === Gradio interface ===
-with gr.Blocks() as demo:
-    gr.Markdown("##
-
-
-
-
-
-
+with gr.Blocks(title="SBK LLM Chat") as demo:
+    gr.Markdown("## Chat with SBK LLM - Professional Portfolio Assistant")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            system_prompt = gr.Textbox(label="System Instructions", value=SYSTEM_PROMPT, lines=8)
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(height=400)
+            msg = gr.Textbox(label="Your Message", placeholder="Ask about Saptarshi's professional experience...", lines=2)
+            with gr.Row():
+                submit_btn = gr.Button("Submit")
+                clear_btn = gr.Button("Clear Chat")
+
+    history = gr.State([])
 
     def respond(user_message, chat_history, system_prompt):
-
-        chat_history.append((user_message, ""))
+        chat_history = chat_history + [(user_message, "")]
 
-        # Generate response
         full_response = ""
         for response in generate_response(chat_history, system_prompt):
             full_response = response
             chat_history[-1] = (user_message, full_response)
             yield chat_history
 
-        return
+        return chat_history
 
+    # Connect components
     msg.submit(
        respond,
         [msg, chatbot, system_prompt],
-        [chatbot]
+        [chatbot],
+        queue=True
+    )
+    submit_btn.click(
+        respond,
+        [msg, chatbot, system_prompt],
+        [chatbot],
+        queue=True
+    )
+    clear_btn.click(
+        lambda: ([], []),
+        outputs=[chatbot, history],
+        queue=False
     )
-    clear.click(lambda: ([], []), outputs=[chatbot, history])
 
-
+# Launch with sharing enabled
+demo.queue(max_size=20).launch(
+    share=True,
+    server_name="0.0.0.0",
+    server_port=7860,
+    show_error=True
+)
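The hunk header shows `generate_response(history, system_prompt)` ending in `yield partial_message`, and the first hunk's context line confirms the file imports `threading`. That combination points at the standard `TextIteratorStreamer` pattern, so a hedged reconstruction follows; it relies on the module-level `tokenizer`, `model`, and `MAX_TOKENS`, and the prompt-building lines are assumptions rather than the author's code:

```python
import threading
from transformers import TextIteratorStreamer

def generate_response(history, system_prompt):
    # Assumed prompt format: a chat-tuned model would normally go through
    # tokenizer.apply_chat_template instead of manual concatenation.
    prompt = system_prompt + "\n"
    for user_turn, bot_turn in history:
        prompt += f"\nUser: {user_turn}\nAssistant: {bot_turn}"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # generate() blocks until done, so it runs on a worker thread while the
    # streamer is drained incrementally on this one.
    thread = threading.Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=MAX_TOKENS),
    )
    thread.start()

    partial_message = ""
    for new_text in streamer:
        partial_message += new_text
        yield partial_message
```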
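Inside `respond`, the diff swaps `chat_history.append((user_message, ""))` for `chat_history = chat_history + [(user_message, "")]`: concatenation builds a fresh list instead of mutating the one Gradio handed in. Whether that passed-in list is shared depends on the component and Gradio version, so copying is the defensive choice. A plain-Python illustration of the difference (not code from app.py):

```python
shared = [("hi", "hello")]

mutated = shared
mutated.append(("q", ""))       # same object: shared now has two items too

copied = shared + [("q2", "")]  # new list: shared is untouched by this line
assert copied is not shared and len(shared) == 2
```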
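One wiring detail worth calling out: `respond` is a generator, and Gradio streams its yields to the UI through the queue, which `demo.queue(max_size=20)` plus `queue=True` on the submit/click events provides (older Gradio versions refuse generator handlers without it). A minimal, model-free sketch of the same pattern that runs as-is:

```python
import time
import gradio as gr

def respond(user_message, chat_history):
    chat_history = chat_history + [(user_message, "")]
    partial = ""
    for ch in f"echo: {user_message}":  # stand-in for streamed tokens
        partial += ch
        chat_history[-1] = (user_message, partial)
        time.sleep(0.02)
        yield chat_history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(respond, [msg, chatbot], [chatbot])

demo.queue().launch()  # without the queue, the yields would not stream
```

`server_name="0.0.0.0"` and `server_port=7860` match the usual Hugging Face Spaces binding, and `share=True` additionally requests a temporary public gradio.live link when the app is run locally.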