Ultronprime committed
Commit 37162ec · verified · 1 Parent(s): 7818f69

Update app.py

Files changed (1):
  1. app.py +89 -82
app.py CHANGED
@@ -2,129 +2,136 @@ import gradio as gr
 import spaces
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import re
 
 css = """
 footer {visibility: hidden}
-.message-wrap {padding: 10px}
-.assistant-message pre {background-color: #f6f8fa; padding: 12px; border-radius: 8px}
+.message-wrap {max-width: 900px}
+.bot {background-color: #f7f7f8}
+.user {background-color: white}
+.message {padding: 20px; margin: 10px}
+.thinking {color: #666; font-style: italic; border-left: 3px solid #666; padding-left: 10px; margin: 10px 0}
+.answer {margin-top: 10px}
 """
 
 model_name = "ngxson/MiniThinky-v2-1B-Llama-3.2"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Initialize tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
-
-EXAMPLES = [
-    "Solve the equation x^2 - 3x + 2 = 0",
-    "Lily is three times older than her son. In 15 years, she will be twice as old as him. How old is she now?",
-    "Write python code to compute the nth fibonacci number."
-]
-
-def format_message(message, history):
-    base_prompt = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
-
-    # Format conversation history
-    messages = [{"role": "system", "content": base_prompt}]
-
-    # Add conversation history
-    for human, assistant in history:
-        messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": assistant})
-
-    # Add current message
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map="auto"
+    )
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise gr.Error("Failed to load model. Please try again later.")
+
+SYSTEM_MESSAGE = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
+
+def parse_response(text):
+    """Parse thinking and answer from response"""
+    # Extract thinking part
+    thinking_match = re.search(r'<\|thinking\|>(.*?)(?=<\|answer\|>|$)', text, re.DOTALL)
+    thinking = thinking_match.group(1).strip() if thinking_match else ""
+
+    # Extract answer part
+    answer_match = re.search(r'<\|answer\|>(.*?)$', text, re.DOTALL)
+    answer = answer_match.group(1).strip() if answer_match else text.strip()
+
+    return thinking, answer
+
+def format_message(text):
+    """Format message with thinking and answer sections"""
+    thinking, answer = parse_response(text)
+    formatted = []
+    if thinking:
+        formatted.append(f'<div class="thinking">{thinking}</div>')
+    if answer:
+        formatted.append(f'<div class="answer">{answer}</div>')
+    return "\n".join(formatted)
+
+@spaces.GPU(duration=60)
+def generate_response(message, history):
+    messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
+
+    # Add history to context
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
 
-    return tokenizer.apply_chat_template(messages, tokenize=False)
-
-@spaces.GPU(duration=60)
-def generate(message, history):
+    # Format prompt
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
+
     try:
-        # Format prompt with history
-        prompt = format_message(message, history)
-
-        # Encode prompt
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
-        inputs = inputs.to(device)
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
 
-        # Generate response
         outputs = model.generate(
             **inputs,
             max_new_tokens=512,
-            do_sample=True,
             temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.2,
-            pad_token_id=tokenizer.eos_token_id
+            do_sample=True,
+            top_p=0.95,
+            pad_token_id=tokenizer.eos_token_id,
         )
 
-        # Decode response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=False)
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         response = response.split(message)[-1].strip()
 
-        # Split thinking and answer parts
-        thinking = ""
-        answer = response
-
-        if "<|thinking|>" in response:
-            parts = response.split("<|thinking|>", 1)
-            if len(parts) > 1:
-                thinking = parts[1].split("<|answer|>")[0].strip()
-                answer = parts[1].split("<|answer|>")[1].strip()
-
-        # Format final response
-        final_response = f"🤔 Thinking:\n{thinking}\n\n✨ Answer:\n{answer}"
-
-        return final_response
+        # Format response for display
+        formatted_response = format_message(response)
+
+        torch.cuda.empty_cache()
+        return formatted_response
 
     except Exception as e:
-        return f"Error: {str(e)}"
+        print(f"Error: {e}")
+        return "[Error occurred during generation]"
 
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
-    gr.HTML(
-        """
-        <div style='text-align: center'>
-            <h1>MiniThinky Chat Assistant</h1>
-            <p>A helpful AI assistant that thinks before answering.</p>
-        </div>
-        """
-    )
+    gr.HTML("""
+        <h1 style="text-align: center; margin-bottom: 1rem">
+            MiniThinky Chat Assistant
+        </h1>
+    """)
 
     chatbot = gr.Chatbot(
-        label="Conversation",
-        height=500,
+        bubble=True,
+        height=600,
+        container=True,
+        show_copy_button=True
     )
 
     with gr.Row():
         txt = gr.Textbox(
             placeholder="Type your message here...",
-            show_label=False,
+            container=False,
             scale=4
         )
-        btn = gr.Button("Send", scale=1)
+        submit_btn = gr.Button("Send", scale=1, variant="primary")
 
-    clear = gr.ClearButton([txt, chatbot])
-
-    # Example buttons
     with gr.Row():
-        for example in EXAMPLES:
-            gr.Button(example).click(
-                lambda msg: gr.update(value=msg),
-                [example],
-                [txt]
-            )
+        clear_btn = gr.ClearButton([txt, chatbot], value="Clear chat")
+
+    with gr.Accordion("Examples", open=False):
+        gr.Examples(
+            examples=[
+                "Solve the equation x^2 - 3x + 2 = 0",
+                "Lily is three times older than her son. In 15 years, she will be twice as old as him. How old is she now?",
+                "Write python code to compute the nth fibonacci number.",
+            ],
+            inputs=txt
+        )
 
-    txt.submit(generate, [txt, chatbot], [chatbot]).then(
-        lambda: "", None, [txt]
-    )
-    btn.click(generate, [txt, chatbot], [chatbot]).then(
-        lambda: "", None, [txt]
-    )
+    def respond(message, chat_history):
+        bot_message = generate_response(message, chat_history)
+        chat_history.append((message, bot_message))
+        return "", chat_history
+
+    txt.submit(respond, [txt, chatbot], [txt, chatbot])
+    submit_btn.click(respond, [txt, chatbot], [txt, chatbot])
 
 if __name__ == "__main__":
-    demo.queue(max_size=20, api_open=False).launch()
+    demo.queue(max_size=20).launch()
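
Reviewer note: the commit replaces the old string-split parsing (which raised IndexError whenever the model omitted the <|answer|> tag) with the two regexes in parse_response. A minimal, self-contained sketch of how those patterns behave; the regexes are copied from the diff above, while the sample string is invented for illustration:

import re

# Invented sample output; the tag format follows SYSTEM_MESSAGE in app.py.
text = "<|thinking|>Factor: (x - 1)(x - 2) = 0, so the roots are 1 and 2.<|answer|>x = 1 or x = 2"

# Patterns copied verbatim from parse_response.
thinking_match = re.search(r'<\|thinking\|>(.*?)(?=<\|answer\|>|$)', text, re.DOTALL)
answer_match = re.search(r'<\|answer\|>(.*?)$', text, re.DOTALL)

thinking = thinking_match.group(1).strip() if thinking_match else ""
answer = answer_match.group(1).strip() if answer_match else text.strip()

print(thinking)  # Factor: (x - 1)(x - 2) = 0, so the roots are 1 and 2.
print(answer)    # x = 1 or x = 2

Note that the lookahead makes the thinking pattern tolerant of a missing <|answer|> tag, and the fallback branches keep both values defined either way.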
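The new respond() wrapper also pins down the history contract: a list of (user, assistant) tuples, the pair format the default gr.Chatbot accepts. A self-contained sketch of that round trip, using a hypothetical stand-in for generate_response so it runs without loading the model:

# fake_generate is a hypothetical stand-in; the real app calls generate_response.
def fake_generate(message, history):
    return f"echo: {message}"

def respond(message, chat_history):
    bot_message = fake_generate(message, chat_history)
    chat_history.append((message, bot_message))
    return "", chat_history  # empty string clears the textbox; list updates the chatbot

history = []
_, history = respond("hello", history)
print(history)  # [('hello', 'echo: hello')]

Returning ("", chat_history) into [txt, chatbot] is what lets one callback both clear the input and refresh the conversation, replacing the old two-step .then() chains.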