Ultronprime committed on
Commit 7818f69 · verified · 1 parent: 5cfc354

Update app.py

Files changed (1)
  1. app.py +86 -84
app.py CHANGED
@@ -3,126 +3,128 @@ import spaces
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch

- # Add CSS for footer hiding and styling
  css = """
- footer {
-     visibility: hidden;
- }
- .container {max-width: 850px; margin: auto; padding: 20px}
- .title {text-align: center; margin-bottom: 20px}
  """
 
- # Model initialization
  model_name = "ngxson/MiniThinky-v2-1B-Llama-3.2"
  device = "cuda" if torch.cuda.is_available() else "cpu"

- try:
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         torch_dtype=torch.float16,
-         device_map="auto"
-     )
- except Exception as e:
-     print(f"Error loading model: {e}")
-     raise gr.Error("Failed to load model. Please try again later.")
 
- SYSTEM_MESSAGE = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."

- def format_chat_prompt(messages):
-     formatted_messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
-     formatted_messages.extend(messages)
-     return tokenizer.apply_chat_template(
-         formatted_messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
 
  @spaces.GPU(duration=60)
- def generate_response(message, history, progress=gr.Progress(track_tqdm=True)):
-     if not message.strip():
-         return "", history
-
      try:
-         # Format messages including history
-         messages = []
-         for user_msg, assistant_msg in history:
-             messages.append({"role": "user", "content": user_msg})
-             messages.append({"role": "assistant", "content": assistant_msg})
-         messages.append({"role": "user", "content": message})
-
-         # Format prompt
-         prompt = format_chat_prompt(messages)
-
-         # Tokenize
-         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
-
-         # Generate
          outputs = model.generate(
              **inputs,
              max_new_tokens=512,
-             temperature=0.7,
              do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
          )
-
-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         # Extract response after the last user message
          response = response.split(message)[-1].strip()
-
-         # Clear GPU memory
-         torch.cuda.empty_cache()
-
-         return response
-
-     except Exception as e:
-         print(f"Error during generation: {e}")
-         return "[Error: Generation failed. Please try again.]", history
 
- def respond(message, chat_history):
-     try:
-         bot_message = generate_response(message, chat_history)
-         chat_history.append((message, bot_message))
-         return "", chat_history
      except Exception as e:
-         raise gr.Error(str(e))
 
- # Gradio Interface
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
      gr.HTML(
          """
-         <div class="title">
              <h1>MiniThinky Chat Assistant</h1>
-             <p>A helpful AI assistant that thinks before answering</p>
          </div>
          """
      )

-     with gr.Column(elem_id="col-container"):
-         chatbot = gr.Chatbot(height=400)
-         with gr.Row():
-             msg = gr.Textbox(
-                 placeholder="Type your message here...",
-                 container=False,
-                 scale=4
-             )
-             submit = gr.Button("Submit", scale=1)
-
-         clear = gr.ClearButton([msg, chatbot], value="🗑️ Clear Chat")
-
-         with gr.Accordion("Examples", open=False):
-             gr.Examples(
-                 examples=[
-                     "What is the capital of France?",
-                     "Explain quantum computing in simple terms",
-                     "Write a short poem about AI",
-                 ],
-                 inputs=msg
              )

-     msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=True)
-     submit.click(respond, [msg, chatbot], [msg, chatbot], queue=True)

  if __name__ == "__main__":
      demo.queue(max_size=20, api_open=False).launch()
 
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch

  css = """
+ footer {visibility: hidden}
+ .message-wrap {padding: 10px}
+ .assistant-message pre {background-color: #f6f8fa; padding: 12px; border-radius: 8px}
  """
 
 
  model_name = "ngxson/MiniThinky-v2-1B-Llama-3.2"
  device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Initialize tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,
+     device_map="auto"
+ )
 
+ EXAMPLES = [
+     "Solve the equation x^2 - 3x + 2 = 0",
+     "Lily is three times as old as her son. In 15 years, she will be twice as old as him. How old is she now?",
+     "Write Python code to compute the nth Fibonacci number."
+ ]
 
+ def format_message(message, history):
+     base_prompt = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
+
+     # Start with the system prompt, then replay the conversation history
+     messages = [{"role": "system", "content": base_prompt}]
+     for human, assistant in history:
+         messages.append({"role": "user", "content": human})
+         messages.append({"role": "assistant", "content": assistant})
+
+     # Add the current message; keep add_generation_prompt=True (as in the
+     # previous version) so the rendered prompt ends with the assistant
+     # header the model is expected to complete
+     messages.append({"role": "user", "content": message})
+     return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
  @spaces.GPU(duration=60)
+ def generate(message, history):
      try:
+         # Format prompt with history
+         prompt = format_message(message, history)
+
+         # Encode prompt
+         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
+         inputs = inputs.to(device)
+
+         # Generate response
          outputs = model.generate(
              **inputs,
              max_new_tokens=512,
              do_sample=True,
+             temperature=0.7,
+             top_p=0.9,
+             repetition_penalty=1.2,
+             pad_token_id=tokenizer.eos_token_id
          )

+         # Decode with skip_special_tokens=False so the <|thinking|> and
+         # <|answer|> markers survive for the split below
+         response = tokenizer.decode(outputs[0], skip_special_tokens=False)
          response = response.split(message)[-1].strip()

+         # Split thinking and answer parts, tolerating a missing
+         # <|answer|> marker so the split cannot raise an IndexError
+         thinking = ""
+         answer = response
+         if "<|thinking|>" in response:
+             after = response.split("<|thinking|>", 1)[1]
+             if "<|answer|>" in after:
+                 thinking, answer = after.split("<|answer|>", 1)
+                 thinking, answer = thinking.strip(), answer.strip()
+             else:
+                 thinking, answer = after.strip(), ""
+
+         # Format the final response and return the updated history, since
+         # this function's output is wired directly to the Chatbot component
+         final_response = f"🤔 Thinking:\n{thinking}\n\n✨ Answer:\n{answer}"
+         return history + [(message, final_response)]

      except Exception as e:
+         return history + [(message, f"Error: {str(e)}")]
 
 
  with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
      gr.HTML(
          """
+         <div style='text-align: center'>
              <h1>MiniThinky Chat Assistant</h1>
+             <p>A helpful AI assistant that thinks before answering.</p>
          </div>
          """
      )

+     chatbot = gr.Chatbot(
+         label="Conversation",
+         height=500,
+     )
+
+     with gr.Row():
+         txt = gr.Textbox(
+             placeholder="Type your message here...",
+             show_label=False,
+             scale=4
+         )
+         btn = gr.Button("Send", scale=1)
+
+     clear = gr.ClearButton([txt, chatbot])
+
+     # Example buttons: bind each example string as a lambda default so
+     # every button fills the textbox with its own text (a plain Python
+     # string cannot be passed as a Gradio input component)
+     with gr.Row():
+         for example in EXAMPLES:
+             gr.Button(example).click(
+                 lambda ex=example: ex,
+                 None,
+                 [txt]
              )
 
+     txt.submit(generate, [txt, chatbot], [chatbot]).then(
+         lambda: "", None, [txt]
+     )
+     btn.click(generate, [txt, chatbot], [chatbot]).then(
+         lambda: "", None, [txt]
+     )

  if __name__ == "__main__":
      demo.queue(max_size=20, api_open=False).launch()
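
For quick sanity-checking, here is a minimal standalone sketch of the marker-splitting logic the new generate() applies; the helper name split_thinking and the sample string are illustrative, not part of the commit:

# Illustrative helper (not in the commit): split a raw MiniThinky
# completion into its thinking and answer parts, tolerating a
# missing <|answer|> marker instead of raising an IndexError.
def split_thinking(response):
    if "<|thinking|>" not in response:
        return "", response.strip()
    after = response.split("<|thinking|>", 1)[1]
    if "<|answer|>" not in after:
        return after.strip(), ""
    thinking, answer = after.split("<|answer|>", 1)
    return thinking.strip(), answer.strip()

print(split_thinking("<|thinking|> 2 + 2 = 4 <|answer|> The answer is 4."))
# -> ('2 + 2 = 4', 'The answer is 4.')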
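
One caveat both versions share: response.split(message)[-1] strips the prompt by string matching, so the reply gets truncated whenever the user's text recurs inside the completion. A common alternative, sketched here under the same tokenizer/model.generate flow (the helper name decode_new_tokens is ours, not the commit's), is to decode only the tokens produced after the prompt:

# Sketch (not in the commit): decode only the newly generated tokens by
# slicing at the prompt length, instead of string-matching on the message.
def decode_new_tokens(tokenizer, input_ids, output_ids):
    prompt_len = input_ids.shape[1]          # number of prompt tokens fed in
    new_tokens = output_ids[0][prompt_len:]  # everything generated after them
    return tokenizer.decode(new_tokens, skip_special_tokens=False)

# Inside generate() this would replace the split:
# response = decode_new_tokens(tokenizer, inputs["input_ids"], outputs)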