Build error
Update app.py
app.py CHANGED
@@ -1,46 +1,128 @@
 import gradio as gr
 import spaces
-from transformers import
- [old lines 4-46 removed; contents not shown in this view]
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Add CSS for footer hiding and styling
+css = """
+footer {
+    visibility: hidden;
+}
+.container {max-width: 850px; margin: auto; padding: 20px}
+.title {text-align: center; margin-bottom: 20px}
+"""
+
+# Model initialization
+model_name = "ngxson/MiniThinky-v2-1B-Llama-3.2"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
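+        # Note: device_map="auto" relies on the accelerate package being installed in the Space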
+        device_map="auto"
+    )
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise gr.Error("Failed to load model. Please try again later.")
+
+SYSTEM_MESSAGE = "You are MiniThinky, a helpful AI assistant. You always think before giving the answer. Use <|thinking|> before thinking and <|answer|> before giving the answer."
+
+def format_chat_prompt(messages):
+    formatted_messages = [{"role": "system", "content": SYSTEM_MESSAGE}]
+    formatted_messages.extend(messages)
+    return tokenizer.apply_chat_template(
+        formatted_messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+
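+# @spaces.GPU requests ZeroGPU hardware for each call, held for at most `duration` seconds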
+@spaces.GPU(duration=60)
+def generate_response(message, history, progress=gr.Progress(track_tqdm=True)):
+    if not message.strip():
+        return ""
+
+    try:
+        # Format messages including history
+        messages = []
+        for user_msg, assistant_msg in history:
+            messages.append({"role": "user", "content": user_msg})
+            messages.append({"role": "assistant", "content": assistant_msg})
+        messages.append({"role": "user", "content": message})
+
+        # Format prompt
+        prompt = format_chat_prompt(messages)
+
+        # Tokenize
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(device)
+
+        # Generate
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            do_sample=True,
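+            # Llama 3.2 has no dedicated pad token, so EOS is reused as the padding id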
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        # Extract response after the last user message
+        response = response.split(message)[-1].strip()
+
+        # Clear GPU memory
+        torch.cuda.empty_cache()
+
+        return response
+
+    except Exception as e:
+        print(f"Error during generation: {e}")
+        return "[Error: Generation failed. Please try again.]"
+
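+# Gradio event handler: generates a reply, appends the (user, assistant) turn, and clears the textbox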
+def respond(message, chat_history):
+    try:
+        bot_message = generate_response(message, chat_history)
+        chat_history.append((message, bot_message))
+        return "", chat_history
+    except Exception as e:
+        raise gr.Error(str(e))
+
+# Gradio Interface
+with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
+    gr.HTML(
+        """
+        <div class="title">
+            <h1>MiniThinky Chat Assistant</h1>
+            <p>A helpful AI assistant that thinks before answering</p>
+        </div>
+        """
+    )
+
+    with gr.Column(elem_id="col-container"):
+        chatbot = gr.Chatbot(height=400)
+        with gr.Row():
+            msg = gr.Textbox(
+                placeholder="Type your message here...",
+                container=False,
+                scale=4
+            )
+            submit = gr.Button("Submit", scale=1)
+
+        clear = gr.ClearButton([msg, chatbot], value="🗑️ Clear Chat")
+
+        with gr.Accordion("Examples", open=False):
+            gr.Examples(
+                examples=[
+                    "What is the capital of France?",
+                    "Explain quantum computing in simple terms",
+                    "Write a short poem about AI",
+                ],
+                inputs=msg
+            )
+
+        msg.submit(respond, [msg, chatbot], [msg, chatbot], queue=True)
+        submit.click(respond, [msg, chatbot], [msg, chatbot], queue=True)
+
+if __name__ == "__main__":
+    demo.queue(max_size=20, api_open=False).launch()
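The system message asks the model to wrap its reasoning in <|thinking|> and its final reply in <|answer|>, but the handler above returns the decoded text as-is. A minimal, hypothetical sketch of how those markers could be separated before display (not part of this commit; only the marker strings are taken from SYSTEM_MESSAGE):

def split_thinking(raw_response: str):
    # Split a completion into (thinking, answer) using the markers from SYSTEM_MESSAGE;
    # if no answer marker is present, treat the whole text as the answer.
    if "<|answer|>" in raw_response:
        thinking, answer = raw_response.split("<|answer|>", 1)
        return thinking.replace("<|thinking|>", "").strip(), answer.strip()
    return "", raw_response.strip()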