"""Gradio chat UI for the Dawn-Superlite model.

Loads the model once at import time, then serves a ChatInterface whose
callback rebuilds the full conversation for the chat template on every turn.
"""

import gradio as gr
import torch
from transformers import pipeline

print("Loading Dawn-superlite...")

# The 'trust_remote_code=True' argument is required for this specific model
# because it uses a custom architecture defined in the HF repository.
pipe = pipeline(
    "text-generation",
    model="Dawn-AI/Dawn-Superlite-llama",
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    model_kwargs={"tie_word_embeddings": False},
)
print("Model loaded successfully!")


def chat_with_dawn(message, history, system_prompt):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The new user message (str).
        history: Prior turns. Supports both Gradio history formats:
            tuple pairs ``[(user, assistant), ...]`` and messages-style
            dicts ``[{"role": ..., "content": ...}, ...]``.
        system_prompt: Optional system prompt; skipped when empty, blank,
            or None.

    Returns:
        The assistant's reply text (str).
    """
    messages = []

    # Guard against None as well as empty/whitespace-only prompts:
    # Gradio may pass None for an untouched additional input, and
    # None.strip() would raise AttributeError.
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Rebuild the conversation for the chat template. Messages-style
    # dict entries pass through unchanged; legacy tuple pairs are
    # expanded into a user turn plus an assistant turn.
    for turn in history:
        if isinstance(turn, dict):
            messages.append(turn)
        else:
            user_msg, assistant_msg = turn
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the new user message.
    messages.append({"role": "user", "content": message})

    # Sampling (temperature > 0 with top_p) gives more natural reasoning
    # responses than greedy decoding.
    response = pipe(
        messages,
        max_new_tokens=512,
        truncation=True,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )

    # The pipeline usually returns the full list of messages including the
    # newly generated assistant turn; extract its content in that case.
    generated_text = response[0]["generated_text"]
    if isinstance(generated_text, list):
        return generated_text[-1]["content"]
    return str(generated_text)


# Define the UI components.
system_textbox = gr.Textbox(
    value="You are Dawn, a brilliant reasoning AI.",
    label="System Prompt",
    lines=2,
    interactive=True,
)

# Launch the Gradio Interface.
demo = gr.ChatInterface(
    fn=chat_with_dawn,
    additional_inputs=[system_textbox],
    title="🌅 Dawn Superlite",
    description="Running Dawn-superlite. Note: Performance depends on your hardware (CPU vs GPU).",
    examples=[
        "Explain quantum entanglement like I'm five.",
        "Write a Python script to reverse a string.",
    ],
)

if __name__ == "__main__":
    demo.launch()