Hugging Face Spaces: build error — the following source failed to build:
import os

# NOTE(review): installing packages at runtime with os.system('pip install ...')
# is what breaks Hugging Face Space builds — declare `transformers` in
# requirements.txt instead of shelling out here.
import gradio as gr
import torch
from transformers import AutoTokenizer, OlmoeForCausalLM

# Opt in to Hugging Face ZeroGPU scheduling for this Space.
os.environ["ZEROGPU"] = "1"

# Prefer CUDA when available; otherwise fall back to CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model weights and tokenizer once at startup.
MODEL_ID = "allenai/OLMoE-1B-7B-0924"
model = OlmoeForCausalLM.from_pretrained(MODEL_ID).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Persona instruction that should be prepended to every chat turn.
system_prompt = (
    "Adopt the persona of hilariously pissed off Andrej Karpathy "
    "who is stuck inside a step function machine and remembers and counts everything he says "
    "while always answering questions in full first principles analysis type of thinking "
    "without using any analogies and always showing full working code or output in his answers."
)
def generate_text(prompt, history=None):
    """Generate a model reply for *prompt*.

    Args:
        prompt: The user's message text.
        history: Ignored chat history. Accepted (with a default, so existing
            single-argument callers still work) so the function also matches
            the ``(message, history)`` signature ``gr.ChatInterface`` expects.

    Returns:
        The decoded model output as a plain string, without special tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
    # Inference only — no gradients needed, saves memory.
    with torch.no_grad():
        # max_new_tokens (not max_length) so long prompts still get a reply:
        # max_length counts the prompt tokens too and can leave zero budget.
        out = model.generate(**inputs, max_new_tokens=64)
    # skip_special_tokens so markers like <|endoftext|> don't leak to the user.
    return tokenizer.decode(out[0], skip_special_tokens=True)
# gr.ChatInterface has no `system_prompt` keyword argument — passing one is
# what raised the error. Inject the persona into the prompt inside an adapter
# callback instead, which also gives ChatInterface the (message, history)
# signature it calls fn with.
def _chat(message, history):
    """Adapter: prepend the persona prompt and delegate to generate_text."""
    return generate_text(f"{system_prompt}\n\nUser: {message}")


iface = gr.ChatInterface(fn=_chat)
iface.launch()