Spaces:
Running
Running
| [ | |
| { | |
| "query": "What is Python?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "What is the capital of France?", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "Who invented the telephone?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "What does HTTP stand for?", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "Translate 'hello' to Spanish", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "translation" | |
| }, | |
| { | |
| "query": "What year was Python created?", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "Is Java object-oriented?", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "What is RAM?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "Summarize this paragraph: The quick brown fox...", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "summarization" | |
| }, | |
| { | |
| "query": "Explain what a variable is in programming", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "What is the difference between a list and a tuple in Python?", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Write a simple hello world in JavaScript", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "What is recursion? Give a simple example", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Translate this paragraph to French", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation" | |
| }, | |
| { | |
| "query": "Summarize the key points of agile methodology", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "summarization" | |
| }, | |
| { | |
| "query": "Explain binary search with a code example", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Write a Python function to check if a number is prime", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Compare REST and GraphQL APIs", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Explain the CAP theorem in distributed systems", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Write SQL to find duplicate rows in a table", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Explain gradient descent step by step", | |
| "complexity": 0.5, | |
| "tier": "medium", | |
| "domain": "math" | |
| }, | |
| { | |
| "query": "What is the time complexity of quicksort? Explain with an example", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Design a rate limiter for an API", | |
| "complexity": 0.62, | |
| "tier": "hard", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Implement an LRU cache in Python", | |
| "complexity": 0.6, | |
| "tier": "hard", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Explain the Transformer architecture in detail", | |
| "complexity": 0.7, | |
| "tier": "hard", | |
| "domain": "science" | |
| }, | |
| { | |
| "query": "Write a comprehensive tutorial on Docker and Kubernetes", | |
| "complexity": 0.68, | |
| "tier": "hard", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Analyze the pros and cons of microservices vs monoliths", | |
| "complexity": 0.65, | |
| "tier": "hard", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Derive the backpropagation equations from first principles", | |
| "complexity": 0.8, | |
| "tier": "hard", | |
| "domain": "math" | |
| }, | |
| { | |
| "query": "Design the Paxos consensus algorithm", | |
| "complexity": 0.92, | |
| "tier": "expert", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Prove that P ≠ NP (or outline the key open problems)", | |
| "complexity": 0.98, | |
| "tier": "expert", | |
| "domain": "math" | |
| }, | |
| { | |
| "query": "Design a distributed SQL database from scratch", | |
| "complexity": 0.95, | |
| "tier": "expert", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Implement a compiler for a simple language in Python", | |
| "complexity": 0.9, | |
| "tier": "expert", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Explain quantum entanglement and Bell's inequality with math", | |
| "complexity": 0.88, | |
| "tier": "expert", | |
| "domain": "science" | |
| }, | |
| { | |
| "query": "Write a full-stack web app with React and FastAPI", | |
| "complexity": 0.85, | |
| "tier": "expert", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Analyze the ethical implications of AI in healthcare", | |
| "complexity": 0.72, | |
| "tier": "hard", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Compare BERT and GPT architectures in depth", | |
| "complexity": 0.75, | |
| "tier": "hard", | |
| "domain": "science" | |
| }, | |
| { | |
| "query": "What is async/await in Python?", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Explain SOLID principles with examples", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Write a regex to validate email addresses", | |
| "complexity": 0.3, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Design a URL shortener system like bit.ly", | |
| "complexity": 0.7, | |
| "tier": "hard", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Implement a red-black tree in C++", | |
| "complexity": 0.8, | |
| "tier": "hard", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Explain Bayesian inference with an example", | |
| "complexity": 0.65, | |
| "tier": "hard", | |
| "domain": "math" | |
| }, | |
| { | |
| "query": "Write a neural network from scratch in numpy", | |
| "complexity": 0.82, | |
| "tier": "expert", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "Translate this legal document to Spanish", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "translation" | |
| }, | |
| { | |
| "query": "Summarize this 50-page research paper", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "summarization" | |
| }, | |
| { | |
| "query": "Debate the pros and cons of nuclear energy", | |
| "complexity": 0.6, | |
| "tier": "hard", | |
| "domain": "reasoning" | |
| }, | |
| { | |
| "query": "Write a creative short story about time travel", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "creative" | |
| }, | |
| { | |
| "query": "Explain what a closure is in JavaScript", | |
| "complexity": 0.3, | |
| "tier": "easy", | |
| "domain": "code" | |
| }, | |
| { | |
| "query": "What is the difference between TCP and UDP?", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "factual" | |
| }, | |
| { | |
| "query": "Prove the Pythagorean theorem", | |
| "complexity": 0.55, | |
| "tier": "medium", | |
| "domain": "math" | |
| }, | |
| { | |
| "query": "What is Python?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "What is the capital of France?", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Who invented the telephone?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "What does HTTP stand for?", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'hello' to Spanish", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What year was Python created?", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Is Java object-oriented?", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "What is RAM?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "What is 2+2?", | |
| "complexity": 0.02, | |
| "tier": "trivial", | |
| "domain": "math", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Name one planet in our solar system", | |
| "complexity": 0.03, | |
| "tier": "trivial", | |
| "domain": "science", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Summarize this: 'The quick brown fox jumps over the lazy dog.'", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Explain what a variable is in programming", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "What is the difference between a list and a tuple in Python?", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a simple hello world in JavaScript", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What is recursion? Give a simple example", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Translate 'Good morning, how are you?' to French", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Summarize the key points of agile methodology", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Solve for x: 2x + 5 = 15", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "What is photosynthesis? Explain briefly", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a haiku about spring", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Why is the sky blue?", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "reasoning", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Explain binary search with a code example", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a Python function to check if a number is prime", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Compare REST and GraphQL APIs", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the CAP theorem in distributed systems", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write SQL to find duplicate rows in a table", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Explain gradient descent step by step", | |
| "complexity": 0.5, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "What is the time complexity of quicksort? Explain with an example", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate this technical document abstract to German", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Summarize the plot of '1984' by George Orwell", | |
| "complexity": 0.37, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a short story about a robot learning to dream", | |
| "complexity": 0.44, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the theory of relativity in simple terms", | |
| "complexity": 0.46, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "What is the difference between supervised and unsupervised learning?", | |
| "complexity": 0.36, | |
| "tier": "medium", | |
| "domain": "factual", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Solve the quadratic equation: x^2 - 5x + 6 = 0", | |
| "complexity": 0.32, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Design a rate limiter for an API", | |
| "complexity": 0.62, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Implement an LRU cache in Python", | |
| "complexity": 0.6, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the Transformer architecture in detail", | |
| "complexity": 0.7, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a comprehensive tutorial on Docker and Kubernetes", | |
| "complexity": 0.68, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Analyze the pros and cons of microservices vs monoliths", | |
| "complexity": 0.65, | |
| "tier": "hard", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Derive the backpropagation equations from first principles", | |
| "complexity": 0.8, | |
| "tier": "hard", | |
| "domain": "math", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Prove that the square root of 2 is irrational", | |
| "complexity": 0.58, | |
| "tier": "hard", | |
| "domain": "math", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate this legal contract summary to Japanese", | |
| "complexity": 0.55, | |
| "tier": "hard", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Summarize the latest advances in quantum computing (2023-2024)", | |
| "complexity": 0.66, | |
| "tier": "hard", | |
| "domain": "summarization", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a poem in the style of Edgar Allan Poe about AI", | |
| "complexity": 0.63, | |
| "tier": "hard", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the ethics of autonomous weapons systems", | |
| "complexity": 0.67, | |
| "tier": "hard", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the role of the Golgi apparatus in cells?", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a script to scrape a website and extract all links", | |
| "complexity": 0.52, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the concept of idempotency in REST APIs", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Describe the water cycle for a 10-year-old", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "What is a neural network? Give a simple analogy", | |
| "complexity": 0.24, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Solve: If a train travels 60 mph for 2 hours, how far does it go?", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "math", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Name three data types in Python", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Translate 'Thank you very much' to Italian", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "What is the main ingredient in guacamole?", | |
| "complexity": 0.03, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Write a for loop that prints numbers 1 to 10 in Python", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Explain what a deadlock is in concurrency", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Compare socialism and capitalism", | |
| "complexity": 0.47, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the significance of the Higgs boson discovery?", | |
| "complexity": 0.56, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a SQL query to join three tables", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Create a simple JavaScript function that returns the current date", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What is the Pythagorean theorem? Provide an example", | |
| "complexity": 0.23, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Summarize the main ideas of Stoic philosophy", | |
| "complexity": 0.49, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Write a recipe for chocolate chip cookies in poetic form", | |
| "complexity": 0.51, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Explain the difference between CPU and GPU", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Solve: What is the derivative of x^3?", | |
| "complexity": 0.31, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Design a simple task scheduler in Python", | |
| "complexity": 0.59, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the concept of 'impostor syndrome' in the workplace", | |
| "complexity": 0.34, | |
| "tier": "easy", | |
| "domain": "reasoning", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "What are the three laws of robotics?", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "science", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Translate 'The food was delicious' to Mandarin Chinese", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a CSS snippet to center a div", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Explain the Drake equation", | |
| "complexity": 0.44, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "What is the difference between error and exception in programming?", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Solve: 15% of 200", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "math", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Write a haiku about debugging", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Explain the concept of 'technical debt'", | |
| "complexity": 0.37, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What are the benefits of using TypeScript over JavaScript?", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Summarize the plot of 'The Great Gatsby' in one paragraph", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate the following error message to Spanish: 'File not found'", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a Python decorator that measures execution time", | |
| "complexity": 0.55, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the concept of 'virtue ethics'", | |
| "complexity": 0.46, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the function of the mitochondria?", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Solve for x: log(x) = 2", | |
| "complexity": 0.3, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Design a simple REST API for a todo list", | |
| "complexity": 0.57, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the difference between let, const, and var in JavaScript", | |
| "complexity": 0.23, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a limerick about a programmer", | |
| "complexity": 0.32, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "What is the greenhouse effect? Explain simply", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'I would like to book a flight' to French", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What is the difference between a stack and a queue?", | |
| "complexity": 0.24, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Explain the concept of 'opportunity cost' in economics", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "reasoning", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a binary search algorithm in Python", | |
| "complexity": 0.43, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Summarize the key innovations of the Renaissance", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Solve: What is the area of a circle with radius 5?", | |
| "complexity": 0.1, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Create a simple HTML page with a button", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Explain the concept of 'black swan events'", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is CRISPR used for?", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a regular expression to match an email address", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Translate 'Where is the nearest hospital?' to German", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Write a short story about a character who wakes up with amnesia", | |
| "complexity": 0.49, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Explain the difference between correlation and causation", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "What is a SAT solver used for in computer science?", | |
| "complexity": 0.52, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is an API?", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Who wrote 'Romeo and Juliet'?", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "What does CSS stand for?", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'good night' to German", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "What is the square root of 64?", | |
| "complexity": 0.03, | |
| "tier": "trivial", | |
| "domain": "math", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What color is the sky on a clear day?", | |
| "complexity": 0.02, | |
| "tier": "trivial", | |
| "domain": "science", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "What does SQL stand for?", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Name one mammal that can fly", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "science", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Translate 'I love you' to French", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "What is the largest ocean on Earth?", | |
| "complexity": 0.03, | |
| "tier": "trivial", | |
| "domain": "factual", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Explain what a function is in programming", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a simple for loop in C++ that prints 0 to 9", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What is the difference between '==' and '===' in JavaScript?", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Translate 'The weather is nice today' to Italian", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Summarize the water cycle in two sentences", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Solve for y: 3y - 7 = 11", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Why do we have seasons?", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a two-line poem about the moon", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Explain why ice floats on water", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "reasoning", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "What is a stack overflow in programming?", | |
| "complexity": 0.23, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a Python function to find the maximum of three numbers", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "What is the difference between a class and an object?", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'Where is the bathroom?' to Spanish", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Summarize the main idea of the movie 'Inception'", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Solve: What is 25% of 80?", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "math", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a short story about a lost key", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain how memoization works with an example in JavaScript", | |
| "complexity": 0.44, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Compare TCP and UDP protocols in detail", | |
| "complexity": 0.43, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "What is the halting problem? Why is it important?", | |
| "complexity": 0.51, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Derive the formula for the area of a circle", | |
| "complexity": 0.37, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Explain the second law of thermodynamics", | |
| "complexity": 0.53, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Write a SQL query to get the top 5 highest paid employees", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Translate this business email to Japanese: 'Dear Sir, we appreciate your prompt response.'", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Summarize the plot of 'The Odyssey' in 100 words", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Write a sonnet about artificial intelligence", | |
| "complexity": 0.54, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the concept of race conditions in multithreading", | |
| "complexity": 0.46, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "What is the difference between deep learning and traditional machine learning?", | |
| "complexity": 0.47, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Solve the integral of x^2 dx", | |
| "complexity": 0.36, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Implement a binary tree traversal in Python (in-order)", | |
| "complexity": 0.56, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Design a simple load balancer algorithm", | |
| "complexity": 0.58, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a regular expression to validate a US phone number", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Explain the prisoner's dilemma and its implications", | |
| "complexity": 0.49, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "What is the Golden Ratio? Provide examples", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate a medical prescription summary to German", | |
| "complexity": 0.52, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Summarize the key arguments in Plato's 'Republic'", | |
| "complexity": 0.57, | |
| "tier": "hard", | |
| "domain": "summarization", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a dark fantasy short story about a cursed mirror", | |
| "complexity": 0.64, | |
| "tier": "hard", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Implement a concurrent web scraper in Python with asyncio", | |
| "complexity": 0.69, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the proof of Fermat's Last Theorem at a high level", | |
| "complexity": 0.78, | |
| "tier": "hard", | |
| "domain": "math", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Analyze the performance implications of columnar vs row-based storage", | |
| "complexity": 0.66, | |
| "tier": "hard", | |
| "domain": "reasoning", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Describe the architecture of a distributed key-value store like DynamoDB", | |
| "complexity": 0.72, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "What is the Bellman equation in reinforcement learning?", | |
| "complexity": 0.63, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Translate a complex legal disclaimer to French", | |
| "complexity": 0.61, | |
| "tier": "hard", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Design a real-time chat system supporting 1 million concurrent users", | |
| "complexity": 0.75, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the concept of zero-knowledge proofs", | |
| "complexity": 0.7, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a detailed guide on optimizing Python code with C extensions", | |
| "complexity": 0.68, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Prove that there are infinitely many prime numbers", | |
| "complexity": 0.59, | |
| "tier": "hard", | |
| "domain": "math", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "What is the 'butterfly effect' in chaos theory?", | |
| "complexity": 0.55, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Write a memoization decorator in Python", | |
| "complexity": 0.53, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Explain the difference between a process and a thread", | |
| "complexity": 0.31, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'Congratulations on your new job' to Portuguese", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "What is the role of the nucleus in a cell?", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Write a Java program to reverse a string", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Solve for x: 4x^2 - 16 = 0", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a haiku about winter", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Explain why the sky is red at sunset", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "reasoning", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "What is an abstract class in Java?", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a simple HTML form with two input fields", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate 'I need a doctor' to Korean", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Summarize the plot of 'Hamlet' in three sentences", | |
| "complexity": 0.32, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the difference between GET and POST in HTTP?", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a C program to calculate factorial using recursion", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the concept of 'sharding' in databases", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the difference between L1 and L2 regularization?", | |
| "complexity": 0.54, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Solve the system of equations: 2x + y = 10, x - y = 2", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Write a Python script to download an image from a URL", | |
| "complexity": 0.36, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Explain the Turing test and its criticisms", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "reasoning", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Translate 'The system is currently offline' to Russian", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Summarize the main findings of the Human Genome Project", | |
| "complexity": 0.5, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a villanelle about lost time", | |
| "complexity": 0.62, | |
| "tier": "hard", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Implement a connection pool in Go", | |
| "complexity": 0.71, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the concept of 'eventual consistency' in distributed systems", | |
| "complexity": 0.65, | |
| "tier": "hard", | |
| "domain": "reasoning", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Derive the Black-Scholes equation for option pricing", | |
| "complexity": 0.85, | |
| "tier": "expert", | |
| "domain": "math", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Design a distributed consensus protocol like Raft", | |
| "complexity": 0.88, | |
| "tier": "expert", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Explain the holographic principle in theoretical physics", | |
| "complexity": 0.92, | |
| "tier": "expert", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a full compiler frontend for a small language in Rust", | |
| "complexity": 0.95, | |
| "tier": "expert", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Prove the Riemann Hypothesis (outline the main approach)", | |
| "complexity": 0.99, | |
| "tier": "expert", | |
| "domain": "math", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Analyze the security of the TLS 1.3 handshake", | |
| "complexity": 0.82, | |
| "tier": "expert", | |
| "domain": "reasoning", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Implement a B+ tree index from scratch", | |
| "complexity": 0.87, | |
| "tier": "expert", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate a complex patent document to Chinese", | |
| "complexity": 0.76, | |
| "tier": "hard", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "What is the difference between Bayesian and frequentist statistics?", | |
| "complexity": 0.67, | |
| "tier": "hard", | |
| "domain": "math", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a high-performance WebSocket server in C++", | |
| "complexity": 0.79, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the many-worlds interpretation of quantum mechanics", | |
| "complexity": 0.73, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Design a URL shortening service like TinyURL (full design)", | |
| "complexity": 0.69, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Prove the central limit theorem", | |
| "complexity": 0.84, | |
| "tier": "expert", | |
| "domain": "math", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Write a distributed task queue using Redis", | |
| "complexity": 0.74, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the concept of 'transfer learning' in neural networks", | |
| "complexity": 0.56, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Solve the traveling salesman problem using dynamic programming", | |
| "complexity": 0.77, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate a software license agreement to Spanish", | |
| "complexity": 0.58, | |
| "tier": "hard", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a detailed critique of the OpenAI GPT-4 architecture", | |
| "complexity": 0.7, | |
| "tier": "hard", | |
| "domain": "reasoning", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "What is the curse of dimensionality in machine learning?", | |
| "complexity": 0.51, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Fix this Python code: `def add(a,b): return a-b` – it should add, not subtract.", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a Python one-liner to reverse a string.", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Why does this JavaScript code print 'undefined'? `var x; console.log(x);`", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Implement a function `is_palindrome(s)` in Python that ignores spaces and case.", | |
| "complexity": 0.32, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Debug this SQL: `SELECT * FORM users WHERE name = 'John';`", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Write a recursive function to compute the nth Fibonacci number in Java.", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "What is the output of `console.log(1 + '2' + 3)` in JavaScript? Explain.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Write a C function to swap two integers using pointers.", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Fix the memory leak in this C++ snippet: `int* p = new int; p = new int; delete p;`", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Implement a queue using two stacks in Python.", | |
| "complexity": 0.52, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a regex to extract all email addresses from a text.", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Why does this infinite loop happen? `for i in range(10): i -= 1`", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a Python decorator that caches return values of a function.", | |
| "complexity": 0.58, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Convert this list comprehension to a for loop: `[x**2 for x in range(10) if x%2==0]`", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a SQL query to find employees who earn more than their managers.", | |
| "complexity": 0.44, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Implement a binary search in a sorted array (any language).", | |
| "complexity": 0.36, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Debug this Python: `print('Hello' + 123)` – what error and how to fix?", | |
| "complexity": 0.1, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Write a function that merges two sorted lists into one sorted list (O(n)).", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Explain why `[1,2,3].map(parseInt)` returns `[1, NaN, NaN]` in JavaScript.", | |
| "complexity": 0.47, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Write a simple HTML page that fetches data from a REST API and displays it.", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Implement a singleton pattern in Python.", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Fix the race condition in this multi‑threaded Python code (pseudo).", | |
| "complexity": 0.63, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a recursive descent parser for simple arithmetic expressions (+, -, *, /).", | |
| "complexity": 0.71, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Why does this C code crash? `int arr[5]; arr[10] = 42;`", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a Python generator that yields the Fibonacci sequence infinitely.", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Implement an LRU cache using `OrderedDict` in Python.", | |
| "complexity": 0.59, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "What do `*args` and `**kwargs` do in Python? Give an example.", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Write a JavaScript function that throttles another function (limit calls per second).", | |
| "complexity": 0.55, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Debug this SQL injection vulnerability: `\"SELECT * FROM users WHERE id = \" + user_id`", | |
| "complexity": 0.3, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Implement a simple HTTP server in Python using `sockets`.", | |
| "complexity": 0.62, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain the output: `console.log([] + []); console.log([] + {}); console.log({} + []);`", | |
| "complexity": 0.49, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Write a C++ program that reverses a linked list.", | |
| "complexity": 0.46, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Convert this `try/except` to using `contextlib.suppress` in Python.", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Implement the Sieve of Eratosthenes in Java.", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Why does `0.1 + 0.2 !== 0.3` in JavaScript? Explain floating point.", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a Python script to find duplicate files in a directory (by hash).", | |
| "complexity": 0.51, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Fix the deadlock in this pseudocode: two threads lock A then B, and B then A.", | |
| "complexity": 0.66, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Implement a simple event emitter in JavaScript (Node.js style).", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "What is tail recursion? Convert this factorial to tail‑recursive: `def fact(n): return 1 if n==0 else n*fact(n-1)`", | |
| "complexity": 0.37, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Write a Go routine that computes the sum of squares concurrently.", | |
| "complexity": 0.54, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'The server is down' to Arabic", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Summarize the concept of 'reference counting' in memory management.", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Solve `∫ x e^x dx`", | |
| "complexity": 0.43, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain why black holes evaporate (Hawking radiation).", | |
| "complexity": 0.61, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a haiku about a segfault", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "What is the difference between `malloc` and `calloc` in C?", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a Python script to fetch JSON from an API and pretty‑print it.", | |
| "complexity": 0.24, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Debug this React hook: `useEffect(() => { setCount(count+1) }, [])` – why infinite loop?", | |
| "complexity": 0.4, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Implement a deep copy function for nested dictionaries in Python.", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Explain the `volatile` keyword in Java.", | |
| "complexity": 0.44, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a SQL query to delete duplicate rows keeping one copy.", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Why does this Python code raise `UnboundLocalError`? `x = 10; def foo(): print(x); x=5`", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Implement a simple key‑value store with TTL (time‑to‑live) in Python.", | |
| "complexity": 0.57, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'The warranty is void if seal is broken' to Mandarin.", | |
| "complexity": 0.36, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Summarize the paper 'Attention is All You Need' in 5 bullet points.", | |
| "complexity": 0.49, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Solve `det([[1,2],[3,4]])` (determinant).", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Explain the chemical process of rusting.", | |
| "complexity": 0.23, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a limerick about a null pointer", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "What is a JIT compiler? Give an example runtime.", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "factual", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Write a C# method that reads a CSV file and returns a list of objects.", | |
| "complexity": 0.42, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Fix the JSON syntax error: `{name: 'John', age: 30}`", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "code", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Implement a rate limiter using the token bucket algorithm in Python.", | |
| "complexity": 0.65, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the output of `(0 == '0')` vs `(0 === '0')` in JavaScript.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a Bash script to find all `.log` files older than 7 days and compress them.", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Debug this Python multiprocessing code that hangs: `p = Pool(); p.map(f, range(10)); p.close()` (missing join).", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Write a regular expression to match a valid IPv4 address.", | |
| "complexity": 0.48, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate 'Please sign here' to Russian.", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Summarize the concept of 'copy‑on‑write' in operating systems.", | |
| "complexity": 0.37, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Solve `dy/dx = y` with initial condition y(0)=1.", | |
| "complexity": 0.3, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Explain the Doppler effect with an example.", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a tanka (5 lines) about a broken build pipeline.", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "What is the difference between `INNER JOIN` and `LEFT JOIN`?", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Write a Python script that watches a directory for new files and processes them.", | |
| "complexity": 0.56, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Fix the off‑by‑one error: `for i in range(1, len(arr)): if arr[i] > arr[i-1]:` – correct.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Implement a Bloom filter in Python (simple version).", | |
| "complexity": 0.6, | |
| "tier": "hard", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Explain why `'5' - 3` works but `'5' + 3` gives different results in JavaScript.", | |
| "complexity": 0.31, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Write a Golang function that reads a file line by line.", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'This is a confidential document' to German.", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Summarize the key differences between TCP and UDP.", | |
| "complexity": 0.24, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Solve the eigenvalue problem for matrix `[[2, 1], [1, 2]]`.", | |
| "complexity": 0.53, | |
| "tier": "medium", | |
| "domain": "math", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Explain the double‑slit experiment in quantum mechanics.", | |
| "complexity": 0.58, | |
| "tier": "hard", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a sonnet about a race condition (14 lines).", | |
| "complexity": 0.63, | |
| "tier": "hard", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "What is the purpose of `__slots__` in Python?", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "factual", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a Rust function that takes a string and returns the first word.", | |
| "complexity": 0.32, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Fix the SQL injection in this PHP code: `$query = \"SELECT * FROM users WHERE id = $_GET[id]\";`", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "code", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Implement a priority queue using a binary heap in Java.", | |
| "complexity": 0.55, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Why does `a = []; a.append(a)` cause infinite recursion when printed? Explain.", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Write a JavaScript function that deep freezes an object.", | |
| "complexity": 0.46, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'Temperature exceeds safe limit' to Japanese.", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Summarize the plot of 'The Metamorphosis' by Kafka.", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Solve `lim x→0 (sin x)/x`.", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "math", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Explain why the sky appears blue (Rayleigh scattering).", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Write a seven‑line poem about a memory leak.", | |
| "complexity": 0.38, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "What is the difference between a thread and a coroutine?", | |
| "complexity": 0.43, | |
| "tier": "medium", | |
| "domain": "factual", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Write a Python script that converts a CSV to JSON.", | |
| "complexity": 0.31, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Fix the race condition in this Python async code: `async def f(): global x; x+=1` (use lock).", | |
| "complexity": 0.51, | |
| "tier": "medium", | |
| "domain": "code", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'The quick brown fox jumps over the lazy dog' to Spanish.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Paraphrase this sentence: 'Despite the rain, the event was a huge success.'", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'Where is the nearest metro station?' to French.", | |
| "complexity": 0.1, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Correct the grammar: 'He don't know nothing about that.'", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'I would like to order a vegetarian pizza' to Italian.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Rewrite this in formal English: 'Hey, can you send me the doc ASAP?'", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'The system is under maintenance' to German.", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Convert this passive voice to active: 'The report was written by John.'", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Translate 'What is your name?' to Mandarin Chinese (pinyin).", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Paraphrase the idiom 'It's raining cats and dogs' into plain English.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'Please call back later' to Japanese.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Change this sentence to past tense: 'I go to the gym every day.'", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Translate 'The price does not include tax' to Portuguese.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Summarize this paragraph in 10 words: 'Machine learning is a subset of artificial intelligence that enables systems to learn from data.'", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate 'I have a meeting at 3 PM' to Russian.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Explain the difference between 'affect' and 'effect' with examples.", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'Your session has expired' to Arabic.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Rewrite this sentence more concisely: 'Due to the fact that it was raining, we canceled the picnic.'", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'Can you help me with this problem?' to Hindi (Romanized).", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Change this to a question: 'She knows the answer.'", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Translate 'The file is corrupted' to Korean.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Paraphrase this technical sentence: 'Authentication via OAuth 2.0 provides delegated access.'", | |
| "complexity": 0.34, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate a short poem from English to French (preserve rhyme).", | |
| "complexity": 0.45, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Detect the language of this text: 'Bonjour, comment ça va?' and translate to English.", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Write a grammatically correct sentence using 'their', 'there', and 'they're'.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Translate 'The application has encountered an unexpected error' to German.", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Rewrite this in the imperative mood: 'You should read the instructions carefully.'", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'I apologize for the inconvenience' to Spanish.", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Convert this direct speech to indirect: 'He said, \"I will come tomorrow.\"'", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate 'Please proceed to gate B12' to Japanese.", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Simplify this legalese: 'The party of the first part shall indemnify the party of the second part.'", | |
| "complexity": 0.41, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'The deadline is Friday at 5 PM' to Italian.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Explain what a 'malapropism' is and give an example.", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate a business email subject: 'Q3 Financial Results Attached' to French.", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Change this to future perfect tense: 'I finish the project.'", | |
| "complexity": 0.1, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Translate 'The server will reboot in 5 minutes' to Portuguese.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Paraphrase this scientific abstract (2 sentences) for a general audience.", | |
| "complexity": 0.39, | |
| "tier": "medium", | |
| "domain": "summarization", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate 'Your payment was successful' to Russian.", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Rewrite this sentence in the negative form: 'Everyone attended the meeting.'", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Translate 'Please do not touch the glass' to Mandarin.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Correct the spelling: 'recieve', 'seperate', 'definately'", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Translate 'The operation was completed successfully' to Arabic.", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Convert this bullet list into a coherent paragraph.", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'What time does the store close?' to German.", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Explain the difference between 'who' and 'whom' with examples.", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate 'This feature is not yet implemented' to Spanish.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Rewrite this sentence using a simile: 'Her voice was loud.'", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'Please verify your email address' to Japanese.", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Change this from first person to third person: 'I think this solution is optimal.'", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Translate a chat message: 'BRB, gonna grab coffee' to formal English.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Paraphrase 'The new update includes several security patches' without changing meaning.", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Translate 'The connection has timed out' to Italian.", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Write a haiku about the translator's dilemma.", | |
| "complexity": 0.31, | |
| "tier": "medium", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'Access denied' to French.", | |
| "complexity": 0.06, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Explain the concept of 'cognates' in linguistics with examples.", | |
| "complexity": 0.35, | |
| "tier": "medium", | |
| "domain": "science", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate a short warning: 'High voltage, risk of electric shock' to Spanish.", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Rewrite this in colloquial English: 'I am extremely fatigued.'", | |
| "complexity": 0.1, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'The package will arrive within 2 business days' to German.", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Change this sentence to exclamatory: 'It is a beautiful day.'", | |
| "complexity": 0.05, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Translate 'Please enter your password' to Korean.", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Summarize the difference between British and American English spelling (5 examples).", | |
| "complexity": 0.27, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'I don't understand this instruction' to Portuguese.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Paraphrase this corporate jargon: 'We'll circle back on that action item.'", | |
| "complexity": 0.22, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'Your account has been locked' to Arabic.", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Write a sentence using the word 'ubiquitous' correctly.", | |
| "complexity": 0.12, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'The meeting was rescheduled to Monday' to Italian.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Change this from active to passive: 'The chef cooked a delicious meal.'", | |
| "complexity": 0.08, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'Please wait while we process your request' to Japanese.", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Explain the term 'portmanteau' and give three examples.", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Translate a legal disclaimer: 'Not responsible for lost or stolen items' to French.", | |
| "complexity": 0.33, | |
| "tier": "medium", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Rewrite this sentence without using the word 'very': 'She was very tired.'", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'Your session will expire in 10 minutes' to Russian.", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Convert this to a rhetorical question: 'You should know better.'", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'The file size exceeds the limit' to Spanish.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Summarize the plot of 'The Tower of Babel' story in two sentences.", | |
| "complexity": 0.25, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Translate 'Please accept our sincere apologies' to German.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Paraphrase this proverb: 'A bird in the hand is worth two in the bush.'", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Translate 'The system will restart automatically' to Mandarin.", | |
| "complexity": 0.18, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Fix the subject‑verb agreement: 'The list of items are on the table.'", | |
| "complexity": 0.07, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "llama3.2:3b" | |
| }, | |
| { | |
| "query": "Translate 'I've attached the document for your review' to Italian.", | |
| "complexity": 0.17, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-3.5-turbo" | |
| }, | |
| { | |
| "query": "Explain the difference between denotation and connotation with examples.", | |
| "complexity": 0.29, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Translate 'Your request has been received' to Korean.", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| }, | |
| { | |
| "query": "Rewrite this sentence using alliteration: 'The dog ran fast.'", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate 'The store is closed on Sundays' to Portuguese.", | |
| "complexity": 0.13, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-5-haiku-20241022" | |
| }, | |
| { | |
| "query": "Change this from singular to plural: 'The child is playing.'", | |
| "complexity": 0.04, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Translate 'Please note that prices are subject to change' to French.", | |
| "complexity": 0.24, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:8b" | |
| }, | |
| { | |
| "query": "Paraphrase this headline: 'Tech Giant Announces Record Profits Amid Layoffs'", | |
| "complexity": 0.26, | |
| "tier": "easy", | |
| "domain": "summarization", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'The shipment is delayed due to weather' to German.", | |
| "complexity": 0.19, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-small-latest" | |
| }, | |
| { | |
| "query": "Write a sentence that is grammatically correct but semantically nonsensical.", | |
| "complexity": 0.23, | |
| "tier": "easy", | |
| "domain": "creative", | |
| "model": "claude-3-haiku-20240307" | |
| }, | |
| { | |
| "query": "Translate 'Your feedback is valuable to us' to Japanese.", | |
| "complexity": 0.16, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gemini-1.5-flash" | |
| }, | |
| { | |
| "query": "Explain what a 'double negative' is and why it's often avoided in standard English.", | |
| "complexity": 0.2, | |
| "tier": "easy", | |
| "domain": "factual", | |
| "model": "deepseek-chat" | |
| }, | |
| { | |
| "query": "Translate 'The server is experiencing high load' to Arabic.", | |
| "complexity": 0.21, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "gpt-4o" | |
| }, | |
| { | |
| "query": "Rewrite this sentence as a conditional: 'You didn't water the plant, so it died.'", | |
| "complexity": 0.14, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "llama3.1:70b" | |
| }, | |
| { | |
| "query": "Translate 'Congratulations on your promotion' to Spanish.", | |
| "complexity": 0.11, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "claude-3-5-sonnet-20241022" | |
| }, | |
| { | |
| "query": "Summarize the concept of 'code‑switching' in linguistics in one sentence.", | |
| "complexity": 0.28, | |
| "tier": "easy", | |
| "domain": "science", | |
| "model": "gpt-4o-mini" | |
| }, | |
| { | |
| "query": "Translate 'Please do not disturb' to Italian.", | |
| "complexity": 0.09, | |
| "tier": "trivial", | |
| "domain": "translation", | |
| "model": "gemini-1.5-pro" | |
| }, | |
| { | |
| "query": "Convert this sentence to use the subjunctive mood: 'I wish I was there.' (correction)", | |
| "complexity": 0.15, | |
| "tier": "easy", | |
| "domain": "translation", | |
| "model": "mistral-large-latest" | |
| } | |
| ] |