| 1 | GPT-5.4 Thinking xHigh Effort · OpenAI | 70.0% | — | — | — | — |
| 2 | Gemini 3.1 Pro Preview High · Google | 65.0% | — | — | — | — |
| 3 | Claude 4.5 Opus Thinking High Effort · Anthropic | 63.3% | — | — | — | — |
| 4 | Claude 4.5 Opus Medium Effort · Anthropic | 63.3% | — | — | — | — |
| 5 | Claude 4.6 Opus Thinking High Effort · Anthropic | 61.7% | — | — | — | — |
| 6 | Claude 4.6 Sonnet Thinking Medium Effort · Anthropic | 60.0% | — | — | — | — |
| 7 | Gemini 3 Pro Preview High · Google | 55.0% | — | — | — | — |
| 8 | GPT-5.3 Codex High · OpenAI | 55.0% | — | — | — | — |
| 9 | Qwen 3.6 Plus · Alibaba | 55.0% | — | — | — | — |
| 10 | GLM 5.1 · Z.AI | 55.0% | — | — | — | — |
| 11 | GLM 5 · Z.AI | 55.0% | — | — | — | — |
| 12 | GPT-5.1 Codex Max High · OpenAI | 53.3% | — | — | — | — |
| 13 | GPT-5.1 High · OpenAI | 53.3% | — | — | — | — |
| 14 | GPT-5.1 Codex · OpenAI | 53.3% | — | — | — | — |
| 15 | Claude Sonnet 4.5 Thinking · Anthropic | 53.3% | — | — | — | — |
| 16 | Claude 4.1 Opus · Anthropic | 53.3% | — | — | — | — |
| 17 | GPT-5.2 High · OpenAI | 51.7% | — | — | — | — |
| 18 | GPT-5.2 Codex · OpenAI | 51.7% | — | — | — | — |
| 19 | GPT-5 Pro · OpenAI | 51.7% | — | — | — | — |
| 20 | Minimax M2.5 · Minimax | 51.7% | — | — | — | — |
| 21 | Minimax M2.7 · Minimax | 50.0% | — | — | — | — |
| 22 | GPT-5.4 Nano xHigh · OpenAI | 49.1% | — | — | — | — |
| 23 | Kimi K2.5 Thinking · Moonshot AI | 48.3% | — | — | — | — |
| 24 | Claude 4.1 Opus Thinking · Anthropic | 48.3% | — | — | — | — |
| 25 | Claude Sonnet 4.5 · Anthropic | 48.3% | — | — | — | — |
| 26 | GPT-5.4 Mini xHigh · OpenAI | 47.5% | — | — | — | — |
| 27 | GPT-5 Mini High · OpenAI | 46.7% | — | — | — | — |
| 28 | DeepSeek V3.2 · DeepSeek | 46.7% | — | — | — | — |
| 29 | Grok 4.20 Beta · xAI | 43.3% | — | — | — | — |
| 30 | Devstral 2 · Mistral | 43.3% | — | — | — | — |
| 31 | Claude Haiku 4.5 Thinking · Anthropic | 41.7% | — | — | — | — |
| 32 | GLM 4.7 · Z.AI | 41.7% | — | — | — | — |
| 33 | Gemini 3 Flash Preview High · Google | 40.0% | — | — | — | — |
| 34 | DeepSeek V3.2 Thinking · DeepSeek | 40.0% | — | — | — | — |
| 35 | Gemma 4 31B · Google | 40.0% | — | — | — | — |
| 36 | Claude 4 Sonnet Thinking · Anthropic | 40.0% | — | — | — | — |
| 37 | GPT-5.1 Codex Mini · OpenAI | 40.0% | — | — | — | — |
| 38 | GPT-5.2 No Thinking · OpenAI | 40.0% | — | — | — | — |
| 39 | Kimi K2 Thinking · Moonshot AI | 38.3% | — | — | — | — |
| 40 | Claude 4 Sonnet · Anthropic | 38.3% | — | — | — | — |
| 41 | Grok 4.20 Beta (Non-Reasoning) · xAI | 38.3% | — | — | — | — |
| 42 | DeepSeek V3.2 Exp · DeepSeek | 36.7% | — | — | — | — |
| 43 | GLM 4.6 · Z.AI | 35.0% | — | — | — | — |
| 44 | Gemini 3.1 Flash Lite Preview High · Google | 33.3% | — | — | — | — |
| 45 | Gemini 2.5 Pro (Max Thinking) · Google | 33.3% | — | — | — | — |
| 46 | Claude Haiku 4.5 · Anthropic | 33.3% | — | — | — | — |
| 47 | Grok Code Fast · xAI | 33.3% | — | — | — | — |
| 48 | Grok 4.1 Fast · xAI | 31.7% | — | — | — | — |
| 49 | DeepSeek V3.2 Exp Thinking · DeepSeek | 31.7% | — | — | — | — |
| 50 | Kimi K2 Instruct · Moonshot AI | 31.7% | — | — | — | — |
| 51 | Grok 4 · xAI | 30.0% | — | — | — | — |
| 52 | MiMo V2 Pro · Xiaomi | 30.0% | — | — | — | — |
| 53 | GPT-5.3 Instant · OpenAI | 28.3% | — | — | — | — |
| 54 | GPT-5.1 No Thinking · OpenAI | 28.3% | — | — | — | — |
| 55 | Gemini 2.5 Flash (Max Thinking) (2025-09-25) · Google | 23.3% | — | — | — | — |
| 56 | GPT-5 Nano High · OpenAI | 23.3% | — | — | — | — |
| 57 | Nemotron 3 Super 120B A12B · NVIDIA | 23.0% | — | — | — | — |
| 58 | Gemini 2.5 Flash (Max Thinking) (2025-06-05) · Google | 16.7% | — | — | — | — |
| 59 | GPT OSS 120b · OpenAI | 16.7% | — | — | — | — |
| 60 | Qwen 3 235B A22B Instruct 2507 · Alibaba | 13.3% | — | — | — | — |
| 61 | Qwen 3 Next 80B A3B Instruct · Alibaba | 10.0% | — | — | — | — |
| 62 | Grok 4.1 Fast (Non-Reasoning) · xAI | 10.0% | — | — | — | — |
| 63 | Qwen 3 Next 80B A3B Thinking · Alibaba | 8.3% | — | — | — | — |
| 64 | Qwen 3 235B A22B Thinking 2507 · Alibaba | 6.7% | — | — | — | — |
| 65 | Gemini 2.5 Flash Lite (Max Thinking) (2025-06-17) · Google | 5.0% | — | — | — | — |
| 66 | GLM 5V Turbo · Z.AI | 3.3% | — | — | — | — |
| 67 | Qwen 3 32B · Alibaba | 3.3% | — | — | — | — |
| 68 | GLM 4.6V · Z.AI | 3.3% | — | — | — | — |
| 69 | Trinity Large Preview · Arcee | 3.3% | — | — | — | — |
| 70 | Gemini 2.5 Flash Lite (Max Thinking) (2025-09-25) · Google | 1.7% | — | — | — | — |
| 71 | Qwen 3 30B A3B · Alibaba | 1.7% | — | — | — | — |