{
  "benchmark": "gsm8k",
  "trust_filter": "attested",
  "count": 10,
  "queried_at": "2026-04-26T14:21:52.406Z",
  "results": [
    {
      "model": "claude-sonnet-4.5",
      "service": "anthropic-claude",
      "score": 100,
      "source": "attested",
      "sample_count": 3,
      "ci_low": 43.85,
      "ci_high": 100,
      "ci_half_width": 28.08,
      "thin_flag": true,
      "finished_at": "2026-04-26T07:24:33.356Z",
      "run_id": "run-5c103412a8eb",
      "verify_url": "https://benchlist.ai/verify/run-5c103412a8eb"
    },
    {
      "model": "claude-opus-4.7",
      "service": "anthropic-claude",
      "score": 100,
      "source": "attested",
      "sample_count": 8,
      "ci_low": 67.56,
      "ci_high": 100,
      "ci_half_width": 16.22,
      "thin_flag": true,
      "finished_at": "2026-04-26T07:52:46.969Z",
      "run_id": "run-73546fdc6579",
      "verify_url": "https://benchlist.ai/verify/run-73546fdc6579"
    },
    {
      "model": "claude-haiku-4.5",
      "service": "anthropic-claude",
      "score": 100,
      "source": "attested",
      "sample_count": 8,
      "ci_low": 67.56,
      "ci_high": 100,
      "ci_half_width": 16.22,
      "thin_flag": true,
      "finished_at": "2026-04-26T07:48:34.414Z",
      "run_id": "run-9b7f26a8846f",
      "verify_url": "https://benchlist.ai/verify/run-9b7f26a8846f"
    },
    {
      "model": "claude-sonnet-4-5-20250929",
      "service": "anthropic-claude",
      "score": 94.1,
      "source": "attested",
      "sample_count": 20,
      "ci_low": 75.16,
      "ci_high": 98.82,
      "ci_half_width": 11.83,
      "thin_flag": true,
      "finished_at": "2026-04-24T16:42:43Z",
      "run_id": "run-gsm8k-290568d1f2",
      "verify_url": "https://benchlist.ai/verify/run-gsm8k-290568d1f2"
    },
    {
      "model": "llama3-8b-q40",
      "service": "openrouter",
      "score": 70,
      "source": "local",
      "sample_count": 10,
      "ci_low": 39.68,
      "ci_high": 89.22,
      "ci_half_width": 24.77,
      "thin_flag": true,
      "finished_at": "2026-04-26T07:05:53Z",
      "run_id": "run-local-863019f170ee",
      "verify_url": "https://benchlist.ai/verify/run-local-863019f170ee"
    },
    {
      "model": "deepseek-coder-v2-15.7b",
      "service": "openrouter",
      "score": 70,
      "source": "local",
      "sample_count": 50,
      "ci_low": 56.25,
      "ci_high": 80.9,
      "ci_half_width": 12.32,
      "thin_flag": false,
      "finished_at": "2026-04-26T07:51:54Z",
      "run_id": "run-local-a32cbb52839c",
      "verify_url": "https://benchlist.ai/verify/run-local-a32cbb52839c"
    },
    {
      "model": "qwen3.6-27b-dense-q5km",
      "service": "openrouter",
      "score": 40,
      "source": "local",
      "sample_count": 50,
      "ci_low": 27.61,
      "ci_high": 53.82,
      "ci_half_width": 13.11,
      "thin_flag": false,
      "finished_at": "2026-04-26T08:30:45Z",
      "run_id": "run-local-4c80350d4337",
      "verify_url": "https://benchlist.ai/verify/run-local-4c80350d4337"
    },
    {
      "model": "mistral-7b-q4km",
      "service": "openrouter",
      "score": 10,
      "source": "local",
      "sample_count": 50,
      "ci_low": 4.35,
      "ci_high": 21.36,
      "ci_half_width": 8.51,
      "thin_flag": false,
      "finished_at": "2026-04-26T07:37:05Z",
      "run_id": "run-local-9e4635fcf727",
      "verify_url": "https://benchlist.ai/verify/run-local-9e4635fcf727"
    },
    {
      "model": "glm-4.7-flash-30b-q4km",
      "service": "openrouter",
      "score": 6,
      "source": "local",
      "sample_count": 50,
      "ci_low": 2.06,
      "ci_high": 16.22,
      "ci_half_width": 7.08,
      "thin_flag": false,
      "finished_at": "2026-04-26T10:08:41Z",
      "run_id": "run-local-2d1f8ab358c4",
      "verify_url": "https://benchlist.ai/verify/run-local-2d1f8ab358c4"
    },
    {
      "model": "qwen3.6-35b-q4km",
      "service": "openrouter",
      "score": 0,
      "source": "local",
      "sample_count": 50,
      "ci_low": 0,
      "ci_high": 7.14,
      "ci_half_width": 3.57,
      "thin_flag": false,
      "finished_at": "2026-04-26T07:10:41Z",
      "run_id": "run-local-1864885ad28f",
      "verify_url": "https://benchlist.ai/verify/run-local-1864885ad28f"
    }
  ]
}