{
  "name": "Git.Top Public Trust Benchmark",
  "positioning": "The Knowledge Graph of Open Source",
  "summary": "Recommendation eval top-3 hit rate is 1; explanation checks are 12/12; data trust is 81/100 with 12 review queue items.",
  "generated_at": "2026-07-04T06:31:21.558Z",
  "evaluation": {
    "generated_at": "2026-06-30T00:08:13.734Z",
    "evaluated_cases": 28,
    "top1_hit_rate": 0.929,
    "top3_hit_rate": 1,
    "category_accuracy": 1,
    "deployment_accuracy": 1,
    "cloudflare_readiness_accuracy": 1,
    "unacceptable_hit_count": 0,
    "generated_fixture_projects": 504,
    "d1_fixture_projects": 504,
    "effective_generated_fixture_projects": 500,
    "synthetic_projects": 0,
    "review_focus": [
      {
        "id": "search-prompt-tooling",
        "issue": "top-1 miss",
        "expected": [
          "promptfoo/promptfoo",
          "guardrails-ai/guardrails",
          "dottxt-ai/outlines",
          "567-labs/instructor"
        ],
        "observed": [
          "openai/openai-structured-outputs-samples",
          "guardrails-ai/guardrails",
          "567-labs/instructor",
          "microsoft/TypeChat",
          "BoundaryML/baml"
        ]
      },
      {
        "id": "search-coding-agent",
        "issue": "top-1 miss",
        "expected": [
          "openai/codex",
          "cline/cline",
          "aider-ai/aider",
          "OpenHands/OpenHands"
        ],
        "observed": [
          "smol-ai/developer",
          "OpenHands/OpenHands",
          "cline/cline",
          "google-gemini/gemini-cli",
          "anomalyco/opencode"
        ]
      }
    ],
    "source_report": "docs/EVAL_QUALITY.md"
  },
  "explanations": {
    "generated_at": "2026-07-04T02:51:39.580Z",
    "checks": 12,
    "passed": 12,
    "failed": 0,
    "source_report": "docs/EVAL_EXPLANATIONS.md",
    "coverage": 1
  },
  "data_coverage": {
    "project_count": 770,
    "category_coverage": "13/13",
    "covered_categories": 13,
    "total_categories": 13,
    "missing_categories": [],
    "low_confidence_classification_count": 201,
    "low_confidence_classification_rate": 0.261,
    "stale_project_count": 243,
    "stale_project_rate": 0.316,
    "collection_count": 42,
    "collection_review_count": 4,
    "collection_review_rate": 0.005,
    "data_trust_score": 81,
    "release_score": 100,
    "risk_level": "high"
  },
  "review_queue": {
    "review_count": 12,
    "low_signal_count": 9,
    "medium_signal_count": 17,
    "top_impact_score": 107,
    "top_items": [
      {
        "project_id": "modelcontextprotocol/servers",
        "category": "mcp_server",
        "impact_score": 107,
        "reasons": [
          "Repository is a collection and may need curation semantics review."
        ],
        "suggested_action": "Confirm collection scope, freshness, and whether the category should represent resources rather than runtime code."
      },
      {
        "project_id": "n8n-io/self-hosted-ai-starter-kit",
        "category": "ai_app_template",
        "impact_score": 48,
        "reasons": [
          "Repository is a collection and may need curation semantics review."
        ],
        "suggested_action": "Confirm collection scope, freshness, and whether the category should represent resources rather than runtime code."
      },
      {
        "project_id": "vonzosten/awesome-LangGraph",
        "category": "ai_app_template",
        "impact_score": 40,
        "reasons": [
          "Repository is a collection and may need curation semantics review."
        ],
        "suggested_action": "Confirm collection scope, freshness, and whether the category should represent resources rather than runtime code."
      },
      {
        "project_id": "arabicapp/everything-claude-code",
        "category": "agent_framework",
        "impact_score": 32,
        "reasons": [
          "Repository is a collection and may need curation semantics review."
        ],
        "suggested_action": "Confirm collection scope, freshness, and whether the category should represent resources rather than runtime code."
      },
      {
        "project_id": "GoDiao/Free-Way",
        "category": "coding_agent",
        "impact_score": 32,
        "reasons": [
          "Repository is a collection and may need curation semantics review."
        ],
        "suggested_action": "Confirm collection scope, freshness, and whether the category should represent resources rather than runtime code."
      }
    ]
  },
  "known_limitations": [
    "Eval quality is a CI-safe baseline over curated cases and generated fixtures; it is not a live benchmark over every GitHub repository.",
    "Top-1 misses remain for prompt-tooling and coding-agent search cases; use top-3 health when evaluating shortlist quality.",
    "Quality signal confidence can be snapshot, partial, estimated, or unknown depending on available GitHub sync depth.",
    "Current data trust risk is high; inspect quality and review queue details before high-confidence claims.",
    "12 projects are in the low-confidence review queue and may need classification or collection-semantics review."
  ],
  "recommended_use": [
    "Use top_3_hit_rate, category_accuracy, and deployment_accuracy for current recommendation health.",
    "Use explanation coverage to decide whether agent-facing responses carry enough evidence to cite.",
    "Use data_trust_score, risk_level, and review_queue before making high-confidence production recommendations.",
    "Use require_d1=true for workflows that should fail closed instead of using seed fallback."
  ],
  "links": {
    "html": "/benchmark",
    "api": "/api/benchmark",
    "quality": "/api/quality",
    "review": "/api/quality/review",
    "trust": "/api/trust",
    "coverage": "/coverage",
    "eval_quality": "https://github.com/haocn-ops/git-top/blob/main/docs/EVAL_QUALITY.md",
    "eval_explanations": "https://github.com/haocn-ops/git-top/blob/main/docs/EVAL_EXPLANATIONS.md"
  },
  "metadata": {
    "source": "d1",
    "reason": "d1_query",
    "project_count": 770,
    "generated_at": "2026-07-04T06:31:21.558Z",
    "loaded_project_limit": 2000,
    "truncated": false
  }
}