{
  "meta": {
    "version": "1.0",
    "description": "Standardized role taxonomy for the Career Path Explorer. Freetext job titles are normalized to these canonical role buckets. Inspired by real LinkedIn title distributions.",
    "total_canonical_roles": 24,
    "clusters": 5
  },
  "clusters": [
    {
      "id": "technical",
      "label": "Technical / Engineering",
      "color": "#0a66c2",
      "roles": [
        {
          "canonical": "Software Engineer",
          "aliases": ["SWE", "Software Developer", "Backend Engineer", "Frontend Engineer", "Full Stack Engineer", "Sr. Software Engineer", "Staff Engineer", "Principal Engineer"],
          "level_range": ["IC2", "IC3", "IC4", "IC5", "IC6"],
          "median_yoe": 3.5
        },
        {
          "canonical": "ML Engineer",
          "aliases": ["Machine Learning Engineer", "Sr. ML Engineer", "Staff ML Engineer", "MLOps Engineer", "AI Engineer", "ML Platform Engineer"],
          "level_range": ["IC3", "IC4", "IC5", "IC6"],
          "median_yoe": 5.0
        },
        {
          "canonical": "Data Engineer",
          "aliases": ["Sr. Data Engineer", "Analytics Engineer", "Platform Engineer (Data)", "Data Infrastructure Engineer"],
          "level_range": ["IC2", "IC3", "IC4", "IC5"],
          "median_yoe": 4.0
        },
        {
          "canonical": "Backend Engineer",
          "aliases": ["Backend SWE", "Server-side Engineer", "API Engineer", "Platform Engineer"],
          "level_range": ["IC2", "IC3", "IC4"],
          "median_yoe": 3.0
        },
        {
          "canonical": "VP Engineering",
          "aliases": ["VP of Engineering", "SVP Engineering", "Head of Engineering", "VP Software Engineering"],
          "level_range": ["M5", "M6"],
          "median_yoe": 14.0
        },
        {
          "canonical": "CTO",
          "aliases": ["Chief Technology Officer", "Co-Founder & CTO", "Founding CTO"],
          "level_range": ["C-suite"],
          "median_yoe": 18.0
        }
      ]
    },
    {
      "id": "product",
      "label": "Product & Strategy",
      "color": "#057642",
      "roles": [
        {
          "canonical": "Product Manager",
          "aliases": ["PM", "Product Manager", "Sr. Product Manager", "Senior PM", "Group PM", "Technical PM", "APM", "Associate Product Manager"],
          "level_range": ["IC3", "IC4", "IC5"],
          "median_yoe": 5.0
        },
        {
          "canonical": "Product Lead",
          "aliases": ["Product Lead", "Group Product Manager", "Principal PM", "Director of Product"],
          "level_range": ["IC5", "M4"],
          "median_yoe": 9.0
        },
        {
          "canonical": "Chief of Staff",
          "aliases": ["CoS", "Chief of Staff to CEO", "Chief of Staff to CTO", "Strategic Chief of Staff"],
          "level_range": ["M3", "M4"],
          "median_yoe": 8.0
        },
        {
          "canonical": "Strategy & Operations",
          "aliases": ["Strategy & Ops", "Biz Ops", "Business Operations", "Strategic Operations Lead", "Ops Lead", "Head of Strategy"],
          "level_range": ["IC3", "IC4", "M3"],
          "median_yoe": 5.5
        },
        {
          "canonical": "Founder",
          "aliases": ["Co-Founder", "Founder & CEO", "Founder & CTO", "Founder & CPO", "Solo Founder"],
          "level_range": ["Founder"],
          "median_yoe": 8.0
        }
      ]
    },
    {
      "id": "research",
      "label": "Research & Science",
      "color": "#b24020",
      "roles": [
        {
          "canonical": "Research Scientist",
          "aliases": ["Research Scientist", "Sr. Research Scientist", "AI Researcher", "ML Researcher", "Scientist"],
          "level_range": ["IC3", "IC4", "IC5"],
          "median_yoe": 6.0
        },
        {
          "canonical": "Applied Scientist",
          "aliases": ["Applied Scientist", "Sr. Applied Scientist", "Applied ML Scientist", "Applied Research Scientist"],
          "level_range": ["IC4", "IC5"],
          "median_yoe": 7.0
        },
        {
          "canonical": "Research Lead",
          "aliases": ["Research Lead", "Principal Scientist", "Director of Research", "Head of Research", "Research Manager"],
          "level_range": ["IC5", "IC6", "M4"],
          "median_yoe": 11.0
        }
      ]
    },
    {
      "id": "leadership",
      "label": "Leadership & Exec",
      "color": "#7c3aed",
      "roles": [
        {
          "canonical": "Director of Data Science",
          "aliases": ["Director of DS", "Director of Data", "Head of Data Science", "Sr. Director of Data Science"],
          "level_range": ["M4", "M5"],
          "median_yoe": 11.0
        },
        {
          "canonical": "VP of Data",
          "aliases": ["VP Data Science", "VP Analytics & Data", "VP of AI/ML", "Head of Data"],
          "level_range": ["M5", "M6"],
          "median_yoe": 15.0
        },
        {
          "canonical": "Chief Data Officer",
          "aliases": ["CDO", "Chief AI Officer", "CAIO", "Chief Analytics Officer"],
          "level_range": ["C-suite"],
          "median_yoe": 18.0
        }
      ]
    },
    {
      "id": "analytics",
      "label": "Analytics & BI",
      "color": "#b45309",
      "roles": [
        {
          "canonical": "Data Analyst",
          "aliases": ["Analyst", "Data Analyst", "Sr. Data Analyst", "Business Data Analyst", "Marketing Analyst", "Product Analyst"],
          "level_range": ["IC1", "IC2", "IC3"],
          "median_yoe": 2.5
        },
        {
          "canonical": "Data Scientist",
          "aliases": ["Data Scientist", "Sr. Data Scientist", "Staff Data Scientist", "Lead Data Scientist", "Decision Scientist"],
          "level_range": ["IC3", "IC4", "IC5"],
          "median_yoe": 5.0
        },
        {
          "canonical": "BI Engineer",
          "aliases": ["BI Developer", "Business Intelligence Engineer", "BI Analyst", "Reporting Engineer", "Tableau Developer"],
          "level_range": ["IC2", "IC3", "IC4"],
          "median_yoe": 3.5
        },
        {
          "canonical": "Analytics Manager",
          "aliases": ["Manager of Analytics", "Head of Analytics", "Sr. Analytics Manager", "Analytics Lead"],
          "level_range": ["M2", "M3", "M4"],
          "median_yoe": 8.0
        },
        {
          "canonical": "Business Analyst",
          "aliases": ["BA", "Business Analyst", "Sr. Business Analyst", "Systems Analyst", "Process Analyst"],
          "level_range": ["IC1", "IC2", "IC3"],
          "median_yoe": 2.0
        }
      ]
    }
  ],
  "transition_probabilities": {
    "description": "P(to_role | from_role) — estimated from synthetic cohort. Rows are source roles, values are destination probabilities.",
    "data": {
      "Data Analyst": {
        "Data Scientist": 0.32,
        "BI Engineer": 0.18,
        "Analytics Manager": 0.16,
        "Business Analyst": 0.08,
        "Product Manager": 0.12,
        "Data Engineer": 0.08,
        "Strategy & Operations": 0.06
      },
      "Data Scientist": {
        "ML Engineer": 0.24,
        "Product Manager": 0.18,
        "Research Scientist": 0.16,
        "Applied Scientist": 0.14,
        "Director of Data Science": 0.12,
        "Analytics Manager": 0.08,
        "Founder": 0.08
      },
      "Software Engineer": {
        "ML Engineer": 0.22,
        "Data Engineer": 0.16,
        "Product Manager": 0.18,
        "VP Engineering": 0.12,
        "Founder": 0.14,
        "CTO": 0.08,
        "Staff Engineer": 0.10
      },
      "ML Engineer": {
        "Applied Scientist": 0.28,
        "Research Scientist": 0.21,
        "VP Engineering": 0.15,
        "Founder": 0.12,
        "Director of Data Science": 0.10,
        "CTO": 0.14
      },
      "Research Scientist": {
        "Applied Scientist": 0.35,
        "Research Lead": 0.28,
        "ML Engineer": 0.20,
        "Product Manager": 0.10,
        "Founder": 0.07
      },
      "Product Manager": {
        "Product Lead": 0.35,
        "Chief of Staff": 0.18,
        "Founder": 0.20,
        "Director of Data Science": 0.10,
        "VP Engineering": 0.10,
        "Strategy & Operations": 0.07
      }
    }
  },
  "normalization_rules": [
    { "pattern": "^(Sr\\.|Senior|Lead|Principal|Staff)\\s+", "action": "strip_prefix", "note": "Remove seniority prefix before canonical matching" },
    { "pattern": "\\s+(I|II|III|IV|L\\d+)$", "action": "strip_suffix", "note": "Remove level suffixes" },
    { "pattern": "^(Associate|Junior|Jr\\.)\\s+", "action": "strip_prefix", "note": "Entry-level prefix removal" },
    { "pattern": "Machine Learning Engineer", "action": "map_to", "canonical": "ML Engineer" },
    { "pattern": "AI Engineer", "action": "map_to", "canonical": "ML Engineer" },
    { "pattern": "Decision Scientist", "action": "map_to", "canonical": "Data Scientist" },
    { "pattern": "Analytics Engineer", "action": "map_to", "canonical": "Data Engineer" }
  ]
}
