init: add source code from src.zip

2026-03-31 01:55:58 -07:00
commit f5a40b86de
1902 changed files with 513237 additions and 0 deletions
--- a/src/services/analytics/metadata.ts
+++ b/src/services/analytics/metadata.ts
@@ -0,0 +1,973 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+/**
+ * Shared event metadata enrichment for analytics systems
+ *
+ * This module provides a single source of truth for collecting and formatting
+ * event metadata across all analytics systems (Datadog, 1P).
+ */
+
+import { extname } from 'path'
+import memoize from 'lodash-es/memoize.js'
+import { env, getHostPlatformForAnalytics } from '../../utils/env.js'
+import { envDynamic } from '../../utils/envDynamic.js'
+import { getModelBetas } from '../../utils/betas.js'
+import { getMainLoopModel } from '../../utils/model/model.js'
+import {
+  getSessionId,
+  getIsInteractive,
+  getKairosActive,
+  getClientType,
+  getParentSessionId as getParentSessionIdFromState,
+} from '../../bootstrap/state.js'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import { isOfficialMcpUrl } from '../mcp/officialRegistry.js'
+import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js'
+import { getRepoRemoteHash } from '../../utils/git.js'
+import {
+  getWslVersion,
+  getLinuxDistroInfo,
+  detectVcs,
+} from '../../utils/platform.js'
+import type { CoreUserData } from 'src/utils/user.js'
+import { getAgentContext } from '../../utils/agentContext.js'
+import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js'
+import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import {
+  getAgentId,
+  getParentSessionId as getTeammateParentSessionId,
+  getTeamName,
+  isTeammate,
+} from '../../utils/teammate.js'
+import { feature } from 'bun:bundle'
+
+/**
+ * Marker type for verifying analytics metadata doesn't contain sensitive data
+ *
+ * This type forces explicit verification that string values being logged
+ * don't contain code snippets, file paths, or other sensitive information.
+ *
+ * The metadata is expected to be JSON-serializable.
+ *
+ * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS`
+ *
+ * The type is `never`, so a plain string cannot be assigned to it without an
+ * explicit cast. This is intentional: the alias carries no runtime value of
+ * its own and exists only so that each cast documents developer intent.
+ */
+export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never
+
+/**
+ * Produces an analytics-safe version of a tool name.
+ *
+ * Names carrying the `mcp__` prefix (format `mcp__<server>__<tool>`) can
+ * expose a user's server configuration, which is treated as PII-medium, so
+ * they are collapsed to the constant 'mcp_tool'. Every other name (built-in
+ * tools such as Bash, Read, Write) is passed through unchanged.
+ *
+ * @param toolName - The tool name to sanitize
+ * @returns 'mcp_tool' for MCP tools, otherwise the original name
+ */
+export function sanitizeToolNameForAnalytics(
+  toolName: string,
+): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS {
+  const isMcpTool = toolName.startsWith('mcp__')
+  const safeName = isMcpTool ? 'mcp_tool' : toolName
+  return safeName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+}
+
+/**
+ * Reports whether detailed tool-name logging is switched on for OTLP events.
+ *
+ * When on, MCP server/tool names and Skill names are logged. The default is
+ * off to protect PII (user-specific server configurations).
+ *
+ * Enable with OTEL_LOG_TOOL_DETAILS=1
+ *
+ * @returns The result of isEnvTruthy() applied to OTEL_LOG_TOOL_DETAILS
+ */
+export function isToolDetailsLoggingEnabled(): boolean {
+  const { OTEL_LOG_TOOL_DETAILS: rawFlag } = process.env
+  return isEnvTruthy(rawFlag)
+}
+
+/**
+ * Decides whether MCP server/tool names may be logged in analytics events.
+ *
+ * Per go/taxonomy, MCP names are medium PII. They are logged only for:
+ * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs
+ * - claude.ai-proxied connectors — always official (from claude.ai's list)
+ * - Servers whose URL matches the official MCP registry — directory
+ *   connectors added via `claude mcp add`, not customer-specific config
+ *
+ * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool').
+ *
+ * @param mcpServerType - Transport/type of the MCP server, if known
+ * @param mcpServerBaseUrl - Base URL of the MCP server, if known
+ * @returns true when one of the three conditions above holds
+ */
+export function isAnalyticsToolDetailsLoggingEnabled(
+  mcpServerType: string | undefined,
+  mcpServerBaseUrl: string | undefined,
+): boolean {
+  const isCowork = process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent'
+  const isClaudeAiProxy = mcpServerType === 'claudeai-proxy'
+  if (isCowork || isClaudeAiProxy) {
+    return true
+  }
+  // Only the registry lookup is left, and it needs a non-empty URL.
+  if (!mcpServerBaseUrl) {
+    return false
+  }
+  return isOfficialMcpUrl(mcpServerBaseUrl)
+}
+
+/**
+ * Built-in first-party MCP servers whose names are fixed reserved strings,
+ * not user-configured — so logging them is not PII. Checked in addition to
+ * isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio
+ * built-in would otherwise fail.
+ *
+ * Feature-gated so the set is empty when the feature is off: the name
+ * reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so
+ * a user-configured 'computer-use' is possible in builds without the feature.
+ *
+ * The server name is read through an inline require() inside the
+ * feature('CHICAGO_MCP') branch rather than a top-level import, so the
+ * computerUse module is only evaluated from here when the feature is on.
+ * The set is built once, at module load.
+ */
+/* eslint-disable @typescript-eslint/no-require-imports */
+const BUILTIN_MCP_SERVER_NAMES: ReadonlySet<string> = new Set(
+  feature('CHICAGO_MCP')
+    ? [
+        (
+          require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js')
+        ).COMPUTER_USE_MCP_SERVER_NAME,
+      ]
+    : [],
+)
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+/**
+ * Spreadable helper for logEvent payloads.
+ *
+ * Parses `toolName` as an MCP tool name and, if the server is a built-in one
+ * or the analytics logging gate passes, returns {mcpServerName, mcpToolName}.
+ * In every other case it returns an empty object, so the result can always be
+ * spread into a payload. Consolidates the identical IIFE pattern at each
+ * tengu_tool_use_* call site.
+ *
+ * @param toolName - Full tool name, e.g. 'mcp__slack__read_channel'
+ * @param mcpServerType - Transport/type of the MCP server, if known
+ * @param mcpServerBaseUrl - Base URL of the MCP server, if known
+ * @returns The loggable MCP names, or {} when they must stay redacted
+ */
+export function mcpToolDetailsForAnalytics(
+  toolName: string,
+  mcpServerType: string | undefined,
+  mcpServerBaseUrl: string | undefined,
+): {
+  mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+  mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+} {
+  const parsed = extractMcpToolDetails(toolName)
+  if (parsed === undefined) {
+    return {}
+  }
+
+  const { serverName, mcpToolName } = parsed
+  // Built-in servers bypass the transport/URL gate; the gate is only
+  // consulted when the server is not built-in.
+  const mayLogNames =
+    BUILTIN_MCP_SERVER_NAMES.has(serverName) ||
+    isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl)
+
+  return mayLogNames ? { mcpServerName: serverName, mcpToolName } : {}
+}
+
+/**
+ * Splits a full MCP tool name into its server and tool parts.
+ * MCP tool names follow the format: mcp__<server>__<tool>
+ *
+ * The tool part may itself contain `__`; everything after the server segment
+ * is kept together. Names with an empty server or empty tool part are
+ * rejected.
+ *
+ * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel')
+ * @returns Object with serverName and mcpToolName, or undefined if the name
+ *   is not a well-formed MCP tool name
+ */
+export function extractMcpToolDetails(toolName: string):
+  | {
+      serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+      mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+    }
+  | undefined {
+  if (!toolName.startsWith('mcp__')) {
+    return undefined
+  }
+
+  // Segment 0 is the literal 'mcp', segment 1 the server, the rest the tool.
+  const [, serverName, ...toolSegments] = toolName.split('__')
+  if (toolSegments.length === 0) {
+    return undefined
+  }
+
+  // Re-insert the separators that split() removed from the tool part.
+  const mcpToolName = toolSegments.join('__')
+  if (!serverName || !mcpToolName) {
+    return undefined
+  }
+
+  return {
+    serverName:
+      serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    mcpToolName:
+      mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  }
+}
+
+/**
+ * Reads the skill name out of a Skill tool call's input.
+ *
+ * @param toolName - The tool name; anything other than 'Skill' yields undefined
+ * @param input - The tool input, expected to be an object with a string
+ *   `skill` property
+ * @returns The `skill` string for a Skill tool call, undefined otherwise
+ */
+export function extractSkillName(
+  toolName: string,
+  input: unknown,
+): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
+  if (toolName !== 'Skill') {
+    return undefined
+  }
+  if (typeof input !== 'object' || input === null || !('skill' in input)) {
+    return undefined
+  }
+
+  const candidate = (input as { skill: unknown }).skill
+  return typeof candidate === 'string'
+    ? (candidate as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+    : undefined
+}
+
+// Strings longer than TRUNCATE_AT are cut down to their first TRUNCATE_TO
+// characters (plus a length marker); shorter strings are kept whole.
+const TOOL_INPUT_STRING_TRUNCATE_AT = 512
+const TOOL_INPUT_STRING_TRUNCATE_TO = 128
+// Cap on the length of the serialized JSON string.
+const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024
+// Arrays and objects keep at most this many items / keys.
+const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20
+// Non-primitive values at this depth or deeper are replaced by '<nested>'.
+const TOOL_INPUT_MAX_DEPTH = 2
+
+/**
+ * Recursively shrinks a tool input value so its serialized form stays bounded.
+ *
+ * - Strings over TOOL_INPUT_STRING_TRUNCATE_AT chars are cut to
+ *   TOOL_INPUT_STRING_TRUNCATE_TO chars followed by `…[<length> chars]`.
+ * - Numbers, booleans, null and undefined pass through unchanged.
+ * - Any other value at depth >= TOOL_INPUT_MAX_DEPTH becomes '<nested>'.
+ * - Arrays and objects keep their first TOOL_INPUT_MAX_COLLECTION_ITEMS
+ *   entries and get one extra marker entry carrying the original count.
+ * - Object keys starting with '_' are dropped.
+ * - Anything else is converted with String().
+ *
+ * @param value - The value to shrink
+ * @param depth - Current nesting depth (0 for the top-level input)
+ * @returns The shrunken copy (primitives are returned as-is)
+ */
+function truncateToolInputValue(value: unknown, depth = 0): unknown {
+  switch (typeof value) {
+    case 'string':
+      return value.length > TOOL_INPUT_STRING_TRUNCATE_AT
+        ? `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]`
+        : value
+    case 'number':
+    case 'boolean':
+    case 'undefined':
+      return value
+  }
+  if (value === null) {
+    return value
+  }
+
+  if (depth >= TOOL_INPUT_MAX_DEPTH) {
+    return '<nested>'
+  }
+  const childDepth = depth + 1
+
+  if (Array.isArray(value)) {
+    const head = value.slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
+    const kept = head.map(item => truncateToolInputValue(item, childDepth))
+    if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
+      kept.push(`…[${value.length} items]`)
+    }
+    return kept
+  }
+
+  if (typeof value === 'object') {
+    // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by
+    // SedEditPermissionRequest) so they don't leak into telemetry.
+    const visible = Object.entries(value as Record<string, unknown>).filter(
+      ([key]) => !key.startsWith('_'),
+    )
+    const pairs = visible
+      .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS)
+      .map(([key, child]) => [key, truncateToolInputValue(child, childDepth)])
+    if (visible.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) {
+      pairs.push(['…', `${visible.length} keys`])
+    }
+    return Object.fromEntries(pairs)
+  }
+
+  return String(value)
+}
+
+/**
+ * Serialize a tool's input arguments for the OTel tool_result event.
+ * Truncates long strings and deep nesting to keep the output bounded while
+ * preserving forensically useful fields like file paths, URLs, and MCP args.
+ *
+ * The result is capped at TOOL_INPUT_MAX_JSON_CHARS characters; when the cap
+ * is hit the string is cut and suffixed with '…[truncated]', so it is no
+ * longer guaranteed to be parseable JSON.
+ *
+ * @param input - The raw tool input
+ * @returns The bounded JSON string, or undefined when OTEL_LOG_TOOL_DETAILS
+ *   is not enabled or the input has no JSON representation
+ */
+export function extractToolInputForTelemetry(
+  input: unknown,
+): string | undefined {
+  if (!isToolDetailsLoggingEnabled()) {
+    return undefined
+  }
+  const truncated = truncateToolInputValue(input)
+  // A top-level `undefined` input passes through truncateToolInputValue
+  // unchanged. NOTE(review): assumes jsonStringify mirrors JSON.stringify,
+  // which yields undefined (not a string) for such a value; guard so the
+  // length check below cannot throw.
+  const serialized: string | undefined = jsonStringify(truncated)
+  if (typeof serialized !== 'string') {
+    return undefined
+  }
+  if (serialized.length > TOOL_INPUT_MAX_JSON_CHARS) {
+    return serialized.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]'
+  }
+  return serialized
+}
+
+/**
+ * Longest file extension (in characters, without the dot) that is logged
+ * verbatim. Anything longer is treated as potentially sensitive
+ * (e.g., hash-based filenames like "key-hash-abcd-123-456") and reported
+ * as 'other' instead.
+ */
+const MAX_FILE_EXTENSION_LENGTH = 10
+
+/**
+ * Derives an analytics-safe file extension from a path.
+ *
+ * The extension is taken with Node's path.extname, lower-cased, and stripped
+ * of its leading dot. Extensions longer than MAX_FILE_EXTENSION_LENGTH are
+ * replaced by 'other' so that potentially sensitive data (like hash-based
+ * filenames) is never logged.
+ *
+ * @param filePath - The file path to extract the extension from
+ * @returns The lower-cased extension, 'other' for long extensions, or
+ *   undefined if the path has no extension
+ */
+export function getFileExtensionForAnalytics(
+  filePath: string,
+): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
+  const dottedExt = extname(filePath).toLowerCase()
+  // '' means no extension at all; '.' is a bare trailing dot.
+  if (dottedExt === '' || dottedExt === '.') {
+    return undefined
+  }
+
+  const bareExt = dottedExt.slice(1) // drop the leading dot
+  const reported =
+    bareExt.length > MAX_FILE_EXTENSION_LENGTH ? 'other' : bareExt
+  return reported as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+}
+
+/**
+ * Allow list of commands we extract file extensions from.
+ * Matched against the base name of a sub-command's first token, so a
+ * path-qualified command such as `/bin/cat` matches `cat`.
+ */
+const FILE_COMMANDS = new Set([
+  'rm',
+  'mv',
+  'cp',
+  'touch',
+  'mkdir',
+  'chmod',
+  'chown',
+  'cat',
+  'head',
+  'tail',
+  'sort',
+  'stat',
+  'diff',
+  'wc',
+  'grep',
+  'rg',
+  'sed',
+])
+
+/**
+ * Regex to split bash commands on compound operators (&&, ||, ;, |).
+ * Whitespace directly around an operator is consumed by the split. A single
+ * `&` and newlines are not treated as separators.
+ */
+const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/
+
+/** Regex to split on whitespace (one or more whitespace characters). */
+const WHITESPACE_REGEX = /\s+/
+
+/**
+ * Extracts file extensions from a bash command for analytics.
+ * Best-effort: splits on operators and whitespace, extracts extensions
+ * from non-flag args of allowed commands. No heavy shell parsing needed
+ * because grep patterns and sed scripts rarely resemble file extensions.
+ *
+ * Each sub-command is trimmed before tokenizing, so leading whitespace or
+ * indentation in `command` does not hide the command name.
+ *
+ * @param command - The raw bash command line
+ * @param simulatedSedEditFilePath - Optional file path whose extension is
+ *   reported first, ahead of anything found in `command`
+ * @returns Unique extensions in first-seen order joined by ',', or undefined
+ *   when none were found
+ */
+export function getFileExtensionsFromBashCommand(
+  command: string,
+  simulatedSedEditFilePath?: string,
+): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined {
+  // Cheap exit: without a '.' there is no extension to find in the command.
+  if (!command.includes('.') && !simulatedSedEditFilePath) return undefined
+
+  // A Set iterates in insertion order, so it doubles as the ordered result.
+  const seen = new Set<string>()
+
+  if (simulatedSedEditFilePath) {
+    const ext = getFileExtensionForAnalytics(simulatedSedEditFilePath)
+    if (ext) {
+      seen.add(ext)
+    }
+  }
+
+  for (const rawSubcmd of command.split(COMPOUND_OPERATOR_REGEX)) {
+    // The operator regex only eats whitespace next to an operator; the very
+    // start and end of the command can still carry whitespace, which would
+    // otherwise produce an empty first token and skip the sub-command.
+    const subcmd = rawSubcmd.trim()
+    if (!subcmd) continue
+    const tokens = subcmd.split(WHITESPACE_REGEX)
+    if (tokens.length < 2) continue
+
+    // Reduce a path-qualified command (/usr/bin/cat) to its base name.
+    const firstToken = tokens[0]!
+    const slashIdx = firstToken.lastIndexOf('/')
+    const baseCmd = slashIdx >= 0 ? firstToken.slice(slashIdx + 1) : firstToken
+    if (!FILE_COMMANDS.has(baseCmd)) continue
+
+    for (let i = 1; i < tokens.length; i++) {
+      const arg = tokens[i]!
+      // Skip flags such as -r or --force.
+      if (arg.charCodeAt(0) === 45 /* - */) continue
+      const ext = getFileExtensionForAnalytics(arg)
+      if (ext) {
+        seen.add(ext)
+      }
+    }
+  }
+
+  if (seen.size === 0) return undefined
+  return [...seen].join(
+    ',',
+  ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+}
+
+/**
+ * Environment context metadata
+ *
+ * Produced by buildEnvContext(). Optional fields are omitted (or left
+ * undefined) when their source environment variable or detection result is
+ * not available.
+ */
+export type EnvContext = {
+  platform: string // getHostPlatformForAnalytics()
+  platformRaw: string // CLAUDE_CODE_HOST_PLATFORM if set, else process.platform
+  arch: string
+  nodeVersion: string
+  terminal: string | null
+  packageManagers: string // comma-joined list
+  runtimes: string // comma-joined list
+  isRunningWithBun: boolean
+  isCi: boolean
+  isClaubbit: boolean
+  isClaudeCodeRemote: boolean
+  isLocalAgentMode: boolean // CLAUDE_CODE_ENTRYPOINT === 'local-agent'
+  isConductor: boolean
+  remoteEnvironmentType?: string
+  coworkerType?: string // only set when the COWORKER_TYPE_TELEMETRY feature is on
+  claudeCodeContainerId?: string
+  claudeCodeRemoteSessionId?: string
+  tags?: string // raw CLAUDE_CODE_TAGS value; split on ',' in to1PEventFormat
+  isGithubAction: boolean
+  isClaudeCodeAction: boolean
+  isClaudeAiAuth: boolean
+  version: string
+  versionBase?: string // e.g. "2.0.36-dev", see getVersionBase
+  buildTime: string
+  deploymentEnvironment: string
+  // The github* fields are only populated when GITHUB_ACTIONS is truthy.
+  githubEventName?: string
+  githubActionsRunnerEnvironment?: string
+  githubActionsRunnerOs?: string
+  githubActionRef?: string // text after 'claude-code-action/' in GITHUB_ACTION_PATH
+  wslVersion?: string
+  linuxDistroId?: string
+  linuxDistroVersion?: string
+  linuxKernel?: string
+  vcs?: string // comma-joined list of detected version control systems
+}
+
+/**
+ * Process metrics included with all analytics events.
+ *
+ * Filled in by buildProcessMetrics() from process.uptime(),
+ * process.memoryUsage(), process.constrainedMemory() and process.cpuUsage().
+ */
+export type ProcessMetrics = {
+  uptime: number
+  rss: number
+  heapTotal: number
+  heapUsed: number
+  external: number
+  arrayBuffers: number
+  constrainedMemory: number | undefined
+  cpuUsage: NodeJS.CpuUsage
+  // CPU time (user + system) consumed since the previous sample, as a
+  // percentage of the wall-clock time elapsed; undefined for the first sample.
+  cpuPercent: number | undefined
+}
+
+/**
+ * Core event metadata shared across all analytics systems
+ *
+ * Built by getEventMetadata(). NOTE(review): skillMode and observerMode are
+ * declared here and forwarded by to1PEventFormat, but getEventMetadata in
+ * this file does not set them — presumably they are attached elsewhere.
+ */
+export type EventMetadata = {
+  model: string
+  sessionId: string
+  userType: string // USER_TYPE env var, '' when unset
+  betas?: string // comma-joined; omitted when empty
+  envContext: EnvContext
+  entrypoint?: string
+  agentSdkVersion?: string
+  isInteractive: string // String(getIsInteractive()); compared against 'true' in to1PEventFormat
+  clientType: string
+  processMetrics?: ProcessMetrics
+  sweBenchRunId: string // '' when the env var is unset (same for the two below)
+  sweBenchInstanceId: string
+  sweBenchTaskId: string
+  // Swarm/team agent identification for analytics attribution
+  agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID
+  parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session)
+  agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents
+  teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage)
+  subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team)
+  rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data
+  kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check)
+  skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation)
+  observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events)
+}
+
+/**
+ * Options for enriching event metadata
+ */
+export type EnrichMetadataOptions = {
+  // Model to use, falls back to getMainLoopModel() if not provided.
+  // Any truthy value is converted with String().
+  model?: unknown
+  // Explicit betas string (already joined). Only honored when it is a string;
+  // otherwise the betas are derived from the model.
+  betas?: unknown
+  // Additional metadata to include (optional).
+  // NOTE(review): getEventMetadata in this file does not read this field.
+  additionalMetadata?: Record<string, unknown>
+}
+
+/**
+ * Resolves which agent (if any) an analytics event should be attributed to.
+ *
+ * Sources are tried in this order and the first one that yields anything wins:
+ * 1. AsyncLocalStorage agent context (subagents running in this process)
+ * 2. Swarm/teammate helpers (swarm teammates and standalone agents)
+ * 3. Parent session ID from bootstrap state (e.g., plan mode -> implementation)
+ *
+ * @returns The identification fields that apply, or {} when none do
+ */
+function getAgentIdentification(): {
+  agentId?: string
+  parentSessionId?: string
+  agentType?: 'teammate' | 'subagent' | 'standalone'
+  teamName?: string
+} {
+  type AgentIdentification = ReturnType<typeof getAgentIdentification>
+
+  // 1. In-process context takes precedence over everything else.
+  const ctx = getAgentContext()
+  if (ctx) {
+    const fromContext: AgentIdentification = {
+      agentId: ctx.agentId,
+      parentSessionId: ctx.parentSessionId,
+      agentType: ctx.agentType,
+    }
+    // Only teammate contexts carry a team name.
+    if (ctx.agentType === 'teammate') {
+      fromContext.teamName = ctx.teamName
+    }
+    return fromContext
+  }
+
+  // 2. Swarm helpers. Only truthy values are copied, so an empty object
+  // means this source had nothing to offer.
+  const swarmAgentId = getAgentId()
+  const swarmParentSessionId = getTeammateParentSessionId()
+  const swarmTeamName = getTeamName()
+  // An agent ID without teammate status marks a standalone agent.
+  const swarmAgentType: AgentIdentification['agentType'] = isTeammate()
+    ? 'teammate'
+    : swarmAgentId
+      ? 'standalone'
+      : undefined
+
+  const fromSwarm: AgentIdentification = {}
+  if (swarmAgentId) {
+    fromSwarm.agentId = swarmAgentId
+  }
+  if (swarmAgentType) {
+    fromSwarm.agentType = swarmAgentType
+  }
+  if (swarmParentSessionId) {
+    fromSwarm.parentSessionId = swarmParentSessionId
+  }
+  if (swarmTeamName) {
+    fromSwarm.teamName = swarmTeamName
+  }
+  if (Object.keys(fromSwarm).length > 0) {
+    return fromSwarm
+  }
+
+  // 3. Bootstrap state (e.g., plan mode -> implementation).
+  const stateParentSessionId = getParentSessionIdFromState()
+  return stateParentSessionId ? { parentSessionId: stateParentSessionId } : {}
+}
+
+/**
+ * Reduces the full build version to its base: the leading
+ * major.minor.patch plus an optional lowercase pre-release tag.
+ * "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev"
+ *
+ * Memoized, so MACRO.VERSION is parsed once.
+ *
+ * @returns The base version, or undefined when MACRO.VERSION does not start
+ *   with a major.minor.patch triple
+ */
+const getVersionBase = memoize((): string | undefined => {
+  const baseVersionPattern = /^\d+\.\d+\.\d+(?:-[a-z]+)?/
+  const [base] = baseVersionPattern.exec(MACRO.VERSION) ?? []
+  return base
+})
+
+/**
+ * Builds the environment context object
+ *
+ * Wrapped in memoize and called without arguments, so the detection work
+ * runs once and later calls reuse the first result. Values that can change
+ * after the first event must therefore be read outside this function.
+ * NOTE(review): the cached value is the promise itself, so a rejection from
+ * any of the awaited helpers would presumably be cached too — confirm those
+ * helpers never reject.
+ */
+const buildEnvContext = memoize(async (): Promise<EnvContext> => {
+  // The four detections are independent of each other, so run them together.
+  const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([
+    env.getPackageManagers(),
+    env.getRuntimes(),
+    getLinuxDistroInfo(),
+    detectVcs(),
+  ])
+
+  return {
+    platform: getHostPlatformForAnalytics(),
+    // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ.
+    // getHostPlatformForAnalytics() buckets those into 'linux'; here we want
+    // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote.
+    platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform,
+    arch: env.arch,
+    nodeVersion: env.nodeVersion,
+    terminal: envDynamic.terminal,
+    packageManagers: packageManagers.join(','),
+    runtimes: runtimes.join(','),
+    isRunningWithBun: env.isRunningWithBun(),
+    isCi: isEnvTruthy(process.env.CI),
+    isClaubbit: isEnvTruthy(process.env.CLAUBBIT),
+    isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE),
+    isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent',
+    isConductor: env.isConductor(),
+    // Optional string fields below are spread in only when the env var is
+    // set to a non-empty value.
+    ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && {
+      remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE,
+    }),
+    // Gated by feature flag to prevent leaking "coworkerType" string in external builds
+    ...(feature('COWORKER_TYPE_TELEMETRY')
+      ? process.env.CLAUDE_CODE_COWORKER_TYPE
+        ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE }
+        : {}
+      : {}),
+    ...(process.env.CLAUDE_CODE_CONTAINER_ID && {
+      claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID,
+    }),
+    ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && {
+      claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID,
+    }),
+    ...(process.env.CLAUDE_CODE_TAGS && {
+      tags: process.env.CLAUDE_CODE_TAGS,
+    }),
+    isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS),
+    isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION),
+    isClaudeAiAuth: isClaudeAISubscriber(),
+    version: MACRO.VERSION,
+    versionBase: getVersionBase(),
+    buildTime: MACRO.BUILD_TIME,
+    deploymentEnvironment: env.detectDeploymentEnvironment(),
+    // GitHub Actions details are only collected inside a GitHub Actions run.
+    ...(isEnvTruthy(process.env.GITHUB_ACTIONS) && {
+      githubEventName: process.env.GITHUB_EVENT_NAME,
+      githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT,
+      githubActionsRunnerOs: process.env.RUNNER_OS,
+      // Whatever follows the first 'claude-code-action/' in the action path.
+      githubActionRef: process.env.GITHUB_ACTION_PATH?.includes(
+        'claude-code-action/',
+      )
+        ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1]
+        : undefined,
+    }),
+    ...(getWslVersion() && { wslVersion: getWslVersion() }),
+    // Spread as-is; presumably supplies linuxDistroId / linuxDistroVersion /
+    // linuxKernel — TODO confirm against getLinuxDistroInfo.
+    ...(linuxDistroInfo ?? {}),
+    ...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}),
+  }
+})
+
+// --
+// CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts
+let prevCpuUsage: NodeJS.CpuUsage | null = null
+let prevWallTimeMs: number | null = null
+
+/**
+ * Builds process metrics object for all users.
+ *
+ * Samples memory and CPU usage of the current process. cpuPercent is the CPU
+ * time (user + system) used since the previous call, as a percentage of the
+ * wall-clock time elapsed since then; it is undefined on the first call and
+ * when no wall-clock time has passed. Each call updates the module-level
+ * previous-sample state.
+ *
+ * @returns The metrics, or undefined if sampling throws (best-effort: metrics
+ *   must never break event logging)
+ */
+function buildProcessMetrics(): ProcessMetrics | undefined {
+  try {
+    const mem = process.memoryUsage()
+    const cpu = process.cpuUsage()
+    const now = Date.now()
+
+    let cpuPercent: number | undefined
+    if (prevCpuUsage !== null && prevWallTimeMs !== null) {
+      const wallDeltaMs = now - prevWallTimeMs
+      if (wallDeltaMs > 0) {
+        const userDeltaUs = cpu.user - prevCpuUsage.user
+        const systemDeltaUs = cpu.system - prevCpuUsage.system
+        // CPU deltas are in microseconds, wall delta in milliseconds.
+        cpuPercent =
+          ((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100
+      }
+    }
+    prevCpuUsage = cpu
+    prevWallTimeMs = now
+
+    // process.constrainedMemory is absent on older runtimes. Guard it so a
+    // missing API only blanks this one field instead of throwing into the
+    // catch below and dropping every metric.
+    /* eslint-disable eslint-plugin-n/no-unsupported-features/node-builtins */
+    const constrainedMemory =
+      typeof process.constrainedMemory === 'function'
+        ? process.constrainedMemory()
+        : undefined
+    /* eslint-enable eslint-plugin-n/no-unsupported-features/node-builtins */
+
+    return {
+      uptime: process.uptime(),
+      rss: mem.rss,
+      heapTotal: mem.heapTotal,
+      heapUsed: mem.heapUsed,
+      external: mem.external,
+      arrayBuffers: mem.arrayBuffers,
+      constrainedMemory,
+      cpuUsage: cpu,
+      cpuPercent,
+    }
+  } catch {
+    return undefined
+  }
+}
+
+/**
+ * Get core event metadata shared across all analytics systems.
+ *
+ * This function collects environment, runtime, and context information
+ * that should be included with all analytics events.
+ *
+ * @param options - Configuration options. `model` falls back to
+ *   getMainLoopModel() when falsy; `betas` is used only when it is a string,
+ *   otherwise the betas are derived from the model.
+ * @returns Promise resolving to enriched metadata object
+ */
+export async function getEventMetadata(
+  options: EnrichMetadataOptions = {},
+): Promise<EventMetadata> {
+  const model = options.model ? String(options.model) : getMainLoopModel()
+  const betas =
+    typeof options.betas === 'string'
+      ? options.betas
+      : getModelBetas(model).join(',')
+  const [envContext, repoRemoteHash] = await Promise.all([
+    buildEnvContext(),
+    getRepoRemoteHash(),
+  ])
+  const processMetrics = buildProcessMetrics()
+  // Read once so the presence check and the stored value cannot disagree,
+  // and no non-null assertion is needed.
+  const subscriptionType = getSubscriptionType()
+
+  const metadata: EventMetadata = {
+    model,
+    sessionId: getSessionId(),
+    userType: process.env.USER_TYPE || '',
+    ...(betas.length > 0 ? { betas: betas } : {}),
+    envContext,
+    ...(process.env.CLAUDE_CODE_ENTRYPOINT && {
+      entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT,
+    }),
+    ...(process.env.CLAUDE_AGENT_SDK_VERSION && {
+      agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION,
+    }),
+    isInteractive: String(getIsInteractive()),
+    clientType: getClientType(),
+    ...(processMetrics && { processMetrics }),
+    sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '',
+    sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '',
+    sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '',
+    // Swarm/team agent identification
+    // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates)
+    ...getAgentIdentification(),
+    // Subscription tier for DAU-by-tier analytics
+    ...(subscriptionType && { subscriptionType }),
+    // Assistant mode tag — lives outside memoized buildEnvContext() because
+    // setKairosActive() runs at main.tsx:~1648, after the first event may
+    // have already fired and memoized the env. Read fresh per-event instead.
+    ...(feature('KAIROS') && getKairosActive()
+      ? { kairosActive: true as const }
+      : {}),
+    // Repo remote hash for joining with server-side repo bundle data
+    ...(repoRemoteHash && { rh: repoRemoteHash }),
+  }
+
+  return metadata
+}
+
+
+/**
+ * Core event metadata for 1P event logging (snake_case format).
+ *
+ * Snake_case counterpart of the core fields of EventMetadata, filled in by
+ * to1PEventFormat. Optional fields are only set when the source value is
+ * truthy.
+ */
+export type FirstPartyEventLoggingCoreMetadata = {
+  session_id: string
+  model: string
+  user_type: string
+  betas?: string
+  entrypoint?: string
+  agent_sdk_version?: string
+  is_interactive: boolean // true when EventMetadata.isInteractive === 'true'
+  client_type: string
+  swe_bench_run_id?: string
+  swe_bench_instance_id?: string
+  swe_bench_task_id?: string
+  // Swarm/team agent identification
+  agent_id?: string
+  parent_session_id?: string
+  agent_type?: 'teammate' | 'subagent' | 'standalone'
+  team_name?: string
+}
+
+/**
+ * Complete event logging metadata format for 1P events.
+ *
+ * Returned by to1PEventFormat.
+ */
+export type FirstPartyEventLoggingMetadata = {
+  env: EnvironmentMetadata
+  // Base64 encoding of the JSON-serialized ProcessMetrics; omitted when no
+  // process metrics were collected.
+  process?: string
+  // auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth).
+  // account_id is intentionally omitted — only UUID fields are populated client-side.
+  auth?: PublicApiAuth
+  // core fields correspond to the top level of ClaudeCodeInternalEvent.
+  // They get directly exported to their individual columns in the BigQuery tables
+  core: FirstPartyEventLoggingCoreMetadata
+  // additional fields are populated in the additional_metadata field of the
+  // ClaudeCodeInternalEvent proto. Includes but is not limited to information
+  // that differs by event type.
+  additional: Record<string, unknown>
+}
+
+/**
+ * Convert metadata to 1P event logging format (snake_case fields).
+ *
+ * The /api/event_logging/batch endpoint expects snake_case field names
+ * for environment and core metadata.
+ *
+ * @param metadata - Core event metadata
+ * @param userMetadata - User data; supplies the GitHub Actions metadata
+ *   placed in `env` and the account/organization UUIDs placed in `auth`
+ * @param additionalMetadata - Additional metadata to include; spread last
+ *   into `additional`, so its keys win on collision
+ * @returns Metadata formatted for 1P event logging
+ */
+export function to1PEventFormat(
+  metadata: EventMetadata,
+  userMetadata: CoreUserData,
+  additionalMetadata: Record<string, unknown> = {},
+): FirstPartyEventLoggingMetadata {
+  // Pull out everything that does not belong in `core`; the rest of the
+  // metadata is what gets mapped to the core columns below.
+  const {
+    envContext,
+    processMetrics,
+    rh,
+    kairosActive,
+    skillMode,
+    observerMode,
+    ...coreFields
+  } = metadata
+
+  // Convert envContext to snake_case.
+  // IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that
+  // adding a field here that the proto doesn't define is a compile error. The
+  // generated toJSON() serializer silently drops unknown keys — a hand-written
+  // parallel type previously let #11318, #13924, #19448, and coworker_type all
+  // ship fields that never reached BQ.
+  // Adding a field? Update the monorepo proto first (go/cc-logging):
+  //   event_schemas/.../claude_code/v1/claude_code_internal_event.proto
+  // then run `bun run generate:proto` here.
+  const env: EnvironmentMetadata = {
+    platform: envContext.platform,
+    platform_raw: envContext.platformRaw,
+    arch: envContext.arch,
+    node_version: envContext.nodeVersion,
+    terminal: envContext.terminal || 'unknown',
+    package_managers: envContext.packageManagers,
+    runtimes: envContext.runtimes,
+    is_running_with_bun: envContext.isRunningWithBun,
+    is_ci: envContext.isCi,
+    is_claubbit: envContext.isClaubbit,
+    is_claude_code_remote: envContext.isClaudeCodeRemote,
+    is_local_agent_mode: envContext.isLocalAgentMode,
+    is_conductor: envContext.isConductor,
+    is_github_action: envContext.isGithubAction,
+    is_claude_code_action: envContext.isClaudeCodeAction,
+    is_claude_ai_auth: envContext.isClaudeAiAuth,
+    version: envContext.version,
+    build_time: envContext.buildTime,
+    deployment_environment: envContext.deploymentEnvironment,
+  }
+
+  // Add optional env fields
+  if (envContext.remoteEnvironmentType) {
+    env.remote_environment_type = envContext.remoteEnvironmentType
+  }
+  if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) {
+    env.coworker_type = envContext.coworkerType
+  }
+  if (envContext.claudeCodeContainerId) {
+    env.claude_code_container_id = envContext.claudeCodeContainerId
+  }
+  if (envContext.claudeCodeRemoteSessionId) {
+    env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId
+  }
+  if (envContext.tags) {
+    // tags arrives as one comma-separated string; send it as a list of
+    // trimmed, non-empty entries.
+    env.tags = envContext.tags
+      .split(',')
+      .map(t => t.trim())
+      .filter(Boolean)
+  }
+  if (envContext.githubEventName) {
+    env.github_event_name = envContext.githubEventName
+  }
+  if (envContext.githubActionsRunnerEnvironment) {
+    env.github_actions_runner_environment =
+      envContext.githubActionsRunnerEnvironment
+  }
+  if (envContext.githubActionsRunnerOs) {
+    env.github_actions_runner_os = envContext.githubActionsRunnerOs
+  }
+  if (envContext.githubActionRef) {
+    env.github_action_ref = envContext.githubActionRef
+  }
+  if (envContext.wslVersion) {
+    env.wsl_version = envContext.wslVersion
+  }
+  if (envContext.linuxDistroId) {
+    env.linux_distro_id = envContext.linuxDistroId
+  }
+  if (envContext.linuxDistroVersion) {
+    env.linux_distro_version = envContext.linuxDistroVersion
+  }
+  if (envContext.linuxKernel) {
+    env.linux_kernel = envContext.linuxKernel
+  }
+  if (envContext.vcs) {
+    env.vcs = envContext.vcs
+  }
+  if (envContext.versionBase) {
+    env.version_base = envContext.versionBase
+  }
+
+  // Convert core fields to snake_case
+  const core: FirstPartyEventLoggingCoreMetadata = {
+    session_id: coreFields.sessionId,
+    model: coreFields.model,
+    user_type: coreFields.userType,
+    // EventMetadata carries this as a string; turn it back into a boolean.
+    is_interactive: coreFields.isInteractive === 'true',
+    client_type: coreFields.clientType,
+  }
+
+  // Add other core fields
+  if (coreFields.betas) {
+    core.betas = coreFields.betas
+  }
+  if (coreFields.entrypoint) {
+    core.entrypoint = coreFields.entrypoint
+  }
+  if (coreFields.agentSdkVersion) {
+    core.agent_sdk_version = coreFields.agentSdkVersion
+  }
+  if (coreFields.sweBenchRunId) {
+    core.swe_bench_run_id = coreFields.sweBenchRunId
+  }
+  if (coreFields.sweBenchInstanceId) {
+    core.swe_bench_instance_id = coreFields.sweBenchInstanceId
+  }
+  if (coreFields.sweBenchTaskId) {
+    core.swe_bench_task_id = coreFields.sweBenchTaskId
+  }
+  // Swarm/team agent identification
+  if (coreFields.agentId) {
+    core.agent_id = coreFields.agentId
+  }
+  if (coreFields.parentSessionId) {
+    core.parent_session_id = coreFields.parentSessionId
+  }
+  if (coreFields.agentType) {
+    core.agent_type = coreFields.agentType
+  }
+  if (coreFields.teamName) {
+    core.team_name = coreFields.teamName
+  }
+
+  // Map userMetadata to output fields.
+  // Based on src/utils/user.ts getUser(), but with fields present in other
+  // parts of ClaudeCodeInternalEvent deduplicated.
+  // Convert camelCase GitHubActionsMetadata to snake_case for 1P API
+  // Note: github_actions_metadata is placed inside env (EnvironmentMetadata)
+  // rather than at the top level of ClaudeCodeInternalEvent
+  if (userMetadata.githubActionsMetadata) {
+    const ghMeta = userMetadata.githubActionsMetadata
+    env.github_actions_metadata = {
+      actor_id: ghMeta.actorId,
+      repository_id: ghMeta.repositoryId,
+      repository_owner_id: ghMeta.repositoryOwnerId,
+    }
+  }
+
+  // auth is only attached when at least one of the two UUIDs is known.
+  let auth: PublicApiAuth | undefined
+  if (userMetadata.accountUuid || userMetadata.organizationUuid) {
+    auth = {
+      account_uuid: userMetadata.accountUuid,
+      organization_uuid: userMetadata.organizationUuid,
+    }
+  }
+
+  return {
+    env,
+    // Process metrics travel as a base64-encoded JSON string.
+    ...(processMetrics && {
+      process: Buffer.from(jsonStringify(processMetrics)).toString('base64'),
+    }),
+    ...(auth && { auth }),
+    core,
+    additional: {
+      ...(rh && { rh }),
+      ...(kairosActive && { is_assistant_mode: true }),
+      ...(skillMode && { skill_mode: skillMode }),
+      ...(observerMode && { observer_mode: observerMode }),
+      // Caller-supplied keys come last and override the ones above.
+      ...additionalMetadata,
+    },
+  }
+}