// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered /** * Shared event metadata enrichment for analytics systems * * This module provides a single source of truth for collecting and formatting * event metadata across all analytics systems (Datadog, 1P). */ import { extname } from 'path' import memoize from 'lodash-es/memoize.js' import { env, getHostPlatformForAnalytics } from '../../utils/env.js' import { envDynamic } from '../../utils/envDynamic.js' import { getModelBetas } from '../../utils/betas.js' import { getMainLoopModel } from '../../utils/model/model.js' import { getSessionId, getIsInteractive, getKairosActive, getClientType, getParentSessionId as getParentSessionIdFromState, } from '../../bootstrap/state.js' import { isEnvTruthy } from '../../utils/envUtils.js' import { isOfficialMcpUrl } from '../mcp/officialRegistry.js' import { isClaudeAISubscriber, getSubscriptionType } from '../../utils/auth.js' import { getRepoRemoteHash } from '../../utils/git.js' import { getWslVersion, getLinuxDistroInfo, detectVcs, } from '../../utils/platform.js' import type { CoreUserData } from 'src/utils/user.js' import { getAgentContext } from '../../utils/agentContext.js' import type { EnvironmentMetadata } from '../../types/generated/events_mono/claude_code/v1/claude_code_internal_event.js' import type { PublicApiAuth } from '../../types/generated/events_mono/common/v1/auth.js' import { jsonStringify } from '../../utils/slowOperations.js' import { getAgentId, getParentSessionId as getTeammateParentSessionId, getTeamName, isTeammate, } from '../../utils/teammate.js' import { feature } from 'bun:bundle' /** * Marker type for verifying analytics metadata doesn't contain sensitive data * * This type forces explicit verification that string values being logged * don't contain code snippets, file paths, or other sensitive information. * * The metadata is expected to be JSON-serializable. * * Usage: `myString as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS` * * The type is `never` which means it can never actually hold a value - this is * intentional as it's only used for type-casting to document developer intent. */ export type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS = never /** * Sanitizes tool names for analytics logging to avoid PII exposure. * * MCP tool names follow the format `mcp____` and can reveal * user-specific server configurations, which is considered PII-medium. * This function redacts MCP tool names while preserving built-in tool names * (Bash, Read, Write, etc.) which are safe to log. * * @param toolName - The tool name to sanitize * @returns The original name for built-in tools, or 'mcp_tool' for MCP tools */ export function sanitizeToolNameForAnalytics( toolName: string, ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS { if (toolName.startsWith('mcp__')) { return 'mcp_tool' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } return toolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } /** * Check if detailed tool name logging is enabled for OTLP events. * When enabled, MCP server/tool names and Skill names are logged. * Disabled by default to protect PII (user-specific server configurations). * * Enable with OTEL_LOG_TOOL_DETAILS=1 */ export function isToolDetailsLoggingEnabled(): boolean { return isEnvTruthy(process.env.OTEL_LOG_TOOL_DETAILS) } /** * Check if detailed tool name logging (MCP server/tool names) is enabled * for analytics events. * * Per go/taxonomy, MCP names are medium PII. We log them for: * - Cowork (entrypoint=local-agent) — no ZDR concept, log all MCPs * - claude.ai-proxied connectors — always official (from claude.ai's list) * - Servers whose URL matches the official MCP registry — directory * connectors added via `claude mcp add`, not customer-specific config * * Custom/user-configured MCPs stay sanitized (toolName='mcp_tool'). */ export function isAnalyticsToolDetailsLoggingEnabled( mcpServerType: string | undefined, mcpServerBaseUrl: string | undefined, ): boolean { if (process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent') { return true } if (mcpServerType === 'claudeai-proxy') { return true } if (mcpServerBaseUrl && isOfficialMcpUrl(mcpServerBaseUrl)) { return true } return false } /** * Built-in first-party MCP servers whose names are fixed reserved strings, * not user-configured — so logging them is not PII. Checked in addition to * isAnalyticsToolDetailsLoggingEnabled's transport/URL gates, which a stdio * built-in would otherwise fail. * * Feature-gated so the set is empty when the feature is off: the name * reservation (main.tsx, config.ts addMcpServer) is itself feature-gated, so * a user-configured 'computer-use' is possible in builds without the feature. */ /* eslint-disable @typescript-eslint/no-require-imports */ const BUILTIN_MCP_SERVER_NAMES: ReadonlySet = new Set( feature('CHICAGO_MCP') ? [ ( require('../../utils/computerUse/common.js') as typeof import('../../utils/computerUse/common.js') ).COMPUTER_USE_MCP_SERVER_NAME, ] : [], ) /* eslint-enable @typescript-eslint/no-require-imports */ /** * Spreadable helper for logEvent payloads — returns {mcpServerName, mcpToolName} * if the gate passes, empty object otherwise. Consolidates the identical IIFE * pattern at each tengu_tool_use_* call site. */ export function mcpToolDetailsForAnalytics( toolName: string, mcpServerType: string | undefined, mcpServerBaseUrl: string | undefined, ): { mcpServerName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS mcpToolName?: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } { const details = extractMcpToolDetails(toolName) if (!details) { return {} } if ( !BUILTIN_MCP_SERVER_NAMES.has(details.serverName) && !isAnalyticsToolDetailsLoggingEnabled(mcpServerType, mcpServerBaseUrl) ) { return {} } return { mcpServerName: details.serverName, mcpToolName: details.mcpToolName, } } /** * Extract MCP server and tool names from a full MCP tool name. * MCP tool names follow the format: mcp____ * * @param toolName - The full tool name (e.g., 'mcp__slack__read_channel') * @returns Object with serverName and toolName, or undefined if not an MCP tool */ export function extractMcpToolDetails(toolName: string): | { serverName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS mcpToolName: AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } | undefined { if (!toolName.startsWith('mcp__')) { return undefined } // Format: mcp____ const parts = toolName.split('__') if (parts.length < 3) { return undefined } const serverName = parts[1] // Tool name may contain __ so rejoin remaining parts const mcpToolName = parts.slice(2).join('__') if (!serverName || !mcpToolName) { return undefined } return { serverName: serverName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, mcpToolName: mcpToolName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS, } } /** * Extract skill name from Skill tool input. * * @param toolName - The tool name (should be 'Skill') * @param input - The tool input containing the skill name * @returns The skill name if this is a Skill tool call, undefined otherwise */ export function extractSkillName( toolName: string, input: unknown, ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined { if (toolName !== 'Skill') { return undefined } if ( typeof input === 'object' && input !== null && 'skill' in input && typeof (input as { skill: unknown }).skill === 'string' ) { return (input as { skill: string }) .skill as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } return undefined } const TOOL_INPUT_STRING_TRUNCATE_AT = 512 const TOOL_INPUT_STRING_TRUNCATE_TO = 128 const TOOL_INPUT_MAX_JSON_CHARS = 4 * 1024 const TOOL_INPUT_MAX_COLLECTION_ITEMS = 20 const TOOL_INPUT_MAX_DEPTH = 2 function truncateToolInputValue(value: unknown, depth = 0): unknown { if (typeof value === 'string') { if (value.length > TOOL_INPUT_STRING_TRUNCATE_AT) { return `${value.slice(0, TOOL_INPUT_STRING_TRUNCATE_TO)}…[${value.length} chars]` } return value } if ( typeof value === 'number' || typeof value === 'boolean' || value === null || value === undefined ) { return value } if (depth >= TOOL_INPUT_MAX_DEPTH) { return '' } if (Array.isArray(value)) { const mapped = value .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS) .map(v => truncateToolInputValue(v, depth + 1)) if (value.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) { mapped.push(`…[${value.length} items]`) } return mapped } if (typeof value === 'object') { const entries = Object.entries(value as Record) // Skip internal marker keys (e.g. _simulatedSedEdit re-introduced by // SedEditPermissionRequest) so they don't leak into telemetry. .filter(([k]) => !k.startsWith('_')) const mapped = entries .slice(0, TOOL_INPUT_MAX_COLLECTION_ITEMS) .map(([k, v]) => [k, truncateToolInputValue(v, depth + 1)]) if (entries.length > TOOL_INPUT_MAX_COLLECTION_ITEMS) { mapped.push(['…', `${entries.length} keys`]) } return Object.fromEntries(mapped) } return String(value) } /** * Serialize a tool's input arguments for the OTel tool_result event. * Truncates long strings and deep nesting to keep the output bounded while * preserving forensically useful fields like file paths, URLs, and MCP args. * Returns undefined when OTEL_LOG_TOOL_DETAILS is not enabled. */ export function extractToolInputForTelemetry( input: unknown, ): string | undefined { if (!isToolDetailsLoggingEnabled()) { return undefined } const truncated = truncateToolInputValue(input) let json = jsonStringify(truncated) if (json.length > TOOL_INPUT_MAX_JSON_CHARS) { json = json.slice(0, TOOL_INPUT_MAX_JSON_CHARS) + '…[truncated]' } return json } /** * Maximum length for file extensions to be logged. * Extensions longer than this are considered potentially sensitive * (e.g., hash-based filenames like "key-hash-abcd-123-456") and * will be replaced with 'other'. */ const MAX_FILE_EXTENSION_LENGTH = 10 /** * Extracts and sanitizes a file extension for analytics logging. * * Uses Node's path.extname for reliable cross-platform extension extraction. * Returns 'other' for extensions exceeding MAX_FILE_EXTENSION_LENGTH to avoid * logging potentially sensitive data (like hash-based filenames). * * @param filePath - The file path to extract the extension from * @returns The sanitized extension, 'other' for long extensions, or undefined if no extension */ export function getFileExtensionForAnalytics( filePath: string, ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined { const ext = extname(filePath).toLowerCase() if (!ext || ext === '.') { return undefined } const extension = ext.slice(1) // remove leading dot if (extension.length > MAX_FILE_EXTENSION_LENGTH) { return 'other' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } return extension as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } /** Allow list of commands we extract file extensions from. */ const FILE_COMMANDS = new Set([ 'rm', 'mv', 'cp', 'touch', 'mkdir', 'chmod', 'chown', 'cat', 'head', 'tail', 'sort', 'stat', 'diff', 'wc', 'grep', 'rg', 'sed', ]) /** Regex to split bash commands on compound operators (&&, ||, ;, |). */ const COMPOUND_OPERATOR_REGEX = /\s*(?:&&|\|\||[;|])\s*/ /** Regex to split on whitespace. */ const WHITESPACE_REGEX = /\s+/ /** * Extracts file extensions from a bash command for analytics. * Best-effort: splits on operators and whitespace, extracts extensions * from non-flag args of allowed commands. No heavy shell parsing needed * because grep patterns and sed scripts rarely resemble file extensions. */ export function getFileExtensionsFromBashCommand( command: string, simulatedSedEditFilePath?: string, ): AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS | undefined { if (!command.includes('.') && !simulatedSedEditFilePath) return undefined let result: string | undefined const seen = new Set() if (simulatedSedEditFilePath) { const ext = getFileExtensionForAnalytics(simulatedSedEditFilePath) if (ext) { seen.add(ext) result = ext } } for (const subcmd of command.split(COMPOUND_OPERATOR_REGEX)) { if (!subcmd) continue const tokens = subcmd.split(WHITESPACE_REGEX) if (tokens.length < 2) continue const firstToken = tokens[0]! const slashIdx = firstToken.lastIndexOf('/') const baseCmd = slashIdx >= 0 ? firstToken.slice(slashIdx + 1) : firstToken if (!FILE_COMMANDS.has(baseCmd)) continue for (let i = 1; i < tokens.length; i++) { const arg = tokens[i]! if (arg.charCodeAt(0) === 45 /* - */) continue const ext = getFileExtensionForAnalytics(arg) if (ext && !seen.has(ext)) { seen.add(ext) result = result ? result + ',' + ext : ext } } } if (!result) return undefined return result as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } /** * Environment context metadata */ export type EnvContext = { platform: string platformRaw: string arch: string nodeVersion: string terminal: string | null packageManagers: string runtimes: string isRunningWithBun: boolean isCi: boolean isClaubbit: boolean isClaudeCodeRemote: boolean isLocalAgentMode: boolean isConductor: boolean remoteEnvironmentType?: string coworkerType?: string claudeCodeContainerId?: string claudeCodeRemoteSessionId?: string tags?: string isGithubAction: boolean isClaudeCodeAction: boolean isClaudeAiAuth: boolean version: string versionBase?: string buildTime: string deploymentEnvironment: string githubEventName?: string githubActionsRunnerEnvironment?: string githubActionsRunnerOs?: string githubActionRef?: string wslVersion?: string linuxDistroId?: string linuxDistroVersion?: string linuxKernel?: string vcs?: string } /** * Process metrics included with all analytics events. */ export type ProcessMetrics = { uptime: number rss: number heapTotal: number heapUsed: number external: number arrayBuffers: number constrainedMemory: number | undefined cpuUsage: NodeJS.CpuUsage cpuPercent: number | undefined } /** * Core event metadata shared across all analytics systems */ export type EventMetadata = { model: string sessionId: string userType: string betas?: string envContext: EnvContext entrypoint?: string agentSdkVersion?: string isInteractive: string clientType: string processMetrics?: ProcessMetrics sweBenchRunId: string sweBenchInstanceId: string sweBenchTaskId: string // Swarm/team agent identification for analytics attribution agentId?: string // CLAUDE_CODE_AGENT_ID (format: agentName@teamName) or subagent UUID parentSessionId?: string // CLAUDE_CODE_PARENT_SESSION_ID (team lead's session) agentType?: 'teammate' | 'subagent' | 'standalone' // Distinguishes swarm teammates, Agent tool subagents, and standalone agents teamName?: string // Team name for swarm agents (from env var or AsyncLocalStorage) subscriptionType?: string // OAuth subscription tier (max, pro, enterprise, team) rh?: string // Hashed repo remote URL (first 16 chars of SHA256), for joining with server-side data kairosActive?: true // KAIROS assistant mode active (ant-only; set in main.tsx after gate check) skillMode?: 'discovery' | 'coach' | 'discovery_and_coach' // Which skill surfacing mechanism(s) are gated on (ant-only; for BQ session segmentation) observerMode?: 'backseat' | 'skillcoach' | 'both' // Which observer classifiers are gated on (ant-only; for BQ cohort splits on tengu_backseat_* events) } /** * Options for enriching event metadata */ export type EnrichMetadataOptions = { // Model to use, falls back to getMainLoopModel() if not provided model?: unknown // Explicit betas string (already joined) betas?: unknown // Additional metadata to include (optional) additionalMetadata?: Record } /** * Get agent identification for analytics. * Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates) */ function getAgentIdentification(): { agentId?: string parentSessionId?: string agentType?: 'teammate' | 'subagent' | 'standalone' teamName?: string } { // Check AsyncLocalStorage first (for subagents running in same process) const agentContext = getAgentContext() if (agentContext) { const result: ReturnType = { agentId: agentContext.agentId, parentSessionId: agentContext.parentSessionId, agentType: agentContext.agentType, } if (agentContext.agentType === 'teammate') { result.teamName = agentContext.teamName } return result } // Fall back to swarm helpers (for swarm agents) const agentId = getAgentId() const parentSessionId = getTeammateParentSessionId() const teamName = getTeamName() const isSwarmAgent = isTeammate() // For standalone agents (have agent ID but not a teammate), set agentType to 'standalone' const agentType = isSwarmAgent ? ('teammate' as const) : agentId ? ('standalone' as const) : undefined if (agentId || agentType || parentSessionId || teamName) { return { ...(agentId ? { agentId } : {}), ...(agentType ? { agentType } : {}), ...(parentSessionId ? { parentSessionId } : {}), ...(teamName ? { teamName } : {}), } } // Check bootstrap state for parent session ID (e.g., plan mode -> implementation) const stateParentSessionId = getParentSessionIdFromState() if (stateParentSessionId) { return { parentSessionId: stateParentSessionId } } return {} } /** * Extract base version from full version string. "2.0.36-dev.20251107.t174150.sha2709699" → "2.0.36-dev" */ const getVersionBase = memoize((): string | undefined => { const match = MACRO.VERSION.match(/^\d+\.\d+\.\d+(?:-[a-z]+)?/) return match ? match[0] : undefined }) /** * Builds the environment context object */ const buildEnvContext = memoize(async (): Promise => { const [packageManagers, runtimes, linuxDistroInfo, vcs] = await Promise.all([ env.getPackageManagers(), env.getRuntimes(), getLinuxDistroInfo(), detectVcs(), ]) return { platform: getHostPlatformForAnalytics(), // Raw process.platform so freebsd/openbsd/aix/sunos are visible in BQ. // getHostPlatformForAnalytics() buckets those into 'linux'; here we want // the truth. CLAUDE_CODE_HOST_PLATFORM still overrides for container/remote. platformRaw: process.env.CLAUDE_CODE_HOST_PLATFORM || process.platform, arch: env.arch, nodeVersion: env.nodeVersion, terminal: envDynamic.terminal, packageManagers: packageManagers.join(','), runtimes: runtimes.join(','), isRunningWithBun: env.isRunningWithBun(), isCi: isEnvTruthy(process.env.CI), isClaubbit: isEnvTruthy(process.env.CLAUBBIT), isClaudeCodeRemote: isEnvTruthy(process.env.CLAUDE_CODE_REMOTE), isLocalAgentMode: process.env.CLAUDE_CODE_ENTRYPOINT === 'local-agent', isConductor: env.isConductor(), ...(process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE && { remoteEnvironmentType: process.env.CLAUDE_CODE_REMOTE_ENVIRONMENT_TYPE, }), // Gated by feature flag to prevent leaking "coworkerType" string in external builds ...(feature('COWORKER_TYPE_TELEMETRY') ? process.env.CLAUDE_CODE_COWORKER_TYPE ? { coworkerType: process.env.CLAUDE_CODE_COWORKER_TYPE } : {} : {}), ...(process.env.CLAUDE_CODE_CONTAINER_ID && { claudeCodeContainerId: process.env.CLAUDE_CODE_CONTAINER_ID, }), ...(process.env.CLAUDE_CODE_REMOTE_SESSION_ID && { claudeCodeRemoteSessionId: process.env.CLAUDE_CODE_REMOTE_SESSION_ID, }), ...(process.env.CLAUDE_CODE_TAGS && { tags: process.env.CLAUDE_CODE_TAGS, }), isGithubAction: isEnvTruthy(process.env.GITHUB_ACTIONS), isClaudeCodeAction: isEnvTruthy(process.env.CLAUDE_CODE_ACTION), isClaudeAiAuth: isClaudeAISubscriber(), version: MACRO.VERSION, versionBase: getVersionBase(), buildTime: MACRO.BUILD_TIME, deploymentEnvironment: env.detectDeploymentEnvironment(), ...(isEnvTruthy(process.env.GITHUB_ACTIONS) && { githubEventName: process.env.GITHUB_EVENT_NAME, githubActionsRunnerEnvironment: process.env.RUNNER_ENVIRONMENT, githubActionsRunnerOs: process.env.RUNNER_OS, githubActionRef: process.env.GITHUB_ACTION_PATH?.includes( 'claude-code-action/', ) ? process.env.GITHUB_ACTION_PATH.split('claude-code-action/')[1] : undefined, }), ...(getWslVersion() && { wslVersion: getWslVersion() }), ...(linuxDistroInfo ?? {}), ...(vcs.length > 0 ? { vcs: vcs.join(',') } : {}), } }) // -- // CPU% delta tracking — inherently process-global, same pattern as logBatch/flushTimer in datadog.ts let prevCpuUsage: NodeJS.CpuUsage | null = null let prevWallTimeMs: number | null = null /** * Builds process metrics object for all users. */ function buildProcessMetrics(): ProcessMetrics | undefined { try { const mem = process.memoryUsage() const cpu = process.cpuUsage() const now = Date.now() let cpuPercent: number | undefined if (prevCpuUsage && prevWallTimeMs) { const wallDeltaMs = now - prevWallTimeMs if (wallDeltaMs > 0) { const userDeltaUs = cpu.user - prevCpuUsage.user const systemDeltaUs = cpu.system - prevCpuUsage.system cpuPercent = ((userDeltaUs + systemDeltaUs) / (wallDeltaMs * 1000)) * 100 } } prevCpuUsage = cpu prevWallTimeMs = now return { uptime: process.uptime(), rss: mem.rss, heapTotal: mem.heapTotal, heapUsed: mem.heapUsed, external: mem.external, arrayBuffers: mem.arrayBuffers, // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins constrainedMemory: process.constrainedMemory(), cpuUsage: cpu, cpuPercent, } } catch { return undefined } } /** * Get core event metadata shared across all analytics systems. * * This function collects environment, runtime, and context information * that should be included with all analytics events. * * @param options - Configuration options * @returns Promise resolving to enriched metadata object */ export async function getEventMetadata( options: EnrichMetadataOptions = {}, ): Promise { const model = options.model ? String(options.model) : getMainLoopModel() const betas = typeof options.betas === 'string' ? options.betas : getModelBetas(model).join(',') const [envContext, repoRemoteHash] = await Promise.all([ buildEnvContext(), getRepoRemoteHash(), ]) const processMetrics = buildProcessMetrics() const metadata: EventMetadata = { model, sessionId: getSessionId(), userType: process.env.USER_TYPE || '', ...(betas.length > 0 ? { betas: betas } : {}), envContext, ...(process.env.CLAUDE_CODE_ENTRYPOINT && { entrypoint: process.env.CLAUDE_CODE_ENTRYPOINT, }), ...(process.env.CLAUDE_AGENT_SDK_VERSION && { agentSdkVersion: process.env.CLAUDE_AGENT_SDK_VERSION, }), isInteractive: String(getIsInteractive()), clientType: getClientType(), ...(processMetrics && { processMetrics }), sweBenchRunId: process.env.SWE_BENCH_RUN_ID || '', sweBenchInstanceId: process.env.SWE_BENCH_INSTANCE_ID || '', sweBenchTaskId: process.env.SWE_BENCH_TASK_ID || '', // Swarm/team agent identification // Priority: AsyncLocalStorage context (subagents) > env vars (swarm teammates) ...getAgentIdentification(), // Subscription tier for DAU-by-tier analytics ...(getSubscriptionType() && { subscriptionType: getSubscriptionType()!, }), // Assistant mode tag — lives outside memoized buildEnvContext() because // setKairosActive() runs at main.tsx:~1648, after the first event may // have already fired and memoized the env. Read fresh per-event instead. ...(feature('KAIROS') && getKairosActive() ? { kairosActive: true as const } : {}), // Repo remote hash for joining with server-side repo bundle data ...(repoRemoteHash && { rh: repoRemoteHash }), } return metadata } /** * Core event metadata for 1P event logging (snake_case format). */ export type FirstPartyEventLoggingCoreMetadata = { session_id: string model: string user_type: string betas?: string entrypoint?: string agent_sdk_version?: string is_interactive: boolean client_type: string swe_bench_run_id?: string swe_bench_instance_id?: string swe_bench_task_id?: string // Swarm/team agent identification agent_id?: string parent_session_id?: string agent_type?: 'teammate' | 'subagent' | 'standalone' team_name?: string } /** * Complete event logging metadata format for 1P events. */ export type FirstPartyEventLoggingMetadata = { env: EnvironmentMetadata process?: string // auth is a top-level field on ClaudeCodeInternalEvent (proto PublicApiAuth). // account_id is intentionally omitted — only UUID fields are populated client-side. auth?: PublicApiAuth // core fields correspond to the top level of ClaudeCodeInternalEvent. // They get directly exported to their individual columns in the BigQuery tables core: FirstPartyEventLoggingCoreMetadata // additional fields are populated in the additional_metadata field of the // ClaudeCodeInternalEvent proto. Includes but is not limited to information // that differs by event type. additional: Record } /** * Convert metadata to 1P event logging format (snake_case fields). * * The /api/event_logging/batch endpoint expects snake_case field names * for environment and core metadata. * * @param metadata - Core event metadata * @param additionalMetadata - Additional metadata to include * @returns Metadata formatted for 1P event logging */ export function to1PEventFormat( metadata: EventMetadata, userMetadata: CoreUserData, additionalMetadata: Record = {}, ): FirstPartyEventLoggingMetadata { const { envContext, processMetrics, rh, kairosActive, skillMode, observerMode, ...coreFields } = metadata // Convert envContext to snake_case. // IMPORTANT: env is typed as the proto-generated EnvironmentMetadata so that // adding a field here that the proto doesn't define is a compile error. The // generated toJSON() serializer silently drops unknown keys — a hand-written // parallel type previously let #11318, #13924, #19448, and coworker_type all // ship fields that never reached BQ. // Adding a field? Update the monorepo proto first (go/cc-logging): // event_schemas/.../claude_code/v1/claude_code_internal_event.proto // then run `bun run generate:proto` here. const env: EnvironmentMetadata = { platform: envContext.platform, platform_raw: envContext.platformRaw, arch: envContext.arch, node_version: envContext.nodeVersion, terminal: envContext.terminal || 'unknown', package_managers: envContext.packageManagers, runtimes: envContext.runtimes, is_running_with_bun: envContext.isRunningWithBun, is_ci: envContext.isCi, is_claubbit: envContext.isClaubbit, is_claude_code_remote: envContext.isClaudeCodeRemote, is_local_agent_mode: envContext.isLocalAgentMode, is_conductor: envContext.isConductor, is_github_action: envContext.isGithubAction, is_claude_code_action: envContext.isClaudeCodeAction, is_claude_ai_auth: envContext.isClaudeAiAuth, version: envContext.version, build_time: envContext.buildTime, deployment_environment: envContext.deploymentEnvironment, } // Add optional env fields if (envContext.remoteEnvironmentType) { env.remote_environment_type = envContext.remoteEnvironmentType } if (feature('COWORKER_TYPE_TELEMETRY') && envContext.coworkerType) { env.coworker_type = envContext.coworkerType } if (envContext.claudeCodeContainerId) { env.claude_code_container_id = envContext.claudeCodeContainerId } if (envContext.claudeCodeRemoteSessionId) { env.claude_code_remote_session_id = envContext.claudeCodeRemoteSessionId } if (envContext.tags) { env.tags = envContext.tags .split(',') .map(t => t.trim()) .filter(Boolean) } if (envContext.githubEventName) { env.github_event_name = envContext.githubEventName } if (envContext.githubActionsRunnerEnvironment) { env.github_actions_runner_environment = envContext.githubActionsRunnerEnvironment } if (envContext.githubActionsRunnerOs) { env.github_actions_runner_os = envContext.githubActionsRunnerOs } if (envContext.githubActionRef) { env.github_action_ref = envContext.githubActionRef } if (envContext.wslVersion) { env.wsl_version = envContext.wslVersion } if (envContext.linuxDistroId) { env.linux_distro_id = envContext.linuxDistroId } if (envContext.linuxDistroVersion) { env.linux_distro_version = envContext.linuxDistroVersion } if (envContext.linuxKernel) { env.linux_kernel = envContext.linuxKernel } if (envContext.vcs) { env.vcs = envContext.vcs } if (envContext.versionBase) { env.version_base = envContext.versionBase } // Convert core fields to snake_case const core: FirstPartyEventLoggingCoreMetadata = { session_id: coreFields.sessionId, model: coreFields.model, user_type: coreFields.userType, is_interactive: coreFields.isInteractive === 'true', client_type: coreFields.clientType, } // Add other core fields if (coreFields.betas) { core.betas = coreFields.betas } if (coreFields.entrypoint) { core.entrypoint = coreFields.entrypoint } if (coreFields.agentSdkVersion) { core.agent_sdk_version = coreFields.agentSdkVersion } if (coreFields.sweBenchRunId) { core.swe_bench_run_id = coreFields.sweBenchRunId } if (coreFields.sweBenchInstanceId) { core.swe_bench_instance_id = coreFields.sweBenchInstanceId } if (coreFields.sweBenchTaskId) { core.swe_bench_task_id = coreFields.sweBenchTaskId } // Swarm/team agent identification if (coreFields.agentId) { core.agent_id = coreFields.agentId } if (coreFields.parentSessionId) { core.parent_session_id = coreFields.parentSessionId } if (coreFields.agentType) { core.agent_type = coreFields.agentType } if (coreFields.teamName) { core.team_name = coreFields.teamName } // Map userMetadata to output fields. // Based on src/utils/user.ts getUser(), but with fields present in other // parts of ClaudeCodeInternalEvent deduplicated. // Convert camelCase GitHubActionsMetadata to snake_case for 1P API // Note: github_actions_metadata is placed inside env (EnvironmentMetadata) // rather than at the top level of ClaudeCodeInternalEvent if (userMetadata.githubActionsMetadata) { const ghMeta = userMetadata.githubActionsMetadata env.github_actions_metadata = { actor_id: ghMeta.actorId, repository_id: ghMeta.repositoryId, repository_owner_id: ghMeta.repositoryOwnerId, } } let auth: PublicApiAuth | undefined if (userMetadata.accountUuid || userMetadata.organizationUuid) { auth = { account_uuid: userMetadata.accountUuid, organization_uuid: userMetadata.organizationUuid, } } return { env, ...(processMetrics && { process: Buffer.from(jsonStringify(processMetrics)).toString('base64'), }), ...(auth && { auth }), core, additional: { ...(rh && { rh }), ...(kairosActive && { is_assistant_mode: true }), ...(skillMode && { skill_mode: skillMode }), ...(observerMode && { observer_mode: observerMode }), ...additionalMetadata, }, } }