524 lines
17 KiB
TypeScript
524 lines
17 KiB
TypeScript
import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
|
|
import type { AppState } from '../../state/AppState.js'
|
|
import type { Message } from '../../types/message.js'
|
|
import { isAgentSwarmsEnabled } from '../../utils/agentSwarmsEnabled.js'
|
|
import { count } from '../../utils/array.js'
|
|
import { isEnvDefinedFalsy, isEnvTruthy } from '../../utils/envUtils.js'
|
|
import { toError } from '../../utils/errors.js'
|
|
import {
|
|
type CacheSafeParams,
|
|
createCacheSafeParams,
|
|
runForkedAgent,
|
|
} from '../../utils/forkedAgent.js'
|
|
import type { REPLHookContext } from '../../utils/hooks/postSamplingHooks.js'
|
|
import { logError } from '../../utils/log.js'
|
|
import {
|
|
createUserMessage,
|
|
getLastAssistantMessage,
|
|
} from '../../utils/messages.js'
|
|
import { getInitialSettings } from '../../utils/settings/settings.js'
|
|
import { isTeammate } from '../../utils/teammate.js'
|
|
import { getFeatureValue_CACHED_MAY_BE_STALE } from '../analytics/growthbook.js'
|
|
import {
|
|
type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
logEvent,
|
|
} from '../analytics/index.js'
|
|
import { currentLimits } from '../claudeAiLimits.js'
|
|
import { isSpeculationEnabled, startSpeculation } from './speculation.js'
|
|
|
|
let currentAbortController: AbortController | null = null
|
|
|
|
export type PromptVariant = 'user_intent' | 'stated_intent'
|
|
|
|
export function getPromptVariant(): PromptVariant {
|
|
return 'user_intent'
|
|
}
|
|
|
|
export function shouldEnablePromptSuggestion(): boolean {
|
|
// Env var overrides everything (for testing)
|
|
const envOverride = process.env.CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION
|
|
if (isEnvDefinedFalsy(envOverride)) {
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled: false,
|
|
source:
|
|
'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return false
|
|
}
|
|
if (isEnvTruthy(envOverride)) {
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled: true,
|
|
source:
|
|
'env' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return true
|
|
}
|
|
|
|
// Keep default in sync with Config.tsx (settings toggle visibility)
|
|
if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_chomp_inflection', false)) {
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled: false,
|
|
source:
|
|
'growthbook' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return false
|
|
}
|
|
|
|
// Disable in non-interactive mode (print mode, piped input, SDK)
|
|
if (getIsNonInteractiveSession()) {
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled: false,
|
|
source:
|
|
'non_interactive' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return false
|
|
}
|
|
|
|
// Disable for swarm teammates (only leader should show suggestions)
|
|
if (isAgentSwarmsEnabled() && isTeammate()) {
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled: false,
|
|
source:
|
|
'swarm_teammate' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return false
|
|
}
|
|
|
|
const enabled = getInitialSettings()?.promptSuggestionEnabled !== false
|
|
logEvent('tengu_prompt_suggestion_init', {
|
|
enabled,
|
|
source:
|
|
'setting' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
})
|
|
return enabled
|
|
}
|
|
|
|
export function abortPromptSuggestion(): void {
|
|
if (currentAbortController) {
|
|
currentAbortController.abort()
|
|
currentAbortController = null
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns a suppression reason if suggestions should not be generated,
|
|
* or null if generation is allowed. Shared by main and pipelined paths.
|
|
*/
|
|
export function getSuggestionSuppressReason(appState: AppState): string | null {
|
|
if (!appState.promptSuggestionEnabled) return 'disabled'
|
|
if (appState.pendingWorkerRequest || appState.pendingSandboxRequest)
|
|
return 'pending_permission'
|
|
if (appState.elicitation.queue.length > 0) return 'elicitation_active'
|
|
if (appState.toolPermissionContext.mode === 'plan') return 'plan_mode'
|
|
if (
|
|
process.env.USER_TYPE === 'external' &&
|
|
currentLimits.status !== 'allowed'
|
|
)
|
|
return 'rate_limit'
|
|
return null
|
|
}
|
|
|
|
/**
|
|
* Shared guard + generation logic used by both CLI TUI and SDK push paths.
|
|
* Returns the suggestion with metadata, or null if suppressed/filtered.
|
|
*/
|
|
export async function tryGenerateSuggestion(
|
|
abortController: AbortController,
|
|
messages: Message[],
|
|
getAppState: () => AppState,
|
|
cacheSafeParams: CacheSafeParams,
|
|
source?: 'cli' | 'sdk',
|
|
): Promise<{
|
|
suggestion: string
|
|
promptId: PromptVariant
|
|
generationRequestId: string | null
|
|
} | null> {
|
|
if (abortController.signal.aborted) {
|
|
logSuggestionSuppressed('aborted', undefined, undefined, source)
|
|
return null
|
|
}
|
|
|
|
const assistantTurnCount = count(messages, m => m.type === 'assistant')
|
|
if (assistantTurnCount < 2) {
|
|
logSuggestionSuppressed('early_conversation', undefined, undefined, source)
|
|
return null
|
|
}
|
|
|
|
const lastAssistantMessage = getLastAssistantMessage(messages)
|
|
if (lastAssistantMessage?.isApiErrorMessage) {
|
|
logSuggestionSuppressed('last_response_error', undefined, undefined, source)
|
|
return null
|
|
}
|
|
const cacheReason = getParentCacheSuppressReason(lastAssistantMessage)
|
|
if (cacheReason) {
|
|
logSuggestionSuppressed(cacheReason, undefined, undefined, source)
|
|
return null
|
|
}
|
|
|
|
const appState = getAppState()
|
|
const suppressReason = getSuggestionSuppressReason(appState)
|
|
if (suppressReason) {
|
|
logSuggestionSuppressed(suppressReason, undefined, undefined, source)
|
|
return null
|
|
}
|
|
|
|
const promptId = getPromptVariant()
|
|
const { suggestion, generationRequestId } = await generateSuggestion(
|
|
abortController,
|
|
promptId,
|
|
cacheSafeParams,
|
|
)
|
|
if (abortController.signal.aborted) {
|
|
logSuggestionSuppressed('aborted', undefined, undefined, source)
|
|
return null
|
|
}
|
|
if (!suggestion) {
|
|
logSuggestionSuppressed('empty', undefined, promptId, source)
|
|
return null
|
|
}
|
|
if (shouldFilterSuggestion(suggestion, promptId, source)) return null
|
|
|
|
return { suggestion, promptId, generationRequestId }
|
|
}
|
|
|
|
export async function executePromptSuggestion(
|
|
context: REPLHookContext,
|
|
): Promise<void> {
|
|
if (context.querySource !== 'repl_main_thread') return
|
|
|
|
currentAbortController = new AbortController()
|
|
const abortController = currentAbortController
|
|
const cacheSafeParams = createCacheSafeParams(context)
|
|
|
|
try {
|
|
const result = await tryGenerateSuggestion(
|
|
abortController,
|
|
context.messages,
|
|
context.toolUseContext.getAppState,
|
|
cacheSafeParams,
|
|
'cli',
|
|
)
|
|
if (!result) return
|
|
|
|
context.toolUseContext.setAppState(prev => ({
|
|
...prev,
|
|
promptSuggestion: {
|
|
text: result.suggestion,
|
|
promptId: result.promptId,
|
|
shownAt: 0,
|
|
acceptedAt: 0,
|
|
generationRequestId: result.generationRequestId,
|
|
},
|
|
}))
|
|
|
|
if (isSpeculationEnabled() && result.suggestion) {
|
|
void startSpeculation(
|
|
result.suggestion,
|
|
context,
|
|
context.toolUseContext.setAppState,
|
|
false,
|
|
cacheSafeParams,
|
|
)
|
|
}
|
|
} catch (error) {
|
|
if (
|
|
error instanceof Error &&
|
|
(error.name === 'AbortError' || error.name === 'APIUserAbortError')
|
|
) {
|
|
logSuggestionSuppressed('aborted', undefined, undefined, 'cli')
|
|
return
|
|
}
|
|
logError(toError(error))
|
|
} finally {
|
|
if (currentAbortController === abortController) {
|
|
currentAbortController = null
|
|
}
|
|
}
|
|
}
|
|
|
|
const MAX_PARENT_UNCACHED_TOKENS = 10_000
|
|
|
|
export function getParentCacheSuppressReason(
|
|
lastAssistantMessage: ReturnType<typeof getLastAssistantMessage>,
|
|
): string | null {
|
|
if (!lastAssistantMessage) return null
|
|
|
|
const usage = lastAssistantMessage.message.usage
|
|
const inputTokens = usage.input_tokens ?? 0
|
|
const cacheWriteTokens = usage.cache_creation_input_tokens ?? 0
|
|
// The fork re-processes the parent's output (never cached) plus its own prompt.
|
|
const outputTokens = usage.output_tokens ?? 0
|
|
|
|
return inputTokens + cacheWriteTokens + outputTokens >
|
|
MAX_PARENT_UNCACHED_TOKENS
|
|
? 'cache_cold'
|
|
: null
|
|
}
|
|
|
|
const SUGGESTION_PROMPT = `[SUGGESTION MODE: Suggest what the user might naturally type next into Claude Code.]
|
|
|
|
FIRST: Look at the user's recent messages and original request.
|
|
|
|
Your job is to predict what THEY would type - not what you think they should do.
|
|
|
|
THE TEST: Would they think "I was just about to type that"?
|
|
|
|
EXAMPLES:
|
|
User asked "fix the bug and run tests", bug is fixed → "run the tests"
|
|
After code written → "try it out"
|
|
Claude offers options → suggest the one the user would likely pick, based on conversation
|
|
Claude asks to continue → "yes" or "go ahead"
|
|
Task complete, obvious follow-up → "commit this" or "push it"
|
|
After error or misunderstanding → silence (let them assess/correct)
|
|
|
|
Be specific: "run the tests" beats "continue".
|
|
|
|
NEVER SUGGEST:
|
|
- Evaluative ("looks good", "thanks")
|
|
- Questions ("what about...?")
|
|
- Claude-voice ("Let me...", "I'll...", "Here's...")
|
|
- New ideas they didn't ask about
|
|
- Multiple sentences
|
|
|
|
Stay silent if the next step isn't obvious from what the user said.
|
|
|
|
Format: 2-12 words, match the user's style. Or nothing.
|
|
|
|
Reply with ONLY the suggestion, no quotes or explanation.`
|
|
|
|
const SUGGESTION_PROMPTS: Record<PromptVariant, string> = {
|
|
user_intent: SUGGESTION_PROMPT,
|
|
stated_intent: SUGGESTION_PROMPT,
|
|
}
|
|
|
|
export async function generateSuggestion(
|
|
abortController: AbortController,
|
|
promptId: PromptVariant,
|
|
cacheSafeParams: CacheSafeParams,
|
|
): Promise<{ suggestion: string | null; generationRequestId: string | null }> {
|
|
const prompt = SUGGESTION_PROMPTS[promptId]
|
|
|
|
// Deny tools via callback, NOT by passing tools:[] - that busts cache (0% hit)
|
|
const canUseTool = async () => ({
|
|
behavior: 'deny' as const,
|
|
message: 'No tools needed for suggestion',
|
|
decisionReason: { type: 'other' as const, reason: 'suggestion only' },
|
|
})
|
|
|
|
// DO NOT override any API parameter that differs from the parent request.
|
|
// The fork piggybacks on the main thread's prompt cache by sending identical
|
|
// cache-key params. The billing cache key includes more than just
|
|
// system/tools/model/messages/thinking — empirically, setting effortValue
|
|
// or maxOutputTokens on the fork (even via output_config or getAppState)
|
|
// busts cache. PR #18143 tried effort:'low' and caused a 45x spike in cache
|
|
// writes (92.7% → 61% hit rate). The only safe overrides are:
|
|
// - abortController (not sent to API)
|
|
// - skipTranscript (client-side only)
|
|
// - skipCacheWrite (controls cache_control markers, not the cache key)
|
|
// - canUseTool (client-side permission check)
|
|
const result = await runForkedAgent({
|
|
promptMessages: [createUserMessage({ content: prompt })],
|
|
cacheSafeParams, // Don't override tools/thinking settings - busts cache
|
|
canUseTool,
|
|
querySource: 'prompt_suggestion',
|
|
forkLabel: 'prompt_suggestion',
|
|
overrides: {
|
|
abortController,
|
|
},
|
|
skipTranscript: true,
|
|
skipCacheWrite: true,
|
|
})
|
|
|
|
// Check ALL messages - model may loop (try tool → denied → text in next message)
|
|
// Also extract the requestId from the first assistant message for RL dataset joins
|
|
const firstAssistantMsg = result.messages.find(m => m.type === 'assistant')
|
|
const generationRequestId =
|
|
firstAssistantMsg?.type === 'assistant'
|
|
? (firstAssistantMsg.requestId ?? null)
|
|
: null
|
|
|
|
for (const msg of result.messages) {
|
|
if (msg.type !== 'assistant') continue
|
|
const textBlock = msg.message.content.find(b => b.type === 'text')
|
|
if (textBlock?.type === 'text') {
|
|
const suggestion = textBlock.text.trim()
|
|
if (suggestion) {
|
|
return { suggestion, generationRequestId }
|
|
}
|
|
}
|
|
}
|
|
|
|
return { suggestion: null, generationRequestId }
|
|
}
|
|
|
|
export function shouldFilterSuggestion(
|
|
suggestion: string | null,
|
|
promptId: PromptVariant,
|
|
source?: 'cli' | 'sdk',
|
|
): boolean {
|
|
if (!suggestion) {
|
|
logSuggestionSuppressed('empty', undefined, promptId, source)
|
|
return true
|
|
}
|
|
|
|
const lower = suggestion.toLowerCase()
|
|
const wordCount = suggestion.trim().split(/\s+/).length
|
|
|
|
const filters: Array<[string, () => boolean]> = [
|
|
['done', () => lower === 'done'],
|
|
[
|
|
'meta_text',
|
|
() =>
|
|
lower === 'nothing found' ||
|
|
lower === 'nothing found.' ||
|
|
lower.startsWith('nothing to suggest') ||
|
|
lower.startsWith('no suggestion') ||
|
|
// Model spells out the prompt's "stay silent" instruction
|
|
/\bsilence is\b|\bstay(s|ing)? silent\b/.test(lower) ||
|
|
// Model outputs bare "silence" wrapped in punctuation/whitespace
|
|
/^\W*silence\W*$/.test(lower),
|
|
],
|
|
[
|
|
'meta_wrapped',
|
|
// Model wraps meta-reasoning in parens/brackets: (silence — ...), [no suggestion]
|
|
() => /^\(.*\)$|^\[.*\]$/.test(suggestion),
|
|
],
|
|
[
|
|
'error_message',
|
|
() =>
|
|
lower.startsWith('api error:') ||
|
|
lower.startsWith('prompt is too long') ||
|
|
lower.startsWith('request timed out') ||
|
|
lower.startsWith('invalid api key') ||
|
|
lower.startsWith('image was too large'),
|
|
],
|
|
['prefixed_label', () => /^\w+:\s/.test(suggestion)],
|
|
[
|
|
'too_few_words',
|
|
() => {
|
|
if (wordCount >= 2) return false
|
|
// Allow slash commands — these are valid user commands
|
|
if (suggestion.startsWith('/')) return false
|
|
// Allow common single-word inputs that are valid user commands
|
|
const ALLOWED_SINGLE_WORDS = new Set([
|
|
// Affirmatives
|
|
'yes',
|
|
'yeah',
|
|
'yep',
|
|
'yea',
|
|
'yup',
|
|
'sure',
|
|
'ok',
|
|
'okay',
|
|
// Actions
|
|
'push',
|
|
'commit',
|
|
'deploy',
|
|
'stop',
|
|
'continue',
|
|
'check',
|
|
'exit',
|
|
'quit',
|
|
// Negation
|
|
'no',
|
|
])
|
|
return !ALLOWED_SINGLE_WORDS.has(lower)
|
|
},
|
|
],
|
|
['too_many_words', () => wordCount > 12],
|
|
['too_long', () => suggestion.length >= 100],
|
|
['multiple_sentences', () => /[.!?]\s+[A-Z]/.test(suggestion)],
|
|
['has_formatting', () => /[\n*]|\*\*/.test(suggestion)],
|
|
[
|
|
'evaluative',
|
|
() =>
|
|
/thanks|thank you|looks good|sounds good|that works|that worked|that's all|nice|great|perfect|makes sense|awesome|excellent/.test(
|
|
lower,
|
|
),
|
|
],
|
|
[
|
|
'claude_voice',
|
|
() =>
|
|
/^(let me|i'll|i've|i'm|i can|i would|i think|i notice|here's|here is|here are|that's|this is|this will|you can|you should|you could|sure,|of course|certainly)/i.test(
|
|
suggestion,
|
|
),
|
|
],
|
|
]
|
|
|
|
for (const [reason, check] of filters) {
|
|
if (check()) {
|
|
logSuggestionSuppressed(reason, suggestion, promptId, source)
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
/**
|
|
* Log acceptance/ignoring of a prompt suggestion. Used by the SDK push path
|
|
* to track outcomes when the next user message arrives.
|
|
*/
|
|
export function logSuggestionOutcome(
|
|
suggestion: string,
|
|
userInput: string,
|
|
emittedAt: number,
|
|
promptId: PromptVariant,
|
|
generationRequestId: string | null,
|
|
): void {
|
|
const similarity =
|
|
Math.round((userInput.length / (suggestion.length || 1)) * 100) / 100
|
|
const wasAccepted = userInput === suggestion
|
|
const timeMs = Math.max(0, Date.now() - emittedAt)
|
|
|
|
logEvent('tengu_prompt_suggestion', {
|
|
source: 'sdk' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
outcome: (wasAccepted
|
|
? 'accepted'
|
|
: 'ignored') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
prompt_id:
|
|
promptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
...(generationRequestId && {
|
|
generationRequestId:
|
|
generationRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
}),
|
|
...(wasAccepted && {
|
|
timeToAcceptMs: timeMs,
|
|
}),
|
|
...(!wasAccepted && { timeToIgnoreMs: timeMs }),
|
|
similarity,
|
|
...(process.env.USER_TYPE === 'ant' && {
|
|
suggestion:
|
|
suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
userInput:
|
|
userInput as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
}),
|
|
})
|
|
}
|
|
|
|
export function logSuggestionSuppressed(
|
|
reason: string,
|
|
suggestion?: string,
|
|
promptId?: PromptVariant,
|
|
source?: 'cli' | 'sdk',
|
|
): void {
|
|
const resolvedPromptId = promptId ?? getPromptVariant()
|
|
logEvent('tengu_prompt_suggestion', {
|
|
...(source && {
|
|
source:
|
|
source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
}),
|
|
outcome:
|
|
'suppressed' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
reason:
|
|
reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
prompt_id:
|
|
resolvedPromptId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
...(process.env.USER_TYPE === 'ant' &&
|
|
suggestion && {
|
|
suggestion:
|
|
suggestion as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
|
|
}),
|
|
})
|
|
}
|