feat: add unit tests for error handling and retry functionality

feat: implement error handling and retry mechanism for assistant messages
2026-06-30 20:00:44 +01:00 · 2026-06-30 16:28:07 +05:30 · 2026-06-30 16:15:46 +05:30
12 changed files with 1040 additions and 283 deletions
--- a/frontend/src/container/AIAssistant/components/MessageBubble/MessageBubble.module.scss
+++ b/frontend/src/container/AIAssistant/components/MessageBubble/MessageBubble.module.scss
@@ -75,6 +75,38 @@
 	word-break: break-word;
 }

+// Error bubble: a subtle error-tinted callout replacing the default
+// assistant background, rendered when a turn fails.
+.bubble.error {
+	.assistant & {
+		background: var(--callout-error-background);
+		border: 1px solid var(--callout-error-border);
+	}
+}
+
+.errorContent {
+	display: flex;
+	align-items: flex-start;
+	gap: 8px;
+}
+
+.errorIcon {
+	flex-shrink: 0;
+	margin-top: 2px;
+	color: var(--destructive);
+}
+
+.errorText {
+	color: var(--callout-error-title);
+	white-space: pre-wrap;
+	word-break: break-word;
+}
+
+.retryButton {
+	margin-top: 6px;
+	align-self: flex-start;
+}
+
 // User-bubble row: pencil button sits to the LEFT of the bubble within
 // the right-aligned message line, so it visually "ends" at the bubble's
 // right edge while keeping the bubble in its original position.
--- a/frontend/src/container/AIAssistant/components/MessageBubble/MessageBubble.tsx
+++ b/frontend/src/container/AIAssistant/components/MessageBubble/MessageBubble.tsx
@@ -2,6 +2,10 @@ import React, { useMemo } from 'react';
 import cx from 'classnames';
 import ReactMarkdown from 'react-markdown';
 import remarkGfm from 'remark-gfm';
+import { Button } from '@signozhq/ui/button';
+import { RotateCw, TriangleAlert } from '@signozhq/icons';
+
+import { RetryActionDTO } from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';

 // Side-effect: registers all built-in block types into the BlockRegistry
 import '../blocks';
@@ -104,18 +108,23 @@ function renderGroup(group: RenderGroup): JSX.Element {
 interface MessageBubbleProps {
 	message: Message;
 	onRegenerate?: () => void;
+	onRetry?: () => void;
 	isLastAssistant?: boolean;
 }

 export default function MessageBubble({
 	message,
 	onRegenerate,
+	onRetry,
 	isLastAssistant = false,
 }: MessageBubbleProps): JSX.Element {
 	const variant = useVariant();
 	const isCompact = variant === 'panel';
 	const isUser = message.role === 'user';
+	const isError = !isUser && Boolean(message.isError);
 	const hasBlocks = !isUser && message.blocks && message.blocks.length > 0;
+	const showRetry =
+		isError && message.retryAction === RetryActionDTO.manual && Boolean(onRetry);

 	// Recompute groups only when the blocks array identity changes — store
 	// updates that don't touch this message's blocks should not re-render the
@@ -138,7 +147,7 @@ export default function MessageBubble({
 		<div className={messageClass} data-testid={`ai-message-${message.id}`}>
 			<div className={bodyClass}>
 				<div className={styles.bubbleRow}>
-					<div className={styles.bubble}>
+					<div className={cx(styles.bubble, { [styles.error]: isError })}>
 						{message.attachments && message.attachments.length > 0 && (
 							<div className={styles.attachments}>
 								{message.attachments.map((att) => {
@@ -161,6 +170,11 @@ export default function MessageBubble({

 						{isUser ? (
 							<p className={styles.text}>{message.content}</p>
+						) : isError ? (
+							<div className={styles.errorContent}>
+								<TriangleAlert size={14} className={styles.errorIcon} />
+								<span className={styles.errorText}>{message.content}</span>
+							</div>
 						) : hasBlocks ? (
 							<MessageContext.Provider value={{ messageId: message.id }}>
 								{groups.map((g) => renderGroup(g))}
@@ -183,7 +197,21 @@ export default function MessageBubble({
 					</div>
 				</div>

-				{!isUser && !message.isRateLimitError && (
+				{showRetry && (
+					<Button
+						className={styles.retryButton}
+						size="sm"
+						variant="ghost"
+						color="secondary"
+						onClick={onRetry}
+						testId={`ai-message-retry-${message.id}`}
+					>
+						<RotateCw size={12} />
+						Retry
+					</Button>
+				)}
+
+				{!isUser && !isError && !message.isRateLimitError && (
 					<MessageFeedback
 						message={message}
 						onRegenerate={onRegenerate}
--- a/frontend/src/container/AIAssistant/components/MessageBubble/tests/MessageBubble.test.tsx
+++ b/frontend/src/container/AIAssistant/components/MessageBubble/tests/MessageBubble.test.tsx
@@ -0,0 +1,85 @@
+import React from 'react';
+import { render, screen, userEvent } from 'tests/test-utils';
+import {
+	ErrorCodeDTO,
+	RetryActionDTO,
+} from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
+
+import { Message } from '../../../types';
+
+// react-markdown + remark-gfm are ESM-only and pull a large untransformed
+// dependency chain into jest. The error-rendering path under test renders
+// plain text (no markdown), so stub them to keep the import graph loadable.
+jest.mock('react-markdown', () => ({
+	__esModule: true,
+	default: ({ children }: { children?: React.ReactNode }): React.ReactNode =>
+		children,
+}));
+jest.mock('remark-gfm', () => ({
+	__esModule: true,
+	default: (): void => undefined,
+}));
+
+// eslint-disable-next-line import/first
+import MessageBubble from '../MessageBubble';
+
+function errorMessage(overrides: Partial<Message> = {}): Message {
+	return {
+		id: 'err-1',
+		role: 'assistant',
+		content: 'This conversation is still finishing a previous response.',
+		isError: true,
+		errorCode: ErrorCodeDTO.thread_busy,
+		retryAction: RetryActionDTO.manual,
+		createdAt: 0,
+		...overrides,
+	};
+}
+
+const retryButton = (): HTMLElement | null =>
+	screen.queryByRole('button', { name: /retry/i });
+
+describe('MessageBubble — error rendering', () => {
+	it('shows a Retry button for a manual error and invokes onRetry on click', async () => {
+		const onRetry = jest.fn();
+		render(<MessageBubble message={errorMessage()} onRetry={onRetry} />);
+
+		// Error copy is rendered, and the feedback bar is suppressed on errors.
+		expect(
+			screen.getByText(/still finishing a previous response/i),
+		).toBeInTheDocument();
+		expect(
+			screen.queryByRole('button', { name: /copy message/i }),
+		).not.toBeInTheDocument();
+
+		const button = retryButton();
+		expect(button).toBeInTheDocument();
+		await userEvent.click(button as HTMLElement);
+		expect(onRetry).toHaveBeenCalledTimes(1);
+	});
+
+	it('hides the Retry button when retryAction is none', () => {
+		render(
+			<MessageBubble
+				message={errorMessage({ retryAction: RetryActionDTO.none })}
+				onRetry={jest.fn()}
+			/>,
+		);
+		expect(retryButton()).not.toBeInTheDocument();
+	});
+
+	it('hides the Retry button when retryAction is auto', () => {
+		render(
+			<MessageBubble
+				message={errorMessage({ retryAction: RetryActionDTO.auto })}
+				onRetry={jest.fn()}
+			/>,
+		);
+		expect(retryButton()).not.toBeInTheDocument();
+	});
+
+	it('hides the Retry button when no onRetry handler is provided', () => {
+		render(<MessageBubble message={errorMessage()} />);
+		expect(retryButton()).not.toBeInTheDocument();
+	});
+});
--- a/frontend/src/container/AIAssistant/components/VirtualizedMessages/VirtualizedMessages.tsx
+++ b/frontend/src/container/AIAssistant/components/VirtualizedMessages/VirtualizedMessages.tsx
@@ -37,6 +37,9 @@ export default function VirtualizedMessages({
 	const regenerateAssistantMessage = useAIAssistantStore(
 		(s) => s.regenerateAssistantMessage,
 	);
+	const retryAssistantMessage = useAIAssistantStore(
+		(s) => s.retryAssistantMessage,
+	);
 	const { threadId } = useAIAssistantAnalyticsContext(conversationId);
 	const streamingStatus = useAIAssistantStore(
 		(s) => s.streams[conversationId]?.streamingStatus ?? '',
@@ -85,6 +88,14 @@ export default function VirtualizedMessages({
 		[conversationId, isStreaming, regenerateAssistantMessage, threadId],
 	);

+	const handleRetry = useCallback((): void => {
+		if (isStreaming) {
+			return;
+		}
+		void logEvent(AIAssistantEvents.RetryClicked, { threadId });
+		void retryAssistantMessage(conversationId);
+	}, [conversationId, isStreaming, retryAssistantMessage, threadId]);
+
 	// Scroll all the way to the actual bottom — including the 64px of bottom
 	// padding on the scroller — so the last bubble has visible breathing room
 	// above the disclaimer / input bar. Virtuoso's `scrollToIndex(LAST,
@@ -206,6 +217,11 @@ export default function VirtualizedMessages({
 									? (): void => handleRegenerate(msg.id)
 									: undefined
 							}
+							onRetry={
+								msg.isError && isLastAssistant && !showStreamingSlot
+									? handleRetry
+									: undefined
+							}
 							isLastAssistant={isLastAssistant}
 						/>
 					);
--- a/frontend/src/container/AIAssistant/events.ts
+++ b/frontend/src/container/AIAssistant/events.ts
@@ -90,6 +90,7 @@ export enum AIAssistantEvents {
 	SuggestedPromptClicked = 'AI Assistant: Suggested prompt clicked',
 	CancelClicked = 'AI Assistant: Cancel clicked',
 	RegenerateClicked = 'AI Assistant: Regenerate clicked',
+	RetryClicked = 'AI Assistant: Retry clicked',
 	MessageCopied = 'AI Assistant: Message copied',
 	FeedbackSubmitted = 'AI Assistant: Feedback submitted',
 	ResourceOpened = 'AI Assistant: Resource opened',
--- a/frontend/src/container/AIAssistant/store/tests/useAIAssistantStore.test.ts
+++ b/frontend/src/container/AIAssistant/store/tests/useAIAssistantStore.test.ts
@@ -0,0 +1,263 @@
+import {
+	ErrorCodeDTO,
+	RetryActionDTO,
+} from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
+import type { SSEEvent } from 'api/ai-assistant/chat';
+
+import { useAIAssistantStore } from '../useAIAssistantStore';
+import type { Message } from '../../types';
+
+// The store talks to the chat API only through these named exports. Mock the
+// whole module so we can drive the SSE stream + REST calls deterministically.
+jest.mock('api/ai-assistant/chat', () => ({
+	__esModule: true,
+	createThread: jest.fn(),
+	sendMessage: jest.fn(),
+	streamEvents: jest.fn(),
+	approveExecution: jest.fn(),
+	clarifyExecution: jest.fn(),
+	regenerateMessage: jest.fn(),
+	rejectExecution: jest.fn(),
+	cancelExecution: jest.fn(),
+	listThreads: jest.fn(),
+	getThreadDetail: jest.fn(),
+	updateThread: jest.fn(),
+	submitFeedback: jest.fn(),
+}));
+
+// eslint-disable-next-line @typescript-eslint/no-var-requires, global-require
+const chat = jest.requireMock('api/ai-assistant/chat') as Record<
+	string,
+	jest.Mock
+>;
+
+// Builds a single-use async stream from a fixed list of SSE events.
+async function* eventStream(events: SSEEvent[]): AsyncGenerator<SSEEvent> {
+	for (const event of events) {
+		yield event;
+	}
+}
+
+function errorEvent(
+	executionId: string,
+	code: ErrorCodeDTO,
+	retryAction: RetryActionDTO,
+): SSEEvent {
+	return {
+		type: 'error',
+		executionId,
+		error: { code, message: 'backend message' },
+		retryAction,
+	};
+}
+
+function lastMessage(conversationId: string): Message {
+	const conv = useAIAssistantStore.getState().conversations[conversationId];
+	return conv.messages[conv.messages.length - 1];
+}
+
+describe('useAIAssistantStore — streaming error handling', () => {
+	beforeEach(() => {
+		jest.clearAllMocks();
+		useAIAssistantStore.setState((s) => {
+			s.conversations = {};
+			s.streams = {};
+			s.activeConversationId = null;
+		});
+	});
+
+	it('commits a manually-retryable error bubble with friendly copy and metadata', async () => {
+		chat.createThread.mockResolvedValue('thread-1');
+		chat.sendMessage.mockResolvedValue('exec-1');
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				errorEvent('exec-1', ErrorCodeDTO.thread_busy, RetryActionDTO.manual),
+			]),
+		);
+
+		useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().sendMessage('hello');
+
+		const conv = useAIAssistantStore.getState().conversations['thread-1'];
+		expect(conv.messages).toHaveLength(2);
+		expect(conv.messages[0]).toMatchObject({ role: 'user', content: 'hello' });
+		expect(conv.messages[1]).toMatchObject({
+			role: 'assistant',
+			isError: true,
+			errorCode: ErrorCodeDTO.thread_busy,
+			retryAction: RetryActionDTO.manual,
+		});
+		// Code-specific FE copy, not the raw backend message.
+		expect(conv.messages[1].content).toContain(
+			'still finishing a previous response',
+		);
+	});
+
+	it('replays the send on retry without re-pushing the user message', async () => {
+		chat.createThread.mockResolvedValue('thread-1');
+		chat.sendMessage.mockResolvedValue('exec-1');
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				errorEvent('exec-1', ErrorCodeDTO.thread_busy, RetryActionDTO.manual),
+			]),
+		);
+
+		useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().sendMessage('hello');
+
+		// The retry succeeds this time.
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				{
+					type: 'message',
+					executionId: 'exec-1',
+					messageId: 'm1',
+					delta: 'Hi there',
+					done: true,
+				},
+			]),
+		);
+
+		await useAIAssistantStore.getState().retryAssistantMessage('thread-1');
+
+		const conv = useAIAssistantStore.getState().conversations['thread-1'];
+		// Error bubble replaced by the assistant reply; the user message stays.
+		expect(conv.messages).toHaveLength(2);
+		expect(conv.messages[0]).toMatchObject({ role: 'user', content: 'hello' });
+		expect(conv.messages[1]).toMatchObject({
+			role: 'assistant',
+			content: 'Hi there',
+		});
+		expect(conv.messages[1].isError).toBeUndefined();
+		// Thread already existed on retry; the user message was never re-sent as new.
+		expect(chat.createThread).toHaveBeenCalledTimes(1);
+		expect(chat.sendMessage).toHaveBeenCalledTimes(2);
+	});
+
+	it('silently retries auto-flagged errors, then downgrades to manual once spent', async () => {
+		chat.createThread.mockResolvedValue('thread-2');
+		chat.sendMessage.mockResolvedValue('exec');
+		// Always auto-retryable: 1 initial attempt + MAX_AUTO_RETRIES (2) = 3 sends.
+		chat.streamEvents.mockImplementation(() =>
+			eventStream([
+				errorEvent('exec', ErrorCodeDTO.internal_error, RetryActionDTO.auto),
+			]),
+		);
+
+		useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().sendMessage('hi');
+
+		expect(chat.sendMessage).toHaveBeenCalledTimes(3);
+		expect(lastMessage('thread-2')).toMatchObject({
+			isError: true,
+			errorCode: ErrorCodeDTO.internal_error,
+			// Auto budget exhausted → presented as manual so a Retry button shows.
+			retryAction: RetryActionDTO.manual,
+		});
+	}, 10000);
+
+	it('marks rate-limit errors and offers no retry', async () => {
+		chat.createThread.mockResolvedValue('thread-3');
+		chat.sendMessage.mockResolvedValue('exec');
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				errorEvent('exec', ErrorCodeDTO.hourly_message_limit, RetryActionDTO.none),
+			]),
+		);
+
+		useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().sendMessage('hi');
+
+		expect(lastMessage('thread-3')).toMatchObject({
+			isError: true,
+			isRateLimitError: true,
+			retryAction: RetryActionDTO.none,
+		});
+
+		// No retry thunk registered for a non-retryable error — retry is a no-op.
+		const before =
+			useAIAssistantStore.getState().conversations['thread-3'].messages.length;
+		await useAIAssistantStore.getState().retryAssistantMessage('thread-3');
+		expect(
+			useAIAssistantStore.getState().conversations['thread-3'].messages,
+		).toHaveLength(before);
+	});
+
+	it('recovers silently when an auto-flagged error succeeds on retry', async () => {
+		chat.createThread.mockResolvedValue('thread-4');
+		chat.sendMessage.mockResolvedValue('exec');
+		chat.streamEvents
+			.mockReturnValueOnce(
+				eventStream([
+					errorEvent('exec', ErrorCodeDTO.internal_error, RetryActionDTO.auto),
+				]),
+			)
+			.mockReturnValueOnce(
+				eventStream([
+					{
+						type: 'message',
+						executionId: 'exec',
+						messageId: 'm1',
+						delta: 'Recovered',
+						done: true,
+					},
+				]),
+			);
+
+		useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().sendMessage('hi');
+
+		// 1 initial attempt + 1 silent auto retry, then success — no error bubble.
+		expect(chat.sendMessage).toHaveBeenCalledTimes(2);
+		const conv = useAIAssistantStore.getState().conversations['thread-4'];
+		expect(conv.messages).toHaveLength(2);
+		expect(conv.messages[0]).toMatchObject({ role: 'user', content: 'hi' });
+		expect(conv.messages[1]).toMatchObject({
+			role: 'assistant',
+			content: 'Recovered',
+		});
+		expect(conv.messages.some((m) => m.isError)).toBe(false);
+	}, 10000);
+
+	it('replays the originating action on retry for a non-send error (approve)', async () => {
+		chat.approveExecution.mockResolvedValue('exec-a');
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				errorEvent('exec-a', ErrorCodeDTO.thread_busy, RetryActionDTO.manual),
+			]),
+		);
+
+		const convId = useAIAssistantStore.getState().startNewConversation();
+		await useAIAssistantStore.getState().approveAction(convId, 'approval-1');
+
+		expect(lastMessage(convId)).toMatchObject({
+			isError: true,
+			retryAction: RetryActionDTO.manual,
+		});
+		expect(chat.approveExecution).toHaveBeenCalledTimes(1);
+
+		// Retry replays the approval (not a send) and succeeds this time.
+		chat.streamEvents.mockReturnValueOnce(
+			eventStream([
+				{
+					type: 'message',
+					executionId: 'exec-a',
+					messageId: 'm1',
+					delta: 'Approved',
+					done: true,
+				},
+			]),
+		);
+		await useAIAssistantStore.getState().retryAssistantMessage(convId);
+
+		const conv = useAIAssistantStore.getState().conversations[convId];
+		expect(conv.messages).toHaveLength(1);
+		expect(conv.messages[0]).toMatchObject({
+			role: 'assistant',
+			content: 'Approved',
+		});
+		expect(conv.messages[0].isError).toBeUndefined();
+		expect(chat.approveExecution).toHaveBeenCalledTimes(2);
+		expect(chat.sendMessage).not.toHaveBeenCalled();
+	});
+});
--- a/frontend/src/container/AIAssistant/store/useAIAssistantStore.ts
+++ b/frontend/src/container/AIAssistant/store/useAIAssistantStore.ts
@@ -8,6 +8,7 @@ import type {
 	MessageActionDTO,
 	MessageSummaryDTOBlocksAnyOfItem,
 } from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
+import { RetryActionDTO } from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';

 import {
 	approveExecution,
@@ -35,7 +36,10 @@ import {
 	MessageBlock,
 	MessageRole,
 } from '../types';
-import { resolveAssistantErrorMessage } from '../utils/resolveAssistantErrorMessage';
+import {
+	resolveAssistantError,
+	type AssistantErrorResolution,
+} from '../utils/resolveAssistantError';

 // ---------------------------------------------------------------------------
 // Types used by module-level helpers
@@ -56,6 +60,15 @@ interface SSEStreamCtx {

 const streamControllers = new Map<string, AbortController>();

+/**
+ * Per-conversation retry thunks for the most recent failed turn. Populated by
+ * `finalizeStreamingError` when the error is manually retryable; consumed by
+ * the `retryAssistantMessage` action when the user clicks Retry. Transient
+ * (not persisted) — it shares the in-memory lifetime of the error bubble it
+ * backs, so a page reload drops both together.
+ */
+const retryRegistry = new Map<string, () => Promise<void>>();
+
 function abortStream(conversationId: string): void {
 	const ctrl = streamControllers.get(conversationId);
 	if (ctrl) {
@@ -197,7 +210,7 @@ function resetStreamingState(
 * Marker thrown by `runStreamingLoop` when an SSE event reports
 * `invalid_token`. Callers that own an originating action (sendMessage /
 * approve / clarify / regenerate) catch this and re-issue that action via
- * `streamWithAuthRetry`; the retry's first REST call will 401, at which point
+ * `streamWithRetry`; the retry's first REST call will 401, at which point
 * the shared axios `interceptorRejected` rotates the access token and replays.
 */
 class AuthExpiredError extends Error {
@@ -207,27 +220,50 @@ class AuthExpiredError extends Error {
 	}
 }

+/** Capped silent re-attempts for backend-flagged transient (`auto`) errors. */
+const MAX_AUTO_RETRIES = 2;
+/** Backoff before each auto re-attempt, indexed by prior auto-retry count. */
+const AUTO_RETRY_BACKOFF_MS = [500, 1500];
+
+function delay(ms: number): Promise<void> {
+	return new Promise((resolve) => {
+		setTimeout(resolve, ms);
+	});
+}
+
+/** True when the SSE error carries the backend's `retryAction: 'auto'` flag. */
+function isAutoRetryableError(err: unknown): boolean {
+	return (
+		(err as { retryAction?: unknown } | undefined)?.retryAction ===
+		RetryActionDTO.auto
+	);
+}
+
 /**
 * Runs the originating action (e.g. sendMessage POST) and streams the
- * resulting execution. On `AuthExpiredError`, re-issues `start` once — the
- * retry's REST call hits 401, the shared axios interceptor rotates the
- * access token and replays, and the new SSE picks up the rotated token from
- * localStorage. Backend signals `retryAction: 'manual'` for `invalid_token`,
- * so the dead execution can't be resumed — only a fresh one helps.
+ * resulting execution, with two independent retry budgets:
+ *
+ *   • Auth — on `AuthExpiredError` (SSE `invalid_token`), re-issues `start`
+ *     once. The retry's REST call 401s, the shared axios interceptor rotates
+ *     the access token + replays, and the new SSE picks up the rotated token.
+ *     Backend flags `invalid_token` as `manual`, so only a fresh execution helps.
+ *   • Auto — on an SSE error the backend flagged `retryAction: 'auto'`
+ *     (transient), silently re-issues `start` up to `MAX_AUTO_RETRIES` times
+ *     with backoff. Once exhausted the error propagates so the caller can
+ *     surface a manual Retry affordance.
+ *
+ * Both reset the stream state before re-attempting so a dead execution's
+ * partial output isn't concatenated onto the retry.
 */
-async function streamWithAuthRetry(
+async function streamWithRetry(
 	conversationId: string,
 	start: () => Promise<string>,
 	set: StoreSetter,
 ): Promise<void> {
-	for (let attempt = 0; attempt <= 1; attempt += 1) {
-		if (attempt > 0) {
-			// Drop any partial content/events from the previous attempt so the
-			// retried execution's stream isn't concatenated with the dead one.
-			set((s) => {
-				resetStreamingState(s, conversationId);
-			});
-		}
+	let authRetried = false;
+	let autoRetries = 0;
+
+	for (;;) {
 		// eslint-disable-next-line no-await-in-loop
 		const executionId = await start();
 		const ctrl = newStreamController(conversationId);
@@ -242,10 +278,28 @@ async function streamWithAuthRetry(
 			return;
 		} catch (err) {
 			streamControllers.delete(conversationId);
-			if (err instanceof AuthExpiredError && attempt < 1) {
-				continue;
+
+			if (err instanceof AuthExpiredError && !authRetried) {
+				authRetried = true;
+			} else if (isAutoRetryableError(err) && autoRetries < MAX_AUTO_RETRIES) {
+				// eslint-disable-next-line no-await-in-loop
+				await delay(AUTO_RETRY_BACKOFF_MS[autoRetries] ?? 1500);
+				autoRetries += 1;
+			} else {
+				if (isAutoRetryableError(err)) {
+					// Auto-retry budget spent — present the failure as manually
+					// retryable so the caller surfaces a Retry button rather than
+					// silently giving up.
+					(err as { retryAction?: RetryActionDTO }).retryAction =
+						RetryActionDTO.manual;
+				}
+				throw err;
 			}
-			throw err;
+
+			// Drop partial content/events from the failed attempt before retrying.
+			set((s) => {
+				resetStreamingState(s, conversationId);
+			});
 		}
 	}
 }
@@ -258,7 +312,7 @@ async function streamWithAuthRetry(
 *
 * On an `invalid_token` error event (e.g. MCP auth expired mid-execution),
 * throws `AuthExpiredError` so the caller can re-issue the originating
- * action via `streamWithAuthRetry`. We don't refresh here ourselves — the
+ * action via `streamWithRetry`. We don't refresh here ourselves — the
 * retry's REST call will 401 and the shared axios `interceptorRejected`
 * handles rotation + replay. Throws on any other `error` event — the
 * caller's catch block handles UI feedback.
@@ -484,20 +538,37 @@ function hasPendingInput(conversationId: string, get: StoreGetter): boolean {
 	return Boolean(stream?.pendingApproval || stream?.pendingClarification);
 }

+/**
+ * Commits a failed turn as an error message and removes the stream entry.
+ * When the failure is manually retryable and a `retry` thunk is supplied, the
+ * thunk is stashed in `retryRegistry` so the bubble's Retry button can replay
+ * the originating action.
+ */
 function finalizeStreamingError(
 	conversationId: string,
-	errorContent: string,
+	resolution: AssistantErrorResolution,
 	set: StoreSetter,
-	isRateLimit = false,
+	retry?: () => Promise<void>,
 ): void {
+	const { message, code, retryAction, isRateLimit } = resolution;
+
+	if (retryAction === RetryActionDTO.manual && retry) {
+		retryRegistry.set(conversationId, retry);
+	} else {
+		retryRegistry.delete(conversationId);
+	}
+
 	set((s) => {
 		const conv = s.conversations[conversationId];
 		if (conv) {
 			conv.messages.push({
 				id: uuidv4(),
 				role: 'assistant',
-				content: errorContent,
+				content: message,
 				createdAt: Date.now(),
+				isError: true,
+				retryAction,
+				...(code ? { errorCode: code } : {}),
 				...(isRateLimit ? { isRateLimitError: true } : {}),
 			});
 			conv.updatedAt = Date.now();
@@ -506,6 +577,40 @@ function finalizeStreamingError(
 	});
 }

+/**
+ * Shared streaming wrapper for actions that have no pre-stream setup beyond
+ * resetting state (approve / clarify / regenerate). Streams the execution,
+ * finalizes the message on success, and on failure resolves the error +
+ * registers `retry` (the caller's own re-invocation) so the bubble can replay
+ * it. `sendMessage` does not use this — it owns thread-creation/re-keying and
+ * runs its own equivalent loop.
+ */
+async function streamAndFinalize(
+	conversationId: string,
+	start: () => Promise<string>,
+	fallback: string,
+	logLabel: string,
+	set: StoreSetter,
+	get: StoreGetter,
+	retry: () => Promise<void>,
+): Promise<void> {
+	try {
+		await streamWithRetry(conversationId, start, set);
+		if (!hasPendingInput(conversationId, get)) {
+			finalizeStreamingMessage(conversationId, set, get);
+		}
+	} catch (err) {
+		// Abort errors are expected when the user cancels — not a failure.
+		if (err instanceof DOMException && err.name === 'AbortError') {
+			return;
+		}
+		// eslint-disable-next-line no-console
+		console.error(logLabel, err);
+		const resolution = resolveAssistantError(err, fallback);
+		finalizeStreamingError(conversationId, resolution, set, retry);
+	}
+}
+
 // ---------------------------------------------------------------------------
 // Store interface
 // ---------------------------------------------------------------------------
@@ -564,6 +669,8 @@ export interface AIAssistantStore {
 		conversationId: string,
 		messageId: string,
 	) => Promise<void>;
+	/** Replays the originating action for a manually-retryable error bubble. */
+	retryAssistantMessage: (conversationId: string) => Promise<void>;
 	submitMessageFeedback: (
 		messageId: string,
 		rating: FeedbackRating,
@@ -877,7 +984,7 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 					// there's no "originating action" to redo — reopening the
 					// same dead executionId would just re-emit the failure.
 					// Let the error bubble; the user can send a new message,
-					// which will go through `streamWithAuthRetry`.
+					// which will go through `streamWithRetry`.
 					if (
 						detail.activeExecutionId &&
 						!streamControllers.has(threadId) &&
@@ -1060,7 +1167,7 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 				attachments?: MessageAttachment[],
 				contexts?: MessageContext[],
 			): Promise<void> => {
-				let convId = get().activeConversationId;
+				const convId = get().activeConversationId;
 				if (!convId || !get().conversations[convId]) {
 					return;
 				}
@@ -1093,63 +1200,75 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 				};

 				set((state) => {
-					const conv = state.conversations[convId!];
+					const conv = state.conversations[convId];
 					conv.messages.push(userMessage);
 					conv.updatedAt = Date.now();
 					if (!conv.title && text.trim()) {
 						conv.title = deriveTitle(text);
 					}
-					resetStreamingState(state, convId!);
+					resetStreamingState(state, convId);
 				});

-				try {
-					let { threadId } = get().conversations[convId];
-					if (!threadId) {
-						threadId = await createThread();
-						// Re-key the conversation from client UUID to backend threadId
-						// so fetchThreads won't create a duplicate entry later.
-						const oldId = convId;
-						convId = threadId;
-						set((s) => {
-							const conv = s.conversations[oldId];
-							if (conv) {
-								conv.id = convId!;
-								conv.threadId = convId!;
-								s.conversations[convId!] = conv;
-								delete s.conversations[oldId];
-								if (s.activeConversationId === oldId) {
-									s.activeConversationId = convId!;
+				// The full send — ensure a backend thread exists (re-keying the
+				// optimistic client UUID on first send), POST the message, and
+				// stream the reply. Defined as a closure so the error bubble's
+				// Retry button can replay it without re-pushing the user message.
+				const runSend = async (cid: string): Promise<void> => {
+					let targetConvId = cid;
+					try {
+						let { threadId } = get().conversations[targetConvId];
+						if (!threadId) {
+							threadId = await createThread();
+							// Re-key the conversation from client UUID to backend threadId
+							// so fetchThreads won't create a duplicate entry later.
+							const oldId = targetConvId;
+							const newId = threadId;
+							set((s) => {
+								const conv = s.conversations[oldId];
+								if (conv) {
+									conv.id = newId;
+									conv.threadId = newId;
+									s.conversations[newId] = conv;
+									delete s.conversations[oldId];
+									if (s.activeConversationId === oldId) {
+										s.activeConversationId = newId;
+									}
+									const stream = s.streams[oldId];
+									if (stream) {
+										s.streams[newId] = stream;
+										delete s.streams[oldId];
+									}
 								}
-								const stream = s.streams[oldId];
-								if (stream) {
-									s.streams[convId!] = stream;
-									delete s.streams[oldId];
-								}
-							}
-						});
-					}
-					const tid = threadId;
-					await streamWithAuthRetry(
-						convId,
-						() => sendMessageToThread(tid, text, contexts),
-						set,
-					);
+							});
+							targetConvId = newId;
+						}
+						const tid = threadId;
+						await streamWithRetry(
+							targetConvId,
+							() => sendMessageToThread(tid, text, contexts),
+							set,
+						);

-					if (!hasPendingInput(convId, get)) {
-						finalizeStreamingMessage(convId, set, get);
+						if (!hasPendingInput(targetConvId, get)) {
+							finalizeStreamingMessage(targetConvId, set, get);
+						}
+					} catch (err) {
+						// Abort errors are expected when the user cancels — not a failure.
+						if (err instanceof DOMException && err.name === 'AbortError') {
+							return;
+						}
+						console.error('[AIAssistant] sendMessage failed:', err);
+						const resolution = resolveAssistantError(
+							err,
+							'Something went wrong while fetching the response. Please try again.',
+						);
+						finalizeStreamingError(targetConvId, resolution, set, () =>
+							runSend(targetConvId),
+						);
 					}
-				} catch (err) {
-					// Abort errors are expected when the user cancels — not a failure
-					if (err instanceof DOMException && err.name === 'AbortError') {
-						return;
-					}
-					console.error('[AIAssistant] sendMessage failed:', err);
-					const { message, isRateLimit } = resolveAssistantErrorMessage(
-						err,
-						'Something went wrong while fetching the response. Please try again.',
-					);
-					finalizeStreamingError(convId, message, set, isRateLimit);
-				}
+				};
+
+				await runSend(convId);
 			},

 			approveAction: async (
@@ -1167,26 +1286,17 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 					}
 				});

-				try {
-					await streamWithAuthRetry(
+				const run = (): Promise<void> =>
+					streamAndFinalize(
 						conversationId,
 						() => approveExecution(approvalId),
-						set,
-					);
-					if (!hasPendingInput(conversationId, get)) {
-						finalizeStreamingMessage(conversationId, set, get);
-					}
-				} catch (err) {
-					if (err instanceof DOMException && err.name === 'AbortError') {
-						return;
-					}
-					console.error('[AIAssistant] approveAction failed:', err);
-					const { message, isRateLimit } = resolveAssistantErrorMessage(
-						err,
 						'Something went wrong while processing the approval. Please try again.',
+						'[AIAssistant] approveAction failed:',
+						set,
+						get,
+						run,
 					);
-					finalizeStreamingError(conversationId, message, set, isRateLimit);
-				}
+				await run();
 			},

 			rejectAction: async (
@@ -1246,26 +1356,17 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 					resetStreamingState(s, conversationId);
 				});

-				try {
-					await streamWithAuthRetry(
+				const run = (): Promise<void> =>
+					streamAndFinalize(
 						conversationId,
 						() => regenerateMessage(messageId),
-						set,
-					);
-					if (!hasPendingInput(conversationId, get)) {
-						finalizeStreamingMessage(conversationId, set, get);
-					}
-				} catch (err) {
-					if (err instanceof DOMException && err.name === 'AbortError') {
-						return;
-					}
-					console.error('[AIAssistant] regenerateAssistantMessage failed:', err);
-					const { message, isRateLimit } = resolveAssistantErrorMessage(
-						err,
 						'Something went wrong while regenerating the response. Please try again.',
+						'[AIAssistant] regenerateAssistantMessage failed:',
+						set,
+						get,
+						run,
 					);
-					finalizeStreamingError(conversationId, message, set, isRateLimit);
-				}
+				await run();
 			},

 			submitMessageFeedback: async (
@@ -1312,26 +1413,42 @@ export const useAIAssistantStore = create<AIAssistantStore>()(
 					}
 				});

-				try {
-					await streamWithAuthRetry(
+				const run = (): Promise<void> =>
+					streamAndFinalize(
 						conversationId,
 						() => clarifyExecution(clarificationId, answers),
-						set,
-					);
-					if (!hasPendingInput(conversationId, get)) {
-						finalizeStreamingMessage(conversationId, set, get);
-					}
-				} catch (err) {
-					if (err instanceof DOMException && err.name === 'AbortError') {
-						return;
-					}
-					console.error('[AIAssistant] submitClarification failed:', err);
-					const { message, isRateLimit } = resolveAssistantErrorMessage(
-						err,
 						'Something went wrong while processing your answers. Please try again.',
+						'[AIAssistant] submitClarification failed:',
+						set,
+						get,
+						run,
 					);
-					finalizeStreamingError(conversationId, message, set, isRateLimit);
+				await run();
+			},
+
+			retryAssistantMessage: async (conversationId: string): Promise<void> => {
+				const retry = retryRegistry.get(conversationId);
+				if (!retry) {
+					return;
 				}
+				retryRegistry.delete(conversationId);
+
+				// Drop the trailing error bubble we're retrying from and reset the
+				// stream so the in-progress retry renders immediately. The retry
+				// thunk replays the originating action without re-pushing the
+				// user's message.
+				set((s) => {
+					const conv = s.conversations[conversationId];
+					if (conv) {
+						const last = conv.messages[conv.messages.length - 1];
+						if (last?.isError) {
+							conv.messages.pop();
+						}
+					}
+					resetStreamingState(s, conversationId);
+				});
+
+				await retry();
 			},
 		})),
 		{
--- a/frontend/src/container/AIAssistant/types.ts
+++ b/frontend/src/container/AIAssistant/types.ts
@@ -15,9 +15,11 @@
 import type {
 	ApprovalEventDTO,
 	ClarificationEventDTO,
+	ErrorCodeDTO,
 	FeedbackRatingDTO,
 	MessageActionDTO,
 	MessageActionKindDTO,
+	RetryActionDTO,
 } from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';

 /** Client-only file attachment — no API equivalent (uploads happen via data URLs). */
@@ -91,6 +93,18 @@ export interface Message {
 	 * bar (copy/vote/regenerate) is hidden — retrying would just 429 again.
 	 */
 	isRateLimitError?: boolean;
+	/**
+	 * Marks an assistant message that represents a failed turn. Drives the
+	 * error styling and replaces the feedback bar with a retry affordance.
+	 */
+	isError?: boolean;
+	/** Known backend error code for the failure, when recognised. */
+	errorCode?: ErrorCodeDTO;
+	/**
+	 * Retry semantics for a failed turn — `manual` renders an inline Retry
+	 * button on the error bubble; `none`/`auto` render no button.
+	 */
+	retryAction?: RetryActionDTO;
 	createdAt: number;
 }

--- a/frontend/src/container/AIAssistant/utils/tests/resolveAssistantError.test.ts
+++ b/frontend/src/container/AIAssistant/utils/tests/resolveAssistantError.test.ts
@@ -0,0 +1,154 @@
+import { AxiosError } from 'axios';
+import {
+	ErrorCodeDTO,
+	RetryActionDTO,
+} from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
+
+import { resolveAssistantError } from '../resolveAssistantError';
+
+const FALLBACK = 'Something went wrong. Please try again.';
+
+function restError(status: number, code: string, message: string): AxiosError {
+	const err = new AxiosError('Request failed');
+	err.response = {
+		status,
+		data: { error: { code, message } },
+	} as AxiosError['response'];
+	return err;
+}
+
+describe('resolveAssistantError', () => {
+	describe('message resolution', () => {
+		it('prefers code-specific FE copy over the backend message', () => {
+			const err = restError(409, ErrorCodeDTO.thread_busy, 'raw backend phrasing');
+
+			const { message } = resolveAssistantError(err, FALLBACK);
+			expect(message).toBe(
+				'This conversation is still finishing a previous response. Give it a moment and try again.',
+			);
+		});
+
+		it('falls through to the backend message for a known code without FE copy', () => {
+			const err = restError(
+				400,
+				ErrorCodeDTO.message_not_found,
+				'No such message exists.',
+			);
+
+			expect(resolveAssistantError(err, FALLBACK)).toStrictEqual({
+				message: 'No such message exists.',
+				code: ErrorCodeDTO.message_not_found,
+				retryAction: RetryActionDTO.none,
+				isRateLimit: false,
+			});
+		});
+
+		it('falls back when the error code is not in ErrorCodeDTO', () => {
+			const err = restError(400, 'future_unknown_code', 'Backend-only message');
+
+			expect(resolveAssistantError(err, FALLBACK)).toStrictEqual({
+				message: FALLBACK,
+				code: undefined,
+				retryAction: RetryActionDTO.none,
+				isRateLimit: false,
+			});
+		});
+	});
+
+	describe('rate limiting', () => {
+		it('marks HTTP 429 responses as rate limited and non-retryable', () => {
+			const err = restError(
+				429,
+				ErrorCodeDTO.hourly_message_limit,
+				'Hourly limit reached.',
+			);
+
+			expect(resolveAssistantError(err, FALLBACK)).toStrictEqual({
+				message: "You've reached the hourly message limit. Please try again later.",
+				code: ErrorCodeDTO.hourly_message_limit,
+				retryAction: RetryActionDTO.none,
+				isRateLimit: true,
+			});
+		});
+
+		it('treats known SSE rate-limit codes as rate limited', () => {
+			const err = Object.assign(new Error('Daily token limit exceeded.'), {
+				code: ErrorCodeDTO.daily_token_limit,
+			});
+
+			const res = resolveAssistantError(err, FALLBACK);
+			expect(res.isRateLimit).toBe(true);
+			expect(res.retryAction).toBe(RetryActionDTO.none);
+		});
+
+		it('marks 429 as rate limited even when the code is unknown', () => {
+			const err = restError(429, 'future_unknown_code', 'Too many requests');
+
+			expect(resolveAssistantError(err, FALLBACK)).toStrictEqual({
+				message: FALLBACK,
+				code: undefined,
+				retryAction: RetryActionDTO.none,
+				isRateLimit: true,
+			});
+		});
+	});
+
+	describe('retryAction resolution', () => {
+		it('honours an explicit retryAction from an SSE error event', () => {
+			const err = Object.assign(new Error('Transient hiccup'), {
+				code: ErrorCodeDTO.internal_error,
+				retryAction: RetryActionDTO.auto,
+			});
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.auto,
+			);
+		});
+
+		it('forces none for non-retryable permission errors', () => {
+			const err = restError(403, ErrorCodeDTO.permission_denied, 'forbidden');
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.none,
+			);
+		});
+
+		it('derives manual for 409 conflicts', () => {
+			const err = restError(409, ErrorCodeDTO.thread_has_active_execution, 'busy');
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.manual,
+			);
+		});
+
+		it('derives manual for 5xx responses', () => {
+			const err = restError(503, 'future_unknown_code', 'unavailable');
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.manual,
+			);
+		});
+
+		it('derives manual for network failures with no response', () => {
+			const err = new AxiosError('Network Error');
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.manual,
+			);
+		});
+
+		it('derives none for other 4xx responses', () => {
+			const err = restError(400, 'future_unknown_code', 'bad request');
+
+			expect(resolveAssistantError(err, FALLBACK).retryAction).toBe(
+				RetryActionDTO.none,
+			);
+		});
+
+		it('defaults to manual for non-Axios errors with no code', () => {
+			expect(resolveAssistantError(new Error('boom'), FALLBACK).retryAction).toBe(
+				RetryActionDTO.manual,
+			);
+		});
+	});
+});
--- a/frontend/src/container/AIAssistant/utils/tests/resolveAssistantErrorMessage.test.ts
+++ b/frontend/src/container/AIAssistant/utils/tests/resolveAssistantErrorMessage.test.ts
@@ -1,91 +0,0 @@
-import { AxiosError } from 'axios';
-import { ErrorCodeDTO } from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
-
-import { resolveAssistantErrorMessage } from '../resolveAssistantErrorMessage';
-
-const FALLBACK = 'Something went wrong. Please try again.';
-
-describe('resolveAssistantErrorMessage', () => {
-	it('returns backend message for a known error code', () => {
-		const err = new AxiosError('Request failed');
-		err.response = {
-			status: 400,
-			data: {
-				error: {
-					code: ErrorCodeDTO.thread_busy,
-					message: 'This thread is busy. Try again shortly.',
-				},
-			},
-		} as AxiosError['response'];
-
-		expect(resolveAssistantErrorMessage(err, FALLBACK)).toStrictEqual({
-			message: 'This thread is busy. Try again shortly.',
-			isRateLimit: false,
-		});
-	});
-
-	it('falls back when error code is not in ErrorCodeDTO', () => {
-		const err = new AxiosError('Request failed');
-		err.response = {
-			status: 400,
-			data: {
-				error: {
-					code: 'future_unknown_code',
-					message: 'Backend-only message',
-				},
-			},
-		} as AxiosError['response'];
-
-		expect(resolveAssistantErrorMessage(err, FALLBACK)).toStrictEqual({
-			message: FALLBACK,
-			isRateLimit: false,
-		});
-	});
-
-	it('marks HTTP 429 responses as rate limited', () => {
-		const err = new AxiosError('Too many requests');
-		err.response = {
-			status: 429,
-			data: {
-				error: {
-					code: ErrorCodeDTO.hourly_message_limit,
-					message: 'Hourly limit reached.',
-				},
-			},
-		} as AxiosError['response'];
-
-		expect(resolveAssistantErrorMessage(err, FALLBACK)).toStrictEqual({
-			message: 'Hourly limit reached.',
-			isRateLimit: true,
-		});
-	});
-
-	it('uses backend message for known SSE rate-limit error codes', () => {
-		const err = Object.assign(new Error('Daily token limit exceeded.'), {
-			code: ErrorCodeDTO.daily_token_limit,
-		});
-
-		expect(resolveAssistantErrorMessage(err, FALLBACK)).toStrictEqual({
-			message: 'Daily token limit exceeded.',
-			isRateLimit: true,
-		});
-	});
-
-	it('marks 429 as rate limited even when error code is unknown', () => {
-		const err = new AxiosError('Too many requests');
-		err.response = {
-			status: 429,
-			data: {
-				error: {
-					code: 'future_unknown_code',
-					message: 'Too many requests',
-				},
-			},
-		} as AxiosError['response'];
-
-		expect(resolveAssistantErrorMessage(err, FALLBACK)).toStrictEqual({
-			message: FALLBACK,
-			isRateLimit: true,
-		});
-	});
-});
--- a/frontend/src/container/AIAssistant/utils/resolveAssistantError.ts
+++ b/frontend/src/container/AIAssistant/utils/resolveAssistantError.ts
@@ -0,0 +1,209 @@
+import { isAxiosError } from 'axios';
+import {
+	ErrorCodeDTO,
+	RetryActionDTO,
+	type ErrorBodyDTO,
+	type ErrorResponseDTO,
+} from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
+
+export interface AssistantErrorResolution {
+	/** User-facing copy: code-specific FE copy → backend message → caller fallback. */
+	message: string;
+	/** Known backend error code, when one we recognise was supplied. */
+	code?: ErrorCodeDTO;
+	/**
+	 * Whether/how the failed action may be retried:
+	 *   • `auto`   — transient; the caller may silently re-attempt (capped).
+	 *   • `manual` — surface a Retry affordance to the user.
+	 *   • `none`   — retrying would re-fail deterministically; offer nothing.
+	 */
+	retryAction: RetryActionDTO;
+	/** Quota/limit error — callers hide the retry + feedback bar (retrying just re-limits). */
+	isRateLimit: boolean;
+}
+
+/** Quota/limit codes — surfaced as rate-limit errors (no retry, feedback bar hidden). */
+const RATE_LIMIT_ERROR_CODES = new Set<ErrorCodeDTO>([
+	ErrorCodeDTO.rate_limit_override_exceeds_ceiling,
+	ErrorCodeDTO.thread_message_limit,
+	ErrorCodeDTO.connection_limit_exceeded,
+	ErrorCodeDTO.hourly_message_limit,
+	ErrorCodeDTO.daily_message_limit,
+	ErrorCodeDTO.daily_token_limit,
+	ErrorCodeDTO.daily_cost_limit,
+	ErrorCodeDTO.budget_exceeded,
+]);
+
+/**
+ * Codes whose retry would re-fail deterministically — permission/config/validation
+ * failures. These force `retryAction: none` regardless of HTTP status.
+ */
+const NON_RETRYABLE_CODES = new Set<ErrorCodeDTO>([
+	ErrorCodeDTO.permission_denied,
+	ErrorCodeDTO.user_disabled,
+	ErrorCodeDTO.org_disabled,
+	ErrorCodeDTO.validation_error,
+	ErrorCodeDTO.invalid_content_length,
+	ErrorCodeDTO.invalid_fork_target,
+	ErrorCodeDTO.missing_signoz_url,
+	ErrorCodeDTO.invalid_signoz_url,
+	ErrorCodeDTO.region_not_configured,
+]);
+
+/**
+ * Code-specific, user-friendly copy. Takes precedence over the backend's raw
+ * `error.message` so the user sees an actionable, consistent sentence rather
+ * than internal phrasing. Codes absent here fall through to the backend message.
+ */
+const ERROR_CODE_COPY: Partial<Record<ErrorCodeDTO, string>> = {
+	[ErrorCodeDTO.permission_denied]:
+		"You don't have permission to do that. Contact your workspace admin if you think this is a mistake.",
+	[ErrorCodeDTO.user_disabled]:
+		'Your access to the AI assistant has been disabled. Contact your workspace admin to re-enable it.',
+	[ErrorCodeDTO.org_disabled]:
+		'The AI assistant is disabled for your organisation. An admin can enable it in settings.',
+	[ErrorCodeDTO.thread_busy]:
+		'This conversation is still finishing a previous response. Give it a moment and try again.',
+	[ErrorCodeDTO.thread_has_active_execution]:
+		'This conversation is still finishing a previous response. Give it a moment and try again.',
+	[ErrorCodeDTO.hourly_message_limit]:
+		"You've reached the hourly message limit. Please try again later.",
+	[ErrorCodeDTO.daily_message_limit]:
+		"You've reached the daily message limit. Please try again tomorrow.",
+	[ErrorCodeDTO.daily_token_limit]:
+		"You've reached today's usage limit. Please try again tomorrow.",
+	[ErrorCodeDTO.daily_cost_limit]:
+		"You've reached today's usage limit. Please try again tomorrow.",
+	[ErrorCodeDTO.budget_exceeded]:
+		"You've reached your usage budget. Contact your workspace admin to raise it.",
+	[ErrorCodeDTO.thread_message_limit]:
+		'This conversation has reached its length limit. Start a new conversation to continue.',
+	[ErrorCodeDTO.connection_limit_exceeded]:
+		'Too many active conversations right now. Close one and try again.',
+	[ErrorCodeDTO.max_turns_exceeded]:
+		'The assistant reached the maximum number of steps for this request. Try rephrasing or breaking it into smaller asks.',
+	[ErrorCodeDTO.region_unreachable]:
+		"Couldn't reach your region's services. Please try again in a moment.",
+	[ErrorCodeDTO.region_not_configured]:
+		'No region is configured for the AI assistant yet. An admin can set this up in settings.',
+	[ErrorCodeDTO.mcp_unavailable]:
+		'A required service is temporarily unavailable. Please try again shortly.',
+	[ErrorCodeDTO.sandbox_unavailable]:
+		'The execution environment is temporarily unavailable. Please try again shortly.',
+	[ErrorCodeDTO.internal_error]:
+		'Something went wrong on our end. Please try again.',
+};
+
+function isErrorCodeDTO(code: string | undefined): code is ErrorCodeDTO {
+	return (
+		code !== undefined && (Object.values(ErrorCodeDTO) as string[]).includes(code)
+	);
+}
+
+function isRetryActionDTO(value: unknown): value is RetryActionDTO {
+	return (
+		typeof value === 'string' &&
+		(Object.values(RetryActionDTO) as string[]).includes(value)
+	);
+}
+
+/**
+ * Pulls the structured error body out of either an Axios REST error or the
+ * SSE error the streaming loop throws (a plain `Error` augmented with `code`).
+ */
+function getErrorBody(err: unknown): ErrorBodyDTO | null {
+	if (isAxiosError(err)) {
+		return (err.response?.data as ErrorResponseDTO | undefined)?.error ?? null;
+	}
+
+	const code = (err as { code?: string } | undefined)?.code;
+	const message = err instanceof Error ? err.message : undefined;
+	if (!code || !message) {
+		return null;
+	}
+
+	return { code: code as ErrorCodeDTO, message };
+}
+
+function isRateLimit(code: ErrorCodeDTO | undefined, err: unknown): boolean {
+	if (isAxiosError(err) && err.response?.status === 429) {
+		return true;
+	}
+	return code !== undefined && RATE_LIMIT_ERROR_CODES.has(code);
+}
+
+/**
+ * Resolves how the failed action may be retried. The backend's explicit signal
+ * (SSE `ErrorEventDTO.retryAction`) is authoritative; otherwise we derive it
+ * from the rate-limit/non-retryable code sets and the HTTP status.
+ */
+function resolveRetryAction(
+	err: unknown,
+	code: ErrorCodeDTO | undefined,
+	rateLimited: boolean,
+): RetryActionDTO {
+	const explicit = (err as { retryAction?: unknown } | undefined)?.retryAction;
+	if (isRetryActionDTO(explicit)) {
+		return explicit;
+	}
+
+	if (rateLimited || (code !== undefined && NON_RETRYABLE_CODES.has(code))) {
+		return RetryActionDTO.none;
+	}
+
+	if (isAxiosError(err)) {
+		const status = err.response?.status;
+		// No response → network/timeout failure; retrying may well succeed.
+		if (status === undefined || status === 408) {
+			return RetryActionDTO.manual;
+		}
+		if (status === 401 || status === 403) {
+			return RetryActionDTO.none;
+		}
+		if (status === 409 || status >= 500) {
+			return RetryActionDTO.manual;
+		}
+		// Other 4xx (validation, bad request) re-fail deterministically.
+		return RetryActionDTO.none;
+	}
+
+	// Non-Axios transport/parse error with no code — let the user retry.
+	return RetryActionDTO.manual;
+}
+
+function resolveMessage(
+	code: ErrorCodeDTO | undefined,
+	body: ErrorBodyDTO | null,
+	fallback: string,
+): string {
+	if (code !== undefined && ERROR_CODE_COPY[code]) {
+		return ERROR_CODE_COPY[code] as string;
+	}
+	// Trust the backend's message only for codes we recognise — never surface
+	// raw text for unknown codes (could be an internal stack trace).
+	if (code !== undefined && body?.message.trim()) {
+		return body.message.trim();
+	}
+	return fallback;
+}
+
+/**
+ * Single resolution point for both SSE and REST assistant errors. Maps the
+ * error onto user-facing copy plus retry semantics, degrading gracefully for
+ * unknown codes (falls back to `fallback` + a `manual` retry where sensible).
+ */
+export function resolveAssistantError(
+	err: unknown,
+	fallback: string,
+): AssistantErrorResolution {
+	const body = getErrorBody(err);
+	const code = isErrorCodeDTO(body?.code) ? body?.code : undefined;
+	const rateLimited = isRateLimit(code, err);
+
+	return {
+		message: resolveMessage(code, body, fallback),
+		code,
+		retryAction: resolveRetryAction(err, code, rateLimited),
+		isRateLimit: rateLimited,
+	};
+}
--- a/frontend/src/container/AIAssistant/utils/resolveAssistantErrorMessage.ts
+++ b/frontend/src/container/AIAssistant/utils/resolveAssistantErrorMessage.ts
@@ -1,71 +0,0 @@
-import { isAxiosError } from 'axios';
-import {
-	ErrorCodeDTO,
-	type ErrorBodyDTO,
-	type ErrorResponseDTO,
-} from 'api/ai-assistant/sigNozAIAssistantAPI.schemas';
-
-export interface AssistantErrorResolution {
-	message: string;
-	isRateLimit: boolean;
-}
-
-function isErrorCodeDTO(code: string | undefined): code is ErrorCodeDTO {
-	return (
-		code !== undefined && (Object.values(ErrorCodeDTO) as string[]).includes(code)
-	);
-}
-
-const RATE_LIMIT_ERROR_CODES = new Set<ErrorCodeDTO>([
-	ErrorCodeDTO.rate_limit_override_exceeds_ceiling,
-	ErrorCodeDTO.thread_message_limit,
-	ErrorCodeDTO.connection_limit_exceeded,
-	ErrorCodeDTO.hourly_message_limit,
-	ErrorCodeDTO.daily_message_limit,
-	ErrorCodeDTO.daily_token_limit,
-	ErrorCodeDTO.daily_cost_limit,
-	ErrorCodeDTO.budget_exceeded,
-]);
-
-function isRateLimitError(code: string | undefined, err: unknown): boolean {
-	if (isAxiosError(err) && err.response?.status === 429) {
-		return true;
-	}
-
-	return isErrorCodeDTO(code) && RATE_LIMIT_ERROR_CODES.has(code);
-}
-
-function getErrorBody(err: unknown): ErrorBodyDTO | null {
-	if (isAxiosError(err)) {
-		return (err.response?.data as ErrorResponseDTO | undefined)?.error ?? null;
-	}
-
-	const code = (err as { code?: string } | undefined)?.code;
-	const message = err instanceof Error ? err.message : undefined;
-	if (!code || !message) {
-		return null;
-	}
-
-	return { code: code as ErrorCodeDTO, message };
-}
-
-/**
- * Uses `error.message` when `error.code` is a known `ErrorCodeDTO`;
- * otherwise returns `fallback`.
- */
-export function resolveAssistantErrorMessage(
-	err: unknown,
-	fallback: string,
-): AssistantErrorResolution {
-	const body = getErrorBody(err);
-	const isRateLimit = isRateLimitError(body?.code, err);
-
-	if (body && isErrorCodeDTO(body.code) && body.message.trim()) {
-		return {
-			message: body.message.trim(),
-			isRateLimit,
-		};
-	}
-
-	return { message: fallback, isRateLimit: Boolean(isRateLimit) };
-}
Author	SHA1	Message	Date
Yunus M	f41dc64d65	feat: add unit tests for error handling and retry functionality	2026-06-30 16:28:07 +05:30
Yunus M	934cf08774	feat: implement error handling and retry mechanism for assistant messages	2026-06-30 16:15:46 +05:30