feat(cli): add compose passthrough command (#93)

Adds `cf compose <stack> <command> [args...]` to run any docker compose command on a stack without needing dedicated wrappers. Useful for commands like top, images, exec, run, config, etc. Multi-host stacks require --host to specify which host to run on.
fix(web): service action buttons fixes and additions (#92)
2026-02-03 14:13:26 +00:00 · 2025-12-20 21:26:05 -08:00 · 2025-12-20 21:11:44 -08:00 · 2025-12-20 20:56:48 -08:00 · 2025-12-20 20:47:34 -08:00 · 2025-12-20 20:41:26 -08:00
133 changed files with 11992 additions and 2170 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
--- a/.github/check_readme_commands.py
+++ b/.github/check_readme_commands.py
@@ -24,7 +24,7 @@ def get_all_commands(typer_app: typer.Typer, prefix: str = "cf") -> set[str]:
            continue
        name = command.name
        if not name and command.callback:
-            name = command.callback.__name__
+            name = getattr(command.callback, "__name__", None)
        if name:
            commands.add(f"{prefix} {name}")

--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,8 +27,8 @@ jobs:
      - name: Install dependencies
        run: uv sync --all-extras --dev

-      - name: Run tests
-        run: uv run pytest
+      - name: Run tests (excluding browser tests)
+        run: uv run pytest -m "not browser"

      - name: Upload coverage reports to Codecov
        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
@@ -36,6 +36,26 @@ jobs:
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

+  browser-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
+      - name: Set up Python
+        run: uv python install 3.13
+
+      - name: Install dependencies
+        run: uv sync --all-extras --dev
+
+      - name: Install Playwright browsers
+        run: uv run playwright install chromium --with-deps
+
+      - name: Run browser tests
+        run: uv run pytest -m browser -v --no-cov
+
  lint:
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,66 @@
+name: Docs
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - "docs/**"
+      - "zensical.toml"
+      - ".github/workflows/docs.yml"
+  pull_request:
+    paths:
+      - "docs/**"
+      - "zensical.toml"
+      - ".github/workflows/docs.yml"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages-${{ github.ref }}"
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Install Zensical
+        run: uv tool install zensical
+
+      - name: Build docs
+        run: zensical build
+
+      - name: Setup Pages
+        if: github.event_name != 'pull_request'
+        uses: actions/configure-pages@v5
+
+      - name: Upload artifact
+        if: github.event_name != 'pull_request'
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: "./site"
+
+  deploy:
+    if: github.event_name != 'pull_request'
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
--- a/.gitignore
+++ b/.gitignore
@@ -44,3 +44,4 @@ compose-farm.yaml
 coverage.xml
 .env
 homepage/
+site/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,12 +25,18 @@ repos:
        args: [--fix]
      - id: ruff-format

-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.14.0
+  - repo: local
    hooks:
      - id: mypy
-        additional_dependencies:
-          - pydantic>=2.0.0
-          - typer>=0.9.0
-          - asyncssh>=2.14.0
-          - types-PyYAML
+        name: mypy (type checker)
+        entry: uv run mypy src tests
+        language: system
+        types: [python]
+        pass_filenames: false
+
+      - id: ty
+        name: ty (type checker)
+        entry: uv run ty check
+        language: system
+        types: [python]
+        pass_filenames: false
--- a/.prompts/docs-review.md
+++ b/.prompts/docs-review.md
@@ -0,0 +1,94 @@
+Review all documentation in this repository for accuracy, completeness, and consistency. Cross-reference documentation against the actual codebase to identify issues.
+
+## Scope
+
+Review all documentation files:
+- docs/*.md (primary documentation)
+- README.md (repository landing page)
+- CLAUDE.md (development guidelines)
+- examples/README.md (example configurations)
+
+## Review Checklist
+
+### 1. Command Documentation
+
+For each documented command, verify against the CLI source code:
+
+- Command exists in codebase
+- All options are documented with correct names, types, and defaults
+- Short options (-x) match long options (--xxx)
+- Examples would work as written
+- Check for undocumented commands or options
+
+Run `--help` for each command to verify.
+
+### 2. Configuration Documentation
+
+Verify against Pydantic models in the config module:
+
+- All config keys are documented
+- Types match Pydantic field types
+- Required vs optional fields are correct
+- Default values are accurate
+- Config file search order matches code
+- Example YAML is valid and uses current schema
+
+### 3. Architecture Documentation
+
+Verify against actual directory structure:
+
+- File paths match actual source code location
+- All modules listed actually exist
+- No modules are missing from the list
+- Component descriptions match code functionality
+- CLI module list includes all command files
+
+### 4. State and Data Files
+
+Verify against state and path modules:
+
+- State file name and location are correct
+- State file format matches actual structure
+- Log file name and location are correct
+- What triggers state/log updates is accurate
+
+### 5. Installation Documentation
+
+Verify against pyproject.toml:
+
+- Python version requirement matches requires-python
+- Package name is correct
+- Optional dependencies are documented
+- CLI entry points are mentioned
+- Installation methods work as documented
+
+### 6. Feature Claims
+
+For each claimed feature, verify it exists and works as described.
+
+### 7. Cross-Reference Consistency
+
+Check for conflicts between documentation files:
+
+- README vs docs/index.md (should be consistent)
+- CLAUDE.md vs actual code structure
+- Command tables match across files
+- Config examples are consistent
+
+## Output Format
+
+Provide findings in these categories:
+
+1. **Critical Issues**: Incorrect information that would cause user problems
+2. **Inaccuracies**: Technical errors, wrong defaults, incorrect paths
+3. **Missing Documentation**: Features/commands that exist but aren't documented
+4. **Outdated Content**: Information that was once true but no longer is
+5. **Inconsistencies**: Conflicts between different documentation files
+6. **Minor Issues**: Typos, formatting, unclear wording
+7. **Verified Accurate**: Sections confirmed to be correct
+
+For each issue, include:
+- File path and line number (if applicable)
+- What the documentation says
+- What the code actually does
+- Suggested fix
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -9,40 +9,72 @@
 ## Architecture

 ```
-compose_farm/
+src/compose_farm/
 ├── cli/               # CLI subpackage
 │   ├── __init__.py    # Imports modules to trigger command registration
 │   ├── app.py         # Shared Typer app instance, version callback
 │   ├── common.py      # Shared helpers, options, progress bar utilities
-│   ├── config.py      # Config subcommand (init, show, path, validate, edit)
+│   ├── config.py      # Config subcommand (init, show, path, validate, edit, symlink)
 │   ├── lifecycle.py   # up, down, pull, restart, update, apply commands
 │   ├── management.py  # refresh, check, init-network, traefik-file commands
-│   └── monitoring.py  # logs, ps, stats commands
+│   ├── monitoring.py  # logs, ps, stats commands
+│   ├── ssh.py         # SSH key management (setup, status, keygen)
+│   └── web.py         # Web UI server command
 ├── config.py          # Pydantic models, YAML loading
 ├── compose.py         # Compose file parsing (.env, ports, volumes, networks)
 ├── console.py         # Shared Rich console instances
 ├── executor.py        # SSH/local command execution, streaming output
 ├── operations.py      # Business logic (up, migrate, discover, preflight checks)
-├── state.py           # Deployment state tracking (which service on which host)
+├── state.py           # Deployment state tracking (which stack on which host)
 ├── logs.py            # Image digest snapshots (dockerfarm-log.toml)
-└── traefik.py         # Traefik file-provider config generation from labels
+├── paths.py           # Path utilities, config file discovery
+├── ssh_keys.py        # SSH key path constants and utilities
+├── traefik.py         # Traefik file-provider config generation from labels
+└── web/               # Web UI (FastAPI + HTMX)
 ```

 ## Web UI Icons

 Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templates/partials/icons.html` by copying SVG paths from their site. The `action_btn`, `stat_card`, and `collapse` macros in `components.html` accept an optional `icon` parameter.

+## HTMX Patterns
+
+- **Multi-element refresh**: Use custom events, not `hx-swap-oob`. Elements have `hx-trigger="cf:refresh from:body"` and JS calls `document.body.dispatchEvent(new CustomEvent('cf:refresh'))`. Simpler to debug/test.
+- **SPA navigation**: Sidebar uses `hx-boost="true"` to AJAX-ify links.
+- **Attribute inheritance**: Set `hx-target`/`hx-swap` on parent elements.
+
 ## Key Design Decisions

 1. **Hybrid SSH approach**: asyncssh for parallel streaming with prefixes; native `ssh -t` for raw mode (progress bars)
-2. **Parallel by default**: Multiple services run concurrently via `asyncio.gather`
-3. **Streaming output**: Real-time stdout/stderr with `[service]` prefix using Rich
+2. **Parallel by default**: Multiple stacks run concurrently via `asyncio.gather`
+3. **Streaming output**: Real-time stdout/stderr with `[stack]` prefix using Rich
 4. **SSH key auth only**: Uses ssh-agent, no password handling (YAGNI)
 5. **NFS assumption**: Compose files at same path on all hosts
 6. **Local IP auto-detection**: Skips SSH when target host matches local machine's IP
-7. **State tracking**: Tracks where services are deployed for auto-migration
+7. **State tracking**: Tracks where stacks are deployed for auto-migration
 8. **Pre-flight checks**: Verifies NFS mounts and Docker networks exist before starting/migrating

+## Code Style
+
+- **Imports at top level**: Never add imports inside functions unless they are explicitly marked with `# noqa: PLC0415` and a comment explaining it speeds up CLI startup. Heavy modules like `pydantic`, `yaml`, and `rich.table` are lazily imported to keep `cf --help` fast.
+
+## Testing
+
+Run tests with `uv run pytest`. Browser tests require Chromium (system-installed or via `playwright install chromium`):
+
+```bash
+# Unit tests only (skip browser tests, can parallelize)
+uv run pytest -m "not browser" -n auto
+
+# Browser tests only (run sequentially, no coverage)
+uv run pytest -m browser --no-cov
+
+# All tests
+uv run pytest --no-cov
+```
+
+Browser tests are marked with `@pytest.mark.browser`. They use Playwright to test HTMX behavior, JavaScript functionality (sidebar filter, command palette, terminals), and content stability during navigation. Run sequentially (no `-n`) to avoid resource contention.
+
 ## Communication Notes

 - Clarify ambiguous wording (e.g., homophones like "right"/"write", "their"/"there").
@@ -53,23 +85,50 @@ Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templat
 - **NEVER merge anything into main.** Always commit directly or use fast-forward/rebase.
 - Never force push.

+## Pull Requests
+
+- Never include unchecked checklists (e.g., `- [ ] ...`) in PR descriptions. Either omit the checklist or use checked items.
+- **NEVER run `gh pr merge`**. PRs are merged via the GitHub UI, not the CLI.
+
+## Releases
+
+Use `gh release create` to create releases. The tag is created automatically.
+
+```bash
+# Check current version
+git tag --sort=-v:refname | head -1
+
+# Create release (minor version bump: v0.21.1 -> v0.22.0)
+gh release create v0.22.0 --title "v0.22.0" --notes "release notes here"
+```
+
+Versioning:
+- **Patch** (v0.21.0 → v0.21.1): Bug fixes
+- **Minor** (v0.21.1 → v0.22.0): New features, non-breaking changes
+
+Write release notes manually describing what changed. Group by features and bug fixes.
+
 ## Commands Quick Reference

 CLI available as `cf` or `compose-farm`.

 | Command | Description |
 |---------|-------------|
-| `up`    | Start services (`docker compose up -d`), auto-migrates if host changed |
-| `down`  | Stop services (`docker compose down`). Use `--orphaned` to stop services removed from config |
+| `up`    | Start stacks (`docker compose up -d`), auto-migrates if host changed |
+| `down`  | Stop stacks (`docker compose down`). Use `--orphaned` to stop stacks removed from config |
+| `stop`  | Stop services without removing containers (`docker compose stop`) |
 | `pull`  | Pull latest images |
 | `restart` | `down` + `up -d` |
-| `update` | `pull` + `down` + `up -d` |
-| `apply` | Make reality match config: migrate services + stop orphans. Use `--dry-run` to preview |
-| `logs`  | Show service logs |
-| `ps`    | Show status of all services |
-| `stats` | Show overview (hosts, services, pending migrations; `--live` for container counts) |
-| `refresh` | Update state from reality: discover running services, capture image digests |
+| `update` | `pull` + `build` + `down` + `up -d` |
+| `apply` | Make reality match config: migrate stacks + stop orphans. Use `--dry-run` to preview |
+| `compose` | Run any docker compose command on a stack (passthrough) |
+| `logs`  | Show stack logs |
+| `ps`    | Show status of all stacks |
+| `stats` | Show overview (hosts, stacks, pending migrations; `--live` for container counts) |
+| `refresh` | Update state from reality: discover running stacks, capture image digests |
 | `check` | Validate config, traefik labels, mounts, networks; show host compatibility |
 | `init-network` | Create Docker network on hosts with consistent subnet/gateway |
 | `traefik-file` | Generate Traefik file-provider config from compose labels |
-| `config` | Manage config files (init, show, path, validate, edit) |
+| `config` | Manage config files (init, show, path, validate, edit, symlink) |
+| `ssh`   | Manage SSH keys (setup, status, keygen) |
+| `web`   | Start web UI server |
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,20 @@
 # syntax=docker/dockerfile:1
-FROM ghcr.io/astral-sh/uv:python3.14-alpine

-# Install SSH client (required for remote host connections)
+# Build stage - install with uv
+FROM ghcr.io/astral-sh/uv:python3.14-alpine AS builder
+
+ARG VERSION
+RUN uv tool install --compile-bytecode "compose-farm[web]${VERSION:+==$VERSION}"
+
+# Runtime stage - minimal image without uv
+FROM python:3.14-alpine
+
+# Install only runtime requirements
 RUN apk add --no-cache openssh-client

-# Install compose-farm from PyPI
-ARG VERSION
-RUN uv tool install "compose-farm[web]${VERSION:+==$VERSION}"
+# Copy installed tool virtualenv and bin symlinks from builder
+COPY --from=builder /root/.local/share/uv/tools/compose-farm /root/.local/share/uv/tools/compose-farm
+COPY --from=builder /usr/local/bin/cf /usr/local/bin/compose-farm /usr/local/bin/

-# Add uv tool bin to PATH
-ENV PATH="/root/.local/bin:$PATH"
-
-# Default entrypoint
 ENTRYPOINT ["cf"]
 CMD ["--help"]
--- a/README.md
+++ b/README.md
@@ -10,7 +10,19 @@
 A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.

 > [!NOTE]
-> Run `docker compose` commands across multiple hosts via SSH. One YAML maps services to hosts. Run `cf apply` and reality matches your config—services start, migrate, or stop as needed. No Kubernetes, no Swarm, no magic.
+> Run `docker compose` commands across multiple hosts via SSH. One YAML maps stacks to hosts. Run `cf apply` and reality matches your config—stacks start, migrate, or stop as needed. No Kubernetes, no Swarm, no magic.
+
+## Quick Demo
+
+**CLI:**
+
+![CLI Demo](docs/assets/quickstart.gif)
+
+**Web UI:**
+
+![Web UI Demo](docs/assets/web-workflow.gif)
+
+## Table of Contents

 <!-- START doctoc generated TOC please keep comment here to allow auto update -->
 <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
@@ -19,12 +31,17 @@ A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.
 - [How It Works](#how-it-works)
 - [Requirements](#requirements)
 - [Limitations & Best Practices](#limitations--best-practices)
-  - [What breaks when you move a service](#what-breaks-when-you-move-a-service)
+  - [What breaks when you move a stack](#what-breaks-when-you-move-a-stack)
  - [Best practices](#best-practices)
  - [What Compose Farm doesn't do](#what-compose-farm-doesnt-do)
 - [Installation](#installation)
+- [SSH Authentication](#ssh-authentication)
+  - [SSH Agent (default)](#ssh-agent-default)
+  - [Dedicated SSH Key (recommended for Docker/Web UI)](#dedicated-ssh-key-recommended-for-dockerweb-ui)
 - [Configuration](#configuration)
-  - [Multi-Host Services](#multi-host-services)
+  - [Single-host example](#single-host-example)
+  - [Multi-host example](#multi-host-example)
+  - [Multi-Host Stacks](#multi-host-stacks)
  - [Config Command](#config-command)
 - [Usage](#usage)
  - [CLI `--help` Output](#cli---help-output)
@@ -37,14 +54,14 @@ A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.

 ## Why Compose Farm?

-I used to run 100+ Docker Compose stacks on a single machine that kept running out of memory. I needed a way to distribute services across multiple machines without the complexity of:
+I used to run 100+ Docker Compose stacks on a single machine that kept running out of memory. I needed a way to distribute stacks across multiple machines without the complexity of:

 - **Kubernetes**: Overkill for my use case. I don't need pods, services, ingress controllers, or YAML manifests 10x the size of my compose files.
 - **Docker Swarm**: Effectively in maintenance mode—no longer being invested in by Docker.

 Both require changes to your compose files. **Compose Farm requires zero changes**—your existing `docker-compose.yml` files work as-is.

-I also wanted a declarative setup—one config file that defines where everything runs. Change the config, run `cf apply`, and everything reconciles—services start, migrate, or stop as needed. See [Comparison with Alternatives](#comparison-with-alternatives) for how this compares to other approaches.
+I also wanted a declarative setup—one config file that defines where everything runs. Change the config, run `cf apply`, and everything reconciles—stacks start, migrate, or stop as needed. See [Comparison with Alternatives](#comparison-with-alternatives) for how this compares to other approaches.

 <p align="center">
 <a href="https://xkcd.com/927/">
@@ -56,8 +73,8 @@ Before you say it—no, this is not a new standard. I changed nothing about my e

 Compose Farm just automates what you'd do by hand:
 - Runs `docker compose` commands over SSH
-- Tracks which service runs on which host
-- **One command (`cf apply`) to reconcile everything**—start missing services, migrate moved ones, stop removed ones
+- Tracks which stack runs on which host
+- **One command (`cf apply`) to reconcile everything**—start missing stacks, migrate moved ones, stop removed ones
 - Generates Traefik file-provider config for cross-host routing

 **It's a convenience wrapper, not a new paradigm.**
@@ -67,13 +84,13 @@ Compose Farm just automates what you'd do by hand:
 **The declarative way** — run `cf apply` and reality matches your config:

 1. Compose Farm compares your config to what's actually running
-2. Services in config but not running? **Starts them**
-3. Services on the wrong host? **Migrates them** (stops on old host, starts on new)
-4. Services running but removed from config? **Stops them**
+2. Stacks in config but not running? **Starts them**
+3. Stacks on the wrong host? **Migrates them** (stops on old host, starts on new)
+4. Stacks running but removed from config? **Stops them**

-**Under the hood** — each service operation is just SSH + docker compose:
+**Under the hood** — each stack operation is just SSH + docker compose:

-1. Look up which host runs the service (e.g., `plex` → `server-1`)
+1. Look up which host runs the stack (e.g., `plex` → `server-1`)
 2. SSH to `server-1` (or run locally if `localhost`)
 3. Execute `docker compose -f /opt/compose/plex/docker-compose.yml up -d`
 4. Stream output back with `[plex]` prefix
@@ -101,13 +118,13 @@ nas:/volume1/compose  →  /opt/compose (on server-2)
 nas:/volume1/compose  →  /opt/compose (on server-3)
 ```

-Compose Farm simply runs `docker compose -f /opt/compose/{service}/docker-compose.yml` on the appropriate host—it doesn't copy or sync files.
+Compose Farm simply runs `docker compose -f /opt/compose/{stack}/docker-compose.yml` on the appropriate host—it doesn't copy or sync files.

 ## Limitations & Best Practices

 Compose Farm moves containers between hosts but **does not provide cross-host networking**. Docker's internal DNS and networks don't span hosts.

-### What breaks when you move a service
+### What breaks when you move a stack

 - **Docker DNS** - `http://redis:6379` won't resolve from another host
 - **Docker networks** - Containers can't reach each other via network names
@@ -117,7 +134,7 @@ Compose Farm moves containers between hosts but **does not provide cross-host ne

 1. **Keep dependent services together** - If an app needs a database, redis, or worker, keep them in the same compose file on the same host

-2. **Only migrate standalone services** - Services that don't talk to other containers (or only talk to external APIs) are safe to move
+2. **Only migrate standalone stacks** - Stacks whose services don't talk to other containers (or only talk to external APIs) are safe to move

 3. **Expose ports for cross-host communication** - If services must communicate across hosts, publish ports and use IP addresses instead of container names:
   ```yaml
@@ -137,8 +154,11 @@ If you need containers on different hosts to communicate seamlessly, you need Do
 ## Installation

 ```bash
+# One-liner (installs uv if needed)
+curl -fsSL https://compose-farm.nijho.lt/install | sh
+
+# Or if you already have uv/pip
 uv tool install compose-farm
-# or
 pip install compose-farm
 ```

@@ -159,10 +179,83 @@ docker run --rm \

 </details>

+## SSH Authentication
+
+Compose Farm uses SSH to run commands on remote hosts. There are two authentication methods:
+
+### SSH Agent (default)
+
+Works out of the box if you have an SSH agent running with your keys loaded:
+
+```bash
+# Verify your agent has keys
+ssh-add -l
+
+# Run compose-farm commands
+cf up --all
+```
+
+### Dedicated SSH Key (recommended for Docker/Web UI)
+
+When running compose-farm in Docker, the SSH agent connection can be lost (e.g., after container restart). The `cf ssh` command sets up a dedicated key that persists:
+
+```bash
+# Generate key and copy to all configured hosts
+cf ssh setup
+
+# Check status
+cf ssh status
+```
+
+This creates `~/.ssh/compose-farm/id_ed25519` (ED25519, no passphrase) and copies the public key to each host's `authorized_keys`. Compose Farm tries the SSH agent first, then falls back to this key.
+
+<details><summary>🐳 Docker volume options for SSH keys</summary>
+
+When running in Docker, mount a volume to persist the SSH keys. Choose ONE option and use it for both `cf` and `web` Compose services:
+
+**Option 1: Host path (default)** - keys at `~/.ssh/compose-farm/id_ed25519`
+```yaml
+volumes:
+  - ~/.ssh/compose-farm:/root/.ssh
+```
+
+**Option 2: Named volume** - managed by Docker
+```yaml
+volumes:
+  - cf-ssh:/root/.ssh
+```
+
+Run setup once after starting the container (while the SSH agent still works):
+
+```bash
+docker compose exec web cf ssh setup
+```
+
+The keys will persist across restarts.
+
+</details>
+
 ## Configuration

-Create `~/.config/compose-farm/compose-farm.yaml` (or `./compose-farm.yaml` in your working directory):
+Create `compose-farm.yaml` in the directory where you'll run commands (e.g., `/opt/stacks`). This keeps config near your stacks. Alternatively, use `~/.config/compose-farm/compose-farm.yaml` for a global config, or symlink from one to the other with `cf config symlink`.

+### Single-host example
+
+No SSH, shared storage, or Traefik file-provider required.
+
+```yaml
+compose_dir: /opt/stacks
+
+hosts:
+  local: localhost  # Run locally without SSH
+
+stacks:
+  plex: local
+  jellyfin: local
+  traefik: local
+```
+
+### Multi-host example
 ```yaml
 compose_dir: /opt/compose  # Must be the same path on all hosts

@@ -173,24 +266,24 @@ hosts:
  server-2:
    address: 192.168.1.11
    # user defaults to current user
-  local: localhost  # Run locally without SSH

-services:
+stacks:
  plex: server-1
  jellyfin: server-2
  sonarr: server-1
-  radarr: local  # Runs on the machine where you invoke compose-farm

-  # Multi-host services (run on multiple/all hosts)
+  # Multi-host stacks (run on multiple/all hosts)
  autokuma: all              # Runs on ALL configured hosts
  dozzle: [server-1, server-2]  # Explicit list of hosts
 ```

-Compose files are expected at `{compose_dir}/{service}/compose.yaml` (also supports `compose.yml`, `docker-compose.yml`, `docker-compose.yaml`).
+For cross-host HTTP routing, add Traefik labels to your compose files and set `traefik_file` so Compose Farm can generate the file-provider config.

-### Multi-Host Services
+Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file. Compose files are expected at `{compose_dir}/{stack}/compose.yaml` (also supports `compose.yml`, `docker-compose.yml`, `docker-compose.yaml`).

-Some services need to run on every host. This is typically required for tools that access **host-local resources** like the Docker socket (`/var/run/docker.sock`), which cannot be accessed remotely without security risks.
+### Multi-Host Stacks
+
+Some stacks need to run on every host. This is typically required for tools that access **host-local resources** like the Docker socket (`/var/run/docker.sock`), which cannot be accessed remotely without security risks.

 Common use cases:
 - **AutoKuma** - auto-creates Uptime Kuma monitors from container labels (needs local Docker socket)
@@ -203,7 +296,7 @@ This is the same pattern as Docker Swarm's `deploy.mode: global`.
 Use the `all` keyword or an explicit list:

 ```yaml
-services:
+stacks:
  # Run on all configured hosts
  autokuma: all
  dozzle: all
@@ -212,9 +305,9 @@ services:
  node-exporter: [server-1, server-2, server-3]
 ```

-When you run `cf up autokuma`, it starts the service on all hosts in parallel. Multi-host services:
+When you run `cf up autokuma`, it starts the stack on all hosts in parallel. Multi-host stacks:
 - Are excluded from migration logic (they always run everywhere)
-- Show output with `[service@host]` prefix for each host
+- Show output with `[stack@host]` prefix for each host
 - Track all running hosts in state

 ### Config Command
@@ -238,20 +331,21 @@ The CLI is available as both `compose-farm` and the shorter `cf` alias.
 | Command | Description |
 |---------|-------------|
 | **`cf apply`** | **Make reality match config (start + migrate + stop orphans)** |
-| `cf up <svc>` | Start service (auto-migrates if host changed) |
-| `cf down <svc>` | Stop service |
-| `cf restart <svc>` | down + up |
-| `cf update <svc>` | pull + down + up |
-| `cf pull <svc>` | Pull latest images |
-| `cf logs -f <svc>` | Follow logs |
-| `cf ps` | Show status of all services |
-| `cf refresh` | Update state from running services |
+| `cf up <stack>` | Start stack (auto-migrates if host changed) |
+| `cf down <stack>` | Stop and remove stack containers |
+| `cf stop <stack>` | Stop stack without removing containers |
+| `cf restart <stack>` | down + up |
+| `cf update <stack>` | pull + build + down + up |
+| `cf pull <stack>` | Pull latest images |
+| `cf logs -f <stack>` | Follow logs |
+| `cf ps` | Show status of all stacks |
+| `cf refresh` | Update state from running stacks |
 | `cf check` | Validate config, mounts, networks |
 | `cf init-network` | Create Docker network on hosts |
 | `cf traefik-file` | Generate Traefik file-provider config |
-| `cf config <cmd>` | Manage config files (init, show, path, validate, edit) |
+| `cf config <cmd>` | Manage config files (init, show, path, validate, edit, symlink) |

-All commands support `--all` to operate on all services.
+All commands support `--all` to operate on all stacks.

 Each command replaces: look up host → SSH → find compose file → run `ssh host "cd /opt/compose/plex && docker compose up -d"`.

@@ -259,14 +353,14 @@ Each command replaces: look up host → SSH → find compose file → run `ssh h
 # The main command: make reality match your config
 cf apply               # start missing + migrate + stop orphans
 cf apply --dry-run     # preview what would change
-cf apply --no-orphans  # skip stopping orphaned services
-cf apply --full        # also refresh all services (picks up config changes)
+cf apply --no-orphans  # skip stopping orphaned stacks
+cf apply --full        # also refresh all stacks (picks up config changes)

-# Or operate on individual services
-cf up plex jellyfin    # start services (auto-migrates if host changed)
+# Or operate on individual stacks
+cf up plex jellyfin    # start stacks (auto-migrates if host changed)
 cf up --all
-cf down plex           # stop services
-cf down --orphaned     # stop services removed from config
+cf down plex           # stop stacks
+cf down --orphaned     # stop stacks removed from config

 # Pull latest images
 cf pull --all
@@ -274,19 +368,19 @@ cf pull --all
 # Restart (down + up)
 cf restart plex

-# Update (pull + down + up) - the end-to-end update command
+# Update (pull + build + down + up) - the end-to-end update command
 cf update --all

-# Update state from reality (discovers running services + captures digests)
-cf refresh             # updates state.yaml and dockerfarm-log.toml
+# Update state from reality (discovers running stacks + captures digests)
+cf refresh             # updates compose-farm-state.yaml and dockerfarm-log.toml
 cf refresh --dry-run   # preview without writing

 # Validate config, traefik labels, mounts, and networks
 cf check                 # full validation (includes SSH checks)
 cf check --local         # fast validation (skip SSH)
-cf check jellyfin        # check service + show which hosts can run it
+cf check jellyfin        # check stack + show which hosts can run it

-# Create Docker network on new hosts (before migrating services)
+# Create Docker network on new hosts (before migrating stacks)
 cf init-network nuc hp   # create mynetwork on specific hosts
 cf init-network          # create on all hosts

@@ -329,26 +423,33 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 │ --help                -h        Show this message and exit.                  │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Lifecycle ──────────────────────────────────────────────────────────────────╮
-│ up             Start services (docker compose up -d). Auto-migrates if host  │
+│ up             Start stacks (docker compose up -d). Auto-migrates if host    │
 │                changed.                                                      │
-│ down           Stop services (docker compose down).                          │
+│ down           Stop stacks (docker compose down).                            │
+│ stop           Stop services without removing containers (docker compose     │
+│                stop).                                                        │
 │ pull           Pull latest images (docker compose pull).                     │
-│ restart        Restart services (down + up).                                 │
-│ update         Update services (pull + build + down + up).                   │
+│ restart        Restart stacks (down + up). With --service, restarts just     │
+│                that service.                                                 │
+│ update         Update stacks (pull + build + down + up). With --service,     │
+│                updates just that service.                                    │
 │ apply          Make reality match config (start, migrate, stop as needed).   │
+│ compose        Run any docker compose command on a stack.                    │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Configuration ──────────────────────────────────────────────────────────────╮
 │ traefik-file   Generate a Traefik file-provider fragment from compose        │
 │                Traefik labels.                                               │
-│ refresh        Update local state from running services.                     │
+│ refresh        Update local state from running stacks.                       │
 │ check          Validate configuration, traefik labels, mounts, and networks. │
 │ init-network   Create Docker network on hosts with consistent settings.      │
 │ config         Manage compose-farm configuration files.                      │
+│ ssh            Manage SSH keys for passwordless authentication.              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Monitoring ─────────────────────────────────────────────────────────────────╮
-│ logs           Show service logs.                                            │
-│ ps             Show status of all services.                                  │
-│ stats          Show overview statistics for hosts and services.              │
+│ logs           Show stack logs. With --service, shows logs for just that     │
+│                service.                                                      │
+│ ps             Show status of stacks.                                        │
+│ stats          Show overview statistics for hosts and stacks.                │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Server ─────────────────────────────────────────────────────────────────────╮
 │ web            Start the web UI server.                                      │
@@ -377,18 +478,19 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf up [OPTIONS] [SERVICES]...
+ Usage: cf up [OPTIONS] [STACKS]...

- Start services (docker compose up -d). Auto-migrates if host changed.
+ Start stacks (docker compose up -d). Auto-migrates if host changed.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a            Run on all services                                  │
-│ --host    -H      TEXT  Filter to services on this host                      │
-│ --config  -c      PATH  Path to config file                                  │
-│ --help    -h            Show this message and exit.                          │
+│ --all      -a            Run on all stacks                                   │
+│ --host     -H      TEXT  Filter to stacks on this host                       │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -412,18 +514,18 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf down [OPTIONS] [SERVICES]...
+ Usage: cf down [OPTIONS] [STACKS]...

- Stop services (docker compose down).
+ Stop stacks (docker compose down).

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all       -a            Run on all services                                │
-│ --orphaned                Stop orphaned services (in state but removed from  │
+│ --all       -a            Run on all stacks                                  │
+│ --orphaned                Stop orphaned stacks (in state but removed from    │
 │                           config)                                            │
-│ --host      -H      TEXT  Filter to services on this host                    │
+│ --host      -H      TEXT  Filter to stacks on this host                      │
 │ --config    -c      PATH  Path to config file                                │
 │ --help      -h            Show this message and exit.                        │
 ╰──────────────────────────────────────────────────────────────────────────────╯
@@ -434,6 +536,41 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 </details>

+<details>
+<summary>See the output of <code>cf stop --help</code></summary>
+
+<!-- CODE:BASH:START -->
+<!-- echo '```yaml' -->
+<!-- export NO_COLOR=1 -->
+<!-- export TERM=dumb -->
+<!-- export TERMINAL_WIDTH=90 -->
+<!-- cf stop --help -->
+<!-- echo '```' -->
+<!-- CODE:END -->
+<!-- OUTPUT:START -->
+<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
+```yaml
+
+ Usage: cf stop [OPTIONS] [STACKS]...
+
+ Stop services without removing containers (docker compose stop).
+
+╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+│   stacks      [STACKS]...  Stacks to operate on                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --all      -a            Run on all stacks                                   │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
+╰──────────────────────────────────────────────────────────────────────────────╯
+
+```
+
+<!-- OUTPUT:END -->
+
+</details>
+
 <details>
 <summary>See the output of <code>cf pull --help</code></summary>

@@ -449,17 +586,18 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf pull [OPTIONS] [SERVICES]...
+ Usage: cf pull [OPTIONS] [STACKS]...

 Pull latest images (docker compose pull).

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a            Run on all services                                  │
-│ --config  -c      PATH  Path to config file                                  │
-│ --help    -h            Show this message and exit.                          │
+│ --all      -a            Run on all stacks                                   │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -483,17 +621,18 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf restart [OPTIONS] [SERVICES]...
+ Usage: cf restart [OPTIONS] [STACKS]...

- Restart services (down + up).
+ Restart stacks (down + up). With --service, restarts just that service.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a            Run on all services                                  │
-│ --config  -c      PATH  Path to config file                                  │
-│ --help    -h            Show this message and exit.                          │
+│ --all      -a            Run on all stacks                                   │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -517,17 +656,19 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf update [OPTIONS] [SERVICES]...
+ Usage: cf update [OPTIONS] [STACKS]...

- Update services (pull + build + down + up).
+ Update stacks (pull + build + down + up). With --service, updates just that
+ service.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a            Run on all services                                  │
-│ --config  -c      PATH  Path to config file                                  │
-│ --help    -h            Show this message and exit.                          │
+│ --all      -a            Run on all stacks                                   │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -555,19 +696,21 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 Make reality match config (start, migrate, stop as needed).

- This is the "reconcile" command that ensures running services match your
+ This is the "reconcile" command that ensures running stacks match your
 config file. It will:
- 1. Stop orphaned services (in state but removed from config) 2. Migrate
- services on wrong host (host in state ≠ host in config) 3. Start missing
- services (in config but not in state)
- Use --dry-run to preview changes before applying. Use --no-orphans to only
- migrate/start without stopping orphaned services. Use --full to also run 'up'
- on all services (picks up compose/env changes).
+
+ 1. Stop orphaned stacks (in state but removed from config)
+ 2. Migrate stacks on wrong host (host in state ≠ host in config)
+ 3. Start missing stacks (in config but not in state)
+
+ Use --dry-run to preview changes before applying.
+ Use --no-orphans to only migrate/start without stopping orphaned stacks.
+ Use --full to also run 'up' on all stacks (picks up compose/env changes).

 ╭─ Options ────────────────────────────────────────────────────────────────────╮
 │ --dry-run     -n            Show what would change without executing         │
-│ --no-orphans                Only migrate, don't stop orphaned services       │
-│ --full        -f            Also run up on all services to apply config      │
+│ --no-orphans                Only migrate, don't stop orphaned stacks         │
+│ --full        -f            Also run up on all stacks to apply config        │
 │                             changes                                          │
 │ --config      -c      PATH  Path to config file                              │
 │ --help        -h            Show this message and exit.                      │
@@ -579,6 +722,53 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 </details>

+<details>
+<summary>See the output of <code>cf compose --help</code></summary>
+
+<!-- CODE:BASH:START -->
+<!-- echo '```yaml' -->
+<!-- export NO_COLOR=1 -->
+<!-- export TERM=dumb -->
+<!-- export TERMINAL_WIDTH=90 -->
+<!-- cf compose --help -->
+<!-- echo '```' -->
+<!-- CODE:END -->
+<!-- OUTPUT:START -->
+<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
+```yaml
+
+ Usage: cf compose [OPTIONS] STACK COMMAND [ARGS]...
+
+ Run any docker compose command on a stack.
+
+ Passthrough to docker compose for commands not wrapped by cf.
+ Options after COMMAND are passed to docker compose, not cf.
+
+ Examples:
+   cf compose mystack --help        - show docker compose help
+   cf compose mystack top           - view running processes
+   cf compose mystack images        - list images
+   cf compose mystack exec web bash - interactive shell
+   cf compose mystack config        - view parsed config
+
+╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+│ *    stack        TEXT       Stack to operate on (use '.' for current dir)   │
+│                              [required]                                      │
+│ *    command      TEXT       Docker compose command [required]               │
+│      args         [ARGS]...  Additional arguments                            │
+╰──────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --host    -H      TEXT  Filter to stacks on this host                        │
+│ --config  -c      PATH  Path to config file                                  │
+│ --help    -h            Show this message and exit.                          │
+╰──────────────────────────────────────────────────────────────────────────────╯
+
+```
+
+<!-- OUTPUT:END -->
+
+</details>
+
 **Configuration**

 <details>
@@ -596,15 +786,15 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf traefik-file [OPTIONS] [SERVICES]...
+ Usage: cf traefik-file [OPTIONS] [STACKS]...

 Generate a Traefik file-provider fragment from compose Traefik labels.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a            Run on all services                                  │
+│ --all     -a            Run on all stacks                                    │
 │ --output  -o      PATH  Write Traefik file-provider YAML to this path        │
 │                         (stdout if omitted)                                  │
 │ --config  -c      PATH  Path to config file                                  │
@@ -632,16 +822,24 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf refresh [OPTIONS]
+ Usage: cf refresh [OPTIONS] [STACKS]...

- Update local state from running services.
+ Update local state from running stacks.
+
+ Discovers which stacks are running on which hosts, updates the state
+ file, and captures image digests. This is a read operation - it updates
+ your local state to match reality, not the other way around.
+
+ Without arguments: refreshes all stacks (same as --all).
+ With stack names: refreshes only those stacks.

- Discovers which services are running on which hosts, updates the state file,
- and captures image digests. This is a read operation - it updates your local
- state to match reality, not the other way around.
 Use 'cf apply' to make reality match your config (stop orphans, migrate).

+╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+│   stacks      [STACKS]...  Stacks to operate on                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
+│ --all       -a            Run on all stacks                                  │
 │ --config    -c      PATH  Path to config file                                │
 │ --log-path  -l      PATH  Path to Dockerfarm TOML log                        │
 │ --dry-run   -n            Show what would change without writing             │
@@ -670,16 +868,17 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf check [OPTIONS] [SERVICES]...
+ Usage: cf check [OPTIONS] [STACKS]...

 Validate configuration, traefik labels, mounts, and networks.

- Without arguments: validates all services against configured hosts. With
- service arguments: validates specific services and shows host compatibility.
+ Without arguments: validates all stacks against configured hosts.
+ With stack arguments: validates specific stacks and shows host compatibility.
+
 Use --local to skip SSH-based checks for faster validation.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
 │ --local                 Skip SSH-based checks (faster)                       │
@@ -713,9 +912,9 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 Create Docker network on hosts with consistent settings.

- Creates an external Docker network that services can use for cross-host
- communication. Uses the same subnet/gateway on all hosts to ensure consistent
- networking.
+ Creates an external Docker network that stacks can use for cross-host
+ communication. Uses the same subnet/gateway on all hosts to ensure
+ consistent networking.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
 │   hosts      [HOSTS]...  Hosts to create network on (default: all)           │
@@ -773,6 +972,21 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 </details>

+
+<details>
+<summary>See the output of <code>cf ssh --help</code></summary>
+
+<!-- CODE:BASH:START -->
+<!-- echo '```yaml' -->
+<!-- export NO_COLOR=1 -->
+<!-- export TERM=dumb -->
+<!-- export TERMINAL_WIDTH=90 -->
+<!-- cf ssh --help -->
+<!-- echo '```' -->
+<!-- CODE:END -->
+
+</details>
+
 **Monitoring**

 <details>
@@ -790,21 +1004,22 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf logs [OPTIONS] [SERVICES]...
+ Usage: cf logs [OPTIONS] [STACKS]...

- Show service logs.
+ Show stack logs. With --service, shows logs for just that service.

 ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-│   services      [SERVICES]...  Services to operate on                        │
+│   stacks      [STACKS]...  Stacks to operate on                              │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --all     -a               Run on all services                               │
-│ --host    -H      TEXT     Filter to services on this host                   │
-│ --follow  -f               Follow logs                                       │
-│ --tail    -n      INTEGER  Number of lines (default: 20 for --all, 100       │
-│                            otherwise)                                        │
-│ --config  -c      PATH     Path to config file                               │
-│ --help    -h               Show this message and exit.                       │
+│ --all      -a               Run on all stacks                                │
+│ --host     -H      TEXT     Filter to stacks on this host                    │
+│ --service  -s      TEXT     Target a specific service within the stack       │
+│ --follow   -f               Follow logs                                      │
+│ --tail     -n      INTEGER  Number of lines (default: 20 for --all, 100      │
+│                             otherwise)                                       │
+│ --config   -c      PATH     Path to config file                              │
+│ --help     -h               Show this message and exit.                      │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -829,13 +1044,24 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a
 <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
 ```yaml

- Usage: cf ps [OPTIONS]
+ Usage: cf ps [OPTIONS] [STACKS]...

- Show status of all services.
+ Show status of stacks.

+ Without arguments: shows all stacks (same as --all).
+ With stack names: shows only those stacks.
+ With --host: shows stacks on that host.
+ With --service: filters to a specific service within the stack.
+
+╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+│   stacks      [STACKS]...  Stacks to operate on                              │
+╰──────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --config  -c      PATH  Path to config file                                  │
-│ --help    -h            Show this message and exit.                          │
+│ --all      -a            Run on all stacks                                   │
+│ --host     -H      TEXT  Filter to stacks on this host                       │
+│ --service  -s      TEXT  Target a specific service within the stack          │
+│ --config   -c      PATH  Path to config file                                 │
+│ --help     -h            Show this message and exit.                         │
 ╰──────────────────────────────────────────────────────────────────────────────╯

 ```
@@ -862,9 +1088,9 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 Usage: cf stats [OPTIONS]

- Show overview statistics for hosts and services.
+ Show overview statistics for hosts and stacks.

- Without --live: Shows config/state info (hosts, services, pending migrations).
+ Without --live: Shows config/state info (hosts, stacks, pending migrations).
 With --live: Also queries Docker on each host for container counts.

 ╭─ Options ────────────────────────────────────────────────────────────────────╮
@@ -897,36 +1123,36 @@ Full `--help` output for each command. See the [Usage](#usage) table above for a

 ### Auto-Migration

-When you change a service's host assignment in config and run `up`, Compose Farm automatically:
+When you change a stack's host assignment in config and run `up`, Compose Farm automatically:
 1. Checks that required mounts and networks exist on the new host (aborts if missing)
 2. Runs `down` on the old host
 3. Runs `up -d` on the new host
 4. Updates state tracking

-Use `cf apply` to automatically reconcile all services—it finds and migrates services on wrong hosts, stops orphaned services, and starts missing services.
+Use `cf apply` to automatically reconcile all stacks—it finds and migrates stacks on wrong hosts, stops orphaned stacks, and starts missing stacks.

 ```yaml
 # Before: plex runs on server-1
-services:
+stacks:
  plex: server-1

 # After: change to server-2, then run `cf up plex`
-services:
+stacks:
  plex: server-2  # Compose Farm will migrate automatically
 ```

-**Orphaned services**: When you remove (or comment out) a service from config, it becomes "orphaned"—tracked in state but no longer in config. Use these commands to handle orphans:
+**Orphaned stacks**: When you remove (or comment out) a stack from config, it becomes "orphaned"—tracked in state but no longer in config. Use these commands to handle orphans:

- `cf apply` — Migrate services AND stop orphans (the full reconcile)
- `cf down --orphaned` — Only stop orphaned services
+- `cf apply` — Migrate stacks AND stop orphans (the full reconcile)
+- `cf down --orphaned` — Only stop orphaned stacks
 - `cf apply --dry-run` — Preview what would change before applying

-This makes the config truly declarative: comment out a service, run `cf apply`, and it stops.
+This makes the config truly declarative: comment out a stack, run `cf apply`, and it stops.

 ## Traefik Multihost Ingress (File Provider)

 If you run a single Traefik instance on one "front‑door" host and want it to route to
-Compose Farm services on other hosts, Compose Farm can generate a Traefik file‑provider
+Compose Farm stacks on other hosts, Compose Farm can generate a Traefik file‑provider
 fragment from your existing compose labels.

 **How it works**
@@ -936,11 +1162,11 @@ fragment from your existing compose labels.
 - Labels and port specs may use `${VAR}` / `${VAR:-default}`; Compose Farm resolves these
  using the stack's `.env` file and your current environment, just like Docker Compose.
 - Publish a host port for that container (via `ports:`). The generator prefers
-  host‑published ports so Traefik can reach the service across hosts; if none are found,
+  host‑published ports so Traefik can reach the stack across hosts; if none are found,
  it warns and you'd need L3 reachability to container IPs.
 - If a router label doesn't specify `traefik.http.routers.<name>.service` and there's only
  one Traefik service defined on that container, Compose Farm wires the router to it.
- `compose-farm.yaml` stays unchanged: just `hosts` and `services: service → host`.
+- `compose-farm.yaml` stays unchanged: just `hosts` and `stacks: stack → host`.

 Example `docker-compose.yml` pattern:

@@ -974,7 +1200,7 @@ providers:
 cf traefik-file --all --output /mnt/data/traefik/dynamic.d/compose-farm.yml
 ```

-Re‑run this after changing Traefik labels, moving a service to another host, or changing
+Re‑run this after changing Traefik labels, moving a stack to another host, or changing
 published ports.

 **Auto-regeneration**
@@ -985,17 +1211,17 @@ add `traefik_file` to your config:
 ```yaml
 compose_dir: /opt/compose
 traefik_file: /opt/traefik/dynamic.d/compose-farm.yml  # auto-regenerate on up/down/restart/update
-traefik_service: traefik  # skip services on same host (docker provider handles them)
+traefik_stack: traefik  # skip stacks on same host (docker provider handles them)

 hosts:
  # ...
-services:
+stacks:
  traefik: server-1  # Traefik runs here
-  plex: server-2     # Services on other hosts get file-provider entries
+  plex: server-2     # Stacks on other hosts get file-provider entries
  # ...
 ```

-The `traefik_service` option specifies which service runs Traefik. Services on the same host
+The `traefik_stack` option specifies which stack runs Traefik. Stacks on the same host
 are skipped in the file-provider config since Traefik's docker provider handles them directly.

 Now `cf up plex` will update the Traefik config automatically—no separate
@@ -1038,11 +1264,11 @@ There are many ways to run containers on multiple hosts. Here is where Compose F
 | Agentless | ✅ | ✅ | ❌ | ✅ | ❌ |
 | High availability | ❌ | ❌ | ✅ | ❌ | ❌ |

-**Docker Contexts** — You can use `docker context create remote ssh://...` and `docker compose --context remote up`. But it's manual: you must remember which host runs which service, there's no global view, no parallel execution, and no auto-migration.
+**Docker Contexts** — You can use `docker context create remote --docker "host=ssh://..."` and `docker --context remote compose up`. But it's manual: you must remember which host runs which stack, there's no global view, no parallel execution, and no auto-migration.

 **Kubernetes / Docker Swarm** — Full orchestration that abstracts away the hardware. But they require cluster initialization, separate control planes, and often rewriting compose files. They introduce complexity (consensus, overlay networks) unnecessary for static "pet" servers.

-**Ansible / Terraform** — Infrastructure-as-Code tools that can SSH in and deploy containers. But they're push-based configuration management, not interactive CLIs. Great for setting up state, clumsy for day-to-day operations like `cf logs -f` or quickly restarting a service.
+**Ansible / Terraform** — Infrastructure-as-Code tools that can SSH in and deploy containers. But they're push-based configuration management, not interactive CLIs. Great for setting up state, clumsy for day-to-day operations like `cf logs -f` or quickly restarting a stack.

 **Portainer / Coolify** — Web-based management UIs. But they're UI-first and often require agents on your servers. Compose Farm is CLI-first and agentless.

--- a/compose-farm.example.yaml
+++ b/compose-farm.example.yaml
@@ -5,7 +5,7 @@ compose_dir: /opt/compose

 # Optional: Auto-regenerate Traefik file-provider config after up/down/restart/update
 traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik  # Skip services on same host (docker provider handles them)
+traefik_stack: traefik  # Skip stacks on same host (docker provider handles them)

 hosts:
  # Full form with all options
@@ -20,11 +20,11 @@ hosts:
  # Local execution (no SSH)
  local: localhost

-services:
-  # Map service names to hosts
-  # Compose file expected at: {compose_dir}/{service}/compose.yaml
+stacks:
+  # Map stack names to hosts
+  # Compose file expected at: {compose_dir}/{stack}/compose.yaml
  traefik: server-1    # Traefik runs here
-  plex: server-2       # Services on other hosts get file-provider entries
+  plex: server-2       # Stacks on other hosts get file-provider entries
  jellyfin: server-2
  sonarr: server-1
  radarr: local
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -5,6 +5,12 @@ services:
      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      # Compose directory (contains compose files AND compose-farm.yaml config)
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
+      # SSH keys for passwordless auth (generated by `cf ssh setup`)
+      # Choose ONE option below (use the same option for both cf and web services):
+      # Option 1: Host path (default) - keys at ~/.ssh/compose-farm/id_ed25519
+      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
+      # Option 2: Named volume - managed by Docker, shared between services
+      # - cf-ssh:/root/.ssh
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      # Config file path (state stored alongside it)
@@ -12,13 +18,21 @@ services:

  web:
    image: ghcr.io/basnijholt/compose-farm:latest
+    restart: unless-stopped
    command: web --host 0.0.0.0 --port 9000
    volumes:
      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
+      # SSH keys - use the SAME option as cf service above
+      # Option 1: Host path (default)
+      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
+      # Option 2: Named volume
+      # - cf-ssh:/root/.ssh
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      - CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
+      # Used to detect self-updates and run via SSH to survive container restart
+      - CF_WEB_STACK=compose-farm
    labels:
      - traefik.enable=true
      - traefik.http.routers.compose-farm.rule=Host(`compose-farm.${DOMAIN}`)
@@ -32,3 +46,7 @@ services:
 networks:
  mynetwork:
    external: true
+
+volumes:
+  cf-ssh:
+    # Only used if Option 2 is selected above
--- a/docs/CNAME
+++ b/docs/CNAME
@@ -0,0 +1 @@
+compose-farm.nijho.lt
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -0,0 +1,346 @@
+---
+icon: lucide/layers
+---
+
+# Architecture
+
+This document explains how Compose Farm works under the hood.
+
+## Design Philosophy
+
+Compose Farm follows three core principles:
+
+1. **KISS** - Keep it simple. It's a thin wrapper around `docker compose` over SSH.
+2. **YAGNI** - No orchestration, no service discovery, no health checks until needed.
+3. **Zero changes** - Your existing compose files work unchanged.
+
+## High-Level Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        Compose Farm CLI                         │
+│                                                                 │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────────────┐ │
+│  │  Config  │  │  State   │  │Operations│  │   Executor       │ │
+│  │  Parser  │  │ Tracker  │  │  Logic   │  │  (SSH/Local)     │ │
+│  └────┬─────┘  └────┬─────┘  └────┬─────┘  └────────┬─────────┘ │
+└───────┼─────────────┼─────────────┼─────────────────┼───────────┘
+        │             │             │                 │
+        ▼             ▼             ▼                 ▼
+┌───────────────────────────────────────────────────────────────┐
+│                         SSH / Local                            │
+└───────────────────────────────────────────────────────────────┘
+        │                                             │
+        ▼                                             ▼
+┌───────────────┐                           ┌───────────────┐
+│   Host: nuc   │                           │   Host: hp    │
+│               │                           │               │
+│ docker compose│                           │ docker compose│
+│    up -d      │                           │    up -d      │
+└───────────────┘                           └───────────────┘
+```
+
+## Core Components
+
+### Configuration (`src/compose_farm/config.py`)
+
+Pydantic models for YAML configuration:
+
+- **Config** - Root configuration with compose_dir, hosts, stacks
+- **HostConfig** - Host address and SSH user
+- **ServiceConfig** - Service-to-host mappings
+
+Key features:
+- Validation with Pydantic
+- Multi-host stack expansion (`all` → list of hosts)
+- YAML loading with sensible defaults
+
+### State Tracking (`src/compose_farm/state.py`)
+
+Tracks deployment state in `compose-farm-state.yaml` (stored alongside the config file):
+
+```yaml
+deployed:
+  plex: nuc
+  sonarr: nuc
+```
+
+Used for:
+- Detecting migrations (stack moved to different host)
+- Identifying orphans (stacks removed from config)
+- `cf ps` status display
+
+### Operations (`src/compose_farm/operations.py`)
+
+Business logic for stack operations:
+
+- **up** - Start stack, handle migration if needed
+- **down** - Stop stack
+- **preflight checks** - Verify mounts, networks exist before operations
+- **discover** - Find running stacks on hosts
+- **migrate** - Down on old host, up on new host
+
+### Executor (`src/compose_farm/executor.py`)
+
+SSH and local command execution:
+
+- **Hybrid SSH approach**: asyncssh for parallel streaming, native `ssh -t` for raw mode
+- **Parallel by default**: Multiple stacks via `asyncio.gather`
+- **Streaming output**: Real-time stdout/stderr with `[stack]` prefix
+- **Local detection**: Skips SSH when target matches local machine IP
+
+### CLI (`src/compose_farm/cli/`)
+
+Typer-based CLI with subcommand modules:
+
+```
+cli/
+├── app.py          # Shared Typer app, version callback
+├── common.py       # Shared helpers, options, progress utilities
+├── config.py       # config subcommand (init, show, path, validate, edit, symlink)
+├── lifecycle.py    # up, down, pull, restart, update, apply
+├── management.py   # refresh, check, init-network, traefik-file
+├── monitoring.py   # logs, ps, stats
+├── ssh.py          # SSH key management (setup, status, keygen)
+└── web.py          # Web UI server command
+```
+
+## Command Flow
+
+### cf up plex
+
+```
+1. Load configuration
+   └─► Parse compose-farm.yaml
+   └─► Validate stack exists
+
+2. Check state
+   └─► Load compose-farm-state.yaml
+   └─► Is plex already running?
+   └─► Is it on a different host? (migration needed)
+
+3. Pre-flight checks
+   └─► SSH to target host
+   └─► Check compose file exists
+   └─► Check required mounts exist
+   └─► Check required networks exist
+
+4. Execute migration (if needed)
+   └─► SSH to old host
+   └─► Run: docker compose down
+
+5. Start stack
+   └─► SSH to target host
+   └─► cd /opt/compose/plex
+   └─► Run: docker compose up -d
+
+6. Update state
+   └─► Write new state to compose-farm-state.yaml
+
+7. Generate Traefik config (if configured)
+   └─► Regenerate traefik file-provider
+```
+
+### cf apply
+
+```
+1. Load configuration and state
+
+2. Compute diff
+   ├─► Orphans: in state, not in config
+   ├─► Migrations: in both, different host
+   └─► Missing: in config, not in state
+
+3. Stop orphans
+   └─► For each orphan: cf down
+
+4. Migrate stacks
+   └─► For each migration: down old, up new
+
+5. Start missing
+   └─► For each missing: cf up
+
+6. Update state
+```
+
+## SSH Execution
+
+### Parallel Streaming (asyncssh)
+
+For most operations, Compose Farm uses asyncssh:
+
+```python
+async def run_command(host, command):
+    async with asyncssh.connect(host) as conn:
+        result = await conn.run(command)
+        return result.stdout, result.stderr
+```
+
+Multiple stacks run concurrently via `asyncio.gather`.
+
+### Raw Mode (native ssh)
+
+For commands needing PTY (progress bars, interactive):
+
+```bash
+ssh -t user@host "docker compose pull"
+```
+
+### Local Detection
+
+When target host IP matches local machine:
+
+```python
+if is_local(host_address):
+    # Run locally, no SSH
+    subprocess.run(command)
+else:
+    # SSH to remote
+    ssh.run(command)
+```
+
+## State Management
+
+### State File
+
+Location: `compose-farm-state.yaml` (stored alongside the config file)
+
+```yaml
+deployed:
+  plex: nuc
+  sonarr: nuc
+```
+
+Image digests are stored separately in `dockerfarm-log.toml` (also in the config directory).
+
+### State Transitions
+
+```
+Config Change          State Change           Action
+─────────────────────────────────────────────────────
+Add stack              Missing                cf up
+Remove stack           Orphaned               cf down
+Change host            Migration              down old, up new
+No change              No change              none (or refresh)
+```
+
+### cf refresh
+
+Syncs state with reality by querying Docker on each host:
+
+```bash
+docker ps --format '{{.Names}}'
+```
+
+Updates `compose-farm-state.yaml` to match what's actually running.
+
+## Compose File Discovery
+
+For each stack, Compose Farm looks for compose files in:
+
+```
+{compose_dir}/{stack}/
+├── compose.yaml         # preferred
+├── compose.yml
+├── docker-compose.yml
+└── docker-compose.yaml
+```
+
+First match wins.
+
+## Traefik Integration
+
+### Label Extraction
+
+Compose Farm parses Traefik labels from compose files:
+
+```yaml
+services:
+  plex:
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+```
+
+### File Provider Generation
+
+Converts labels to Traefik file-provider YAML:
+
+```yaml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      service: plex
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.10:32400
+```
+
+### Variable Resolution
+
+Supports `${VAR}` and `${VAR:-default}` from:
+1. Stack's `.env` file
+2. Current environment
+
+## Error Handling
+
+### Pre-flight Failures
+
+Before any operation, Compose Farm checks:
+- SSH connectivity
+- Compose file existence
+- Required mounts
+- Required networks
+
+If any check fails, the operation aborts with a clear error.
+
+### Partial Failures
+
+When operating on multiple stacks:
+- Each stack is independent
+- Failures are logged, but other stacks continue
+- Exit code reflects overall success/failure
+
+## Performance Considerations
+
+### Parallel Execution
+
+Stacks are started/stopped in parallel:
+
+```python
+await asyncio.gather(*[
+    up_stack(stack) for stack in stacks
+])
+```
+
+### SSH Multiplexing
+
+For repeated connections to the same host, SSH reuses connections.
+
+### Caching
+
+- Config is parsed once per command
+- State is loaded once, written once
+- Host discovery results are cached during command
+
+## Web UI Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                         Web UI                               │
+│                                                             │
+│  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────┐  │
+│  │   FastAPI   │  │    Jinja    │  │       HTMX          │  │
+│  │   Backend   │  │  Templates  │  │   Dynamic Updates   │  │
+│  └─────────────┘  └─────────────┘  └─────────────────────┘  │
+│                                                             │
+│  Pattern: Custom events, not hx-swap-oob                    │
+│  Elements trigger on: cf:refresh from:body                  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templates/partials/icons.html`.
--- a/docs/assets/apply.gif
+++ b/docs/assets/apply.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1372a59a4ed1ac74d3864d7a84dd5311fce4cb6c6a00bf3a574bc2f98d5595
+size 895927
--- a/docs/assets/apply.webm
+++ b/docs/assets/apply.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f339a85f3d930db5a020c9f77e106edc5f44ea7dee6f68557106721493c24ef8
+size 205907
--- a/docs/assets/install.gif
+++ b/docs/assets/install.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:388aa49a1269145698f9763452aaf6b9c6232ea9229abe1dae304df558e29695
+size 403442
--- a/docs/assets/install.webm
+++ b/docs/assets/install.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b8bf4dcb8ee67270d4a88124b4dd4abe0dab518e73812ee73f7c66d77f146e2
+size 228025
--- a/docs/assets/logs.gif
+++ b/docs/assets/logs.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16b9a28137dfae25488e2094de85766a039457f5dca20c2d84ac72e3967c10b9
+size 164237
--- a/docs/assets/logs.webm
+++ b/docs/assets/logs.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0fbe697a1f8256ce3b9a6a64c7019d42769134df9b5b964e5abe98a29e918fd
+size 68242
--- a/docs/assets/migration.gif
+++ b/docs/assets/migration.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:629b8c80b98eb996b75439745676fd99a83f391ca25f778a71bd59173f814c2f
+size 1194931
--- a/docs/assets/migration.webm
+++ b/docs/assets/migration.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33fd46f2d8538cc43be4cb553b3af9d8b412f282ee354b6373e2793fe41c799b
+size 405057
--- a/docs/assets/quickstart.gif
+++ b/docs/assets/quickstart.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccd96e33faba5f297999917d89834b29d58bd2a8929eea8d62875e3d8830bd5c
+size 3198466
--- a/docs/assets/quickstart.webm
+++ b/docs/assets/quickstart.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:979a1a21303bbf284b3510981066ef05c41c1035b34392fecc7bee472116e6db
+size 967564
--- a/docs/assets/update.gif
+++ b/docs/assets/update.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2067f4967a93b7ee3a8db7750c435f41b1fccd2919f3443da4b848c20cc54f23
+size 124559
--- a/docs/assets/update.webm
+++ b/docs/assets/update.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5471bd94e6d1b9d415547fa44de6021fdad2e1cc5b8b295680e217104aa749d6
+size 98149
--- a/docs/assets/web-console.gif
+++ b/docs/assets/web-console.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dac5660cfe6574857ec055fac7822f25b7c5fcb10a836b19c86142515e2fbf75
+size 1816075
--- a/docs/assets/web-console.webm
+++ b/docs/assets/web-console.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4efec8ef5a99f2cb31d55cd71cdbf0bb8dd0cd6281571886b7c1f8b41c3f9da
+size 1660764
--- a/docs/assets/web-navigation.gif
+++ b/docs/assets/web-navigation.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9348dd36e79192344476d61fbbffdb122a96ecc5829fbece1818590cfc521521
+size 3373003
--- a/docs/assets/web-navigation.webm
+++ b/docs/assets/web-navigation.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bebbf8151434ba37bf5e46566a4e8b57812944281926f579d056bdc835ca26aa
+size 2729799
--- a/docs/assets/web-shell.gif
+++ b/docs/assets/web-shell.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3712afff6fcde00eb951264bb24d4301deb085d082b4e95ed4c1893a571938ee
+size 1528294
--- a/docs/assets/web-shell.webm
+++ b/docs/assets/web-shell.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b218d400836a50661c9cdcce2d2b1e285cc5fe592cb42f58aae41f3e7d60684
+size 1327413
--- a/docs/assets/web-stack.gif
+++ b/docs/assets/web-stack.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a232ddc1b9ddd9bf6b5d99c05153e1094be56f1952f02636ca498eb7484e096
+size 3808675
--- a/docs/assets/web-stack.webm
+++ b/docs/assets/web-stack.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a7c9f5f6d47074a6af135190fda6d0a1936cd7a0b04b3aa04ea7d99167a9e05
+size 3333014
--- a/docs/assets/web-themes.gif
+++ b/docs/assets/web-themes.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66f4547ed2e83b302d795875588d9a085af76071a480f1096f2bb64344b80c42
+size 5428670
--- a/docs/assets/web-themes.webm
+++ b/docs/assets/web-themes.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75c8cdeefbbdcab2a240821d3410539f2a2cbe0a015897f4135404c80c3ac32c
+size 6578366
--- a/docs/assets/web-workflow.gif
+++ b/docs/assets/web-workflow.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff2e3ca5a46397efcd5f3a595e7d3c179266cc4f3f5f528b428f5ef2a423028e
+size 12649149
--- a/docs/assets/web-workflow.webm
+++ b/docs/assets/web-workflow.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d739c5f77ddd9d90b609e31df620b35988081b7341fe225eb717d71a87caa88
+size 12284953
--- a/docs/best-practices.md
+++ b/docs/best-practices.md
@@ -0,0 +1,372 @@
+---
+icon: lucide/lightbulb
+---
+
+# Best Practices
+
+Tips, limitations, and recommendations for using Compose Farm effectively.
+
+## Limitations
+
+### No Cross-Host Networking
+
+Compose Farm moves containers between hosts but **does not provide cross-host networking**. Docker's internal DNS and networks don't span hosts.
+
+**What breaks when you move a stack:**
+
+| Feature | Works? | Why |
+|---------|--------|-----|
+| `http://redis:6379` | No | Docker DNS doesn't cross hosts |
+| Docker network names | No | Networks are per-host |
+| `DATABASE_URL=postgres://db:5432` | No | Container name won't resolve |
+| Host IP addresses | Yes | Use `192.168.1.10:5432` |
+
+### What Compose Farm Doesn't Do
+
+- No overlay networking (use Swarm/Kubernetes)
+- No service discovery across hosts
+- No automatic dependency tracking between compose files
+- No health checks or restart policies beyond Docker's
+- No secrets management beyond Docker's
+
+## Stack Organization
+
+### Keep Dependencies Together
+
+If services talk to each other, keep them in the same compose file on the same host:
+
+```yaml
+# /opt/compose/myapp/docker-compose.yml
+services:
+  app:
+    image: myapp
+    depends_on:
+      - db
+      - redis
+
+  db:
+    image: postgres
+
+  redis:
+    image: redis
+```
+
+```yaml
+# compose-farm.yaml
+stacks:
+  myapp: nuc  # All three containers stay together
+```
+
+### Separate Standalone Stacks
+
+Stacks whose services don't talk to other containers can be anywhere:
+
+```yaml
+stacks:
+  # These can run on any host
+  plex: nuc
+  jellyfin: hp
+  homeassistant: nas
+
+  # These should stay together
+  myapp: nuc  # includes app + db + redis
+```
+
+### Cross-Host Communication
+
+If services MUST communicate across hosts, publish ports:
+
+```yaml
+# Instead of
+DATABASE_URL=postgres://db:5432
+
+# Use
+DATABASE_URL=postgres://192.168.1.10:5432
+```
+
+```yaml
+# And publish the port
+services:
+  db:
+    ports:
+      - "5432:5432"
+```
+
+## Multi-Host Stacks
+
+### When to Use `all`
+
+Use `all` for stacks that need local access to each host:
+
+```yaml
+stacks:
+  # Need Docker socket
+  dozzle: all          # Log viewer
+  portainer-agent: all  # Portainer agents
+  autokuma: all        # Auto-creates monitors
+
+  # Need host metrics
+  node-exporter: all   # Prometheus metrics
+  promtail: all        # Log shipping
+```
+
+### Host-Specific Lists
+
+For stacks on specific hosts only:
+
+```yaml
+stacks:
+  # Only on compute nodes
+  gitlab-runner: [nuc, hp]
+
+  # Only on storage nodes
+  minio: [nas-1, nas-2]
+```
+
+## Migration Safety
+
+### Pre-flight Checks
+
+Before migrating, Compose Farm verifies:
+- Compose file is accessible on new host
+- Required mounts exist on new host
+- Required networks exist on new host
+
+### Data Considerations
+
+**Compose Farm doesn't move data.** Ensure:
+
+1. **Shared storage**: Data volumes on NFS/shared storage
+2. **External databases**: Data in external DB, not container
+3. **Backup first**: Always backup before migration
+
+### Safe Migration Pattern
+
+```bash
+# 1. Preview changes
+cf apply --dry-run
+
+# 2. Verify target host can run the stack
+cf check myservice
+
+# 3. Apply changes
+cf apply
+```
+
+## State Management
+
+### When to Refresh
+
+Run `cf refresh` after:
+- Manual `docker compose` commands
+- Container restarts
+- Host reboots
+- Any changes outside Compose Farm
+
+```bash
+cf refresh --dry-run  # Preview
+cf refresh            # Sync
+```
+
+### State Conflicts
+
+If state doesn't match reality:
+
+```bash
+# See what's actually running
+cf refresh --dry-run
+
+# Sync state
+cf refresh
+
+# Then apply config
+cf apply
+```
+
+## Shared Storage
+
+### NFS Best Practices
+
+```bash
+# Mount options for Docker compatibility
+nas:/compose /opt/compose nfs rw,hard,intr,rsize=8192,wsize=8192 0 0
+```
+
+### Directory Ownership
+
+Ensure consistent UID/GID across hosts:
+
+```yaml
+services:
+  myapp:
+    environment:
+      - PUID=1000
+      - PGID=1000
+```
+
+### Config vs Data
+
+Keep config and data separate:
+
+```
+/opt/compose/          # Shared: compose files + config
+├── plex/
+│   ├── docker-compose.yml
+│   └── config/        # Small config files OK
+
+/mnt/data/             # Shared: large media files
+├── movies/
+├── tv/
+└── music/
+
+/opt/appdata/          # Local: per-host app data
+├── plex/
+└── sonarr/
+```
+
+## Performance
+
+### Parallel Operations
+
+Compose Farm runs operations in parallel. For large deployments:
+
+```bash
+# Good: parallel by default
+cf up --all
+
+# Avoid: sequential updates when possible
+for svc in plex sonarr radarr; do
+  cf update $svc
+done
+```
+
+### SSH Connection Reuse
+
+SSH connections are reused within a command. For many operations:
+
+```bash
+# One command, one connection per host
+cf update --all
+
+# Multiple commands, multiple connections (slower)
+cf update plex && cf update sonarr && cf update radarr
+```
+
+## Traefik Setup
+
+### Stack Placement
+
+Put Traefik on a reliable host:
+
+```yaml
+stacks:
+  traefik: nuc  # Primary host with good uptime
+```
+
+### Same-Host Stacks
+
+Stacks on the same host as Traefik use Docker provider:
+
+```yaml
+traefik_stack: traefik
+
+stacks:
+  traefik: nuc
+  portainer: nuc   # Docker provider handles this
+  plex: hp         # File provider handles this
+```
+
+### Middleware in Separate File
+
+Define middlewares outside Compose Farm's generated file:
+
+```yaml
+# /opt/traefik/dynamic.d/middlewares.yml
+http:
+  middlewares:
+    redirect-https:
+      redirectScheme:
+        scheme: https
+```
+
+## Backup Strategy
+
+### What to Backup
+
+| Item | Location | Method |
+|------|----------|--------|
+| Compose Farm config | `~/.config/compose-farm/` | Git or copy |
+| Compose files | `/opt/compose/` | Git |
+| State file | `~/.config/compose-farm/compose-farm-state.yaml` | Optional (can refresh) |
+| App data | `/opt/appdata/` | Backup solution |
+
+### Disaster Recovery
+
+```bash
+# Restore config
+cp backup/compose-farm.yaml ~/.config/compose-farm/
+
+# Refresh state from running containers
+cf refresh
+
+# Or start fresh
+cf apply
+```
+
+## Troubleshooting
+
+### Common Issues
+
+**Stack won't start:**
+```bash
+cf check myservice      # Verify mounts/networks
+cf logs myservice       # Check container logs
+```
+
+**Migration fails:**
+```bash
+cf check myservice      # Verify new host is ready
+cf init-network newhost # Create network if missing
+```
+
+**State out of sync:**
+```bash
+cf refresh --dry-run    # See differences
+cf refresh              # Sync state
+```
+
+**SSH issues:**
+```bash
+cf ssh status           # Check key status
+cf ssh setup            # Re-setup keys
+```
+
+## Security Considerations
+
+### SSH Keys
+
+- Use dedicated SSH key for Compose Farm
+- Limit key to specific hosts if possible
+- Don't store keys in Docker images
+
+### Network Exposure
+
+- Published ports are accessible from network
+- Use firewalls for sensitive services
+- Consider VPN for cross-host communication
+
+### Secrets
+
+- Don't commit `.env` files with secrets
+- Use Docker secrets or external secret management
+- Avoid secrets in compose file labels
+
+## Comparison: When to Use Alternatives
+
+| Scenario | Solution |
+|----------|----------|
+| 2-10 hosts, static stacks | **Compose Farm** |
+| Cross-host container networking | Docker Swarm |
+| Auto-scaling, self-healing | Kubernetes |
+| Infrastructure as code | Ansible + Compose Farm |
+| High availability requirements | Kubernetes or Swarm |
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -0,0 +1,650 @@
+---
+icon: lucide/terminal
+---
+
+# Commands Reference
+
+The Compose Farm CLI is available as both `compose-farm` and the shorter alias `cf`.
+
+## Command Overview
+
+| Category | Command | Description |
+|----------|---------|-------------|
+| **Lifecycle** | `apply` | Make reality match config |
+| | `up` | Start stacks |
+| | `down` | Stop stacks |
+| | `restart` | Restart stacks (down + up) |
+| | `update` | Update stacks (pull + build + down + up) |
+| | `pull` | Pull latest images |
+| **Monitoring** | `ps` | Show stack status |
+| | `logs` | Show stack logs |
+| | `stats` | Show overview statistics |
+| **Configuration** | `check` | Validate config and mounts |
+| | `refresh` | Sync state from reality |
+| | `init-network` | Create Docker network |
+| | `traefik-file` | Generate Traefik config |
+| | `config` | Manage config files |
+| | `ssh` | Manage SSH keys |
+| **Server** | `web` | Start web UI |
+
+## Global Options
+
+```bash
+cf --version, -v    # Show version
+cf --help, -h       # Show help
+```
+
+---
+
+## Lifecycle Commands
+
+### cf apply
+
+Make reality match your configuration. The primary reconciliation command.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/apply.webm" type="video/webm">
+</video>
+
+```bash
+cf apply [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--dry-run, -n` | Preview changes without executing |
+| `--no-orphans` | Skip stopping orphaned stacks |
+| `--full, -f` | Also refresh running stacks |
+| `--config, -c PATH` | Path to config file |
+
+**What it does:**
+
+1. Stops orphaned stacks (in state but removed from config)
+2. Migrates stacks on wrong host
+3. Starts missing stacks (in config but not running)
+
+**Examples:**
+
+```bash
+# Preview what would change
+cf apply --dry-run
+
+# Apply all changes
+cf apply
+
+# Only start/migrate, don't stop orphans
+cf apply --no-orphans
+
+# Also refresh all running stacks
+cf apply --full
+```
+
+---
+
+### cf up
+
+Start stacks. Auto-migrates if host assignment changed.
+
+```bash
+cf up [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Start all stacks |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Start specific stacks
+cf up plex sonarr
+
+# Start all stacks
+cf up --all
+
+# Start all stacks on a specific host
+cf up --all --host nuc
+```
+
+**Auto-migration:**
+
+If you change a stack's host in config and run `cf up`:
+
+1. Verifies mounts/networks exist on new host
+2. Runs `down` on old host
+3. Runs `up -d` on new host
+4. Updates state
+
+---
+
+### cf down
+
+Stop stacks.
+
+```bash
+cf down [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Stop all stacks |
+| `--orphaned` | Stop orphaned stacks only |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Stop specific stacks
+cf down plex
+
+# Stop all stacks
+cf down --all
+
+# Stop stacks removed from config
+cf down --orphaned
+
+# Stop all stacks on a host
+cf down --all --host nuc
+```
+
+---
+
+### cf restart
+
+Restart stacks (down + up).
+
+```bash
+cf restart [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Restart all stacks |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+cf restart plex
+cf restart --all
+```
+
+---
+
+### cf update
+
+Update stacks (pull + build + down + up).
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/update.webm" type="video/webm">
+</video>
+
+```bash
+cf update [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Update all stacks |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Update specific stack
+cf update plex
+
+# Update all stacks
+cf update --all
+```
+
+---
+
+### cf pull
+
+Pull latest images.
+
+```bash
+cf pull [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Pull for all stacks |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+cf pull plex
+cf pull --all
+```
+
+---
+
+## Monitoring Commands
+
+### cf ps
+
+Show status of stacks.
+
+```bash
+cf ps [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Show all stacks (default) |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Show all stacks
+cf ps
+
+# Show specific stacks
+cf ps plex sonarr
+
+# Filter by host
+cf ps --host nuc
+```
+
+---
+
+### cf logs
+
+Show stack logs.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/logs.webm" type="video/webm">
+</video>
+
+```bash
+cf logs [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Show logs for all stacks |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--follow, -f` | Follow logs (live stream) |
+| `--tail, -n INTEGER` | Number of lines (default: 20 for --all, 100 otherwise) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Show last 100 lines
+cf logs plex
+
+# Follow logs
+cf logs -f plex
+
+# Show last 50 lines of multiple stacks
+cf logs -n 50 plex sonarr
+
+# Show last 20 lines of all stacks
+cf logs --all
+```
+
+---
+
+### cf stats
+
+Show overview statistics.
+
+```bash
+cf stats [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--live, -l` | Query Docker for live container counts |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Config/state overview
+cf stats
+
+# Include live container counts
+cf stats --live
+```
+
+---
+
+## Configuration Commands
+
+### cf check
+
+Validate configuration, mounts, and networks.
+
+```bash
+cf check [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--local` | Skip SSH-based checks (faster) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Full validation with SSH
+cf check
+
+# Fast local-only validation
+cf check --local
+
+# Check specific stack and show host compatibility
+cf check jellyfin
+```
+
+---
+
+### cf refresh
+
+Update local state from running stacks.
+
+```bash
+cf refresh [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--dry-run, -n` | Show what would change |
+| `--log-path, -l PATH` | Path to Dockerfarm TOML log |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Sync state with reality
+cf refresh
+
+# Preview changes
+cf refresh --dry-run
+```
+
+---
+
+### cf init-network
+
+Create Docker network on hosts with consistent settings.
+
+```bash
+cf init-network [OPTIONS] [HOSTS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--network, -n TEXT` | Network name (default: mynetwork) |
+| `--subnet, -s TEXT` | Network subnet (default: 172.20.0.0/16) |
+| `--gateway, -g TEXT` | Network gateway (default: 172.20.0.1) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Create on all hosts
+cf init-network
+
+# Create on specific hosts
+cf init-network nuc hp
+
+# Custom network settings
+cf init-network -n production -s 10.0.0.0/16 -g 10.0.0.1
+```
+
+---
+
+### cf traefik-file
+
+Generate Traefik file-provider config from compose labels.
+
+```bash
+cf traefik-file [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Generate for all stacks |
+| `--output, -o PATH` | Output file (stdout if omitted) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Preview to stdout
+cf traefik-file --all
+
+# Write to file
+cf traefik-file --all -o /opt/traefik/dynamic.d/cf.yml
+
+# Specific stacks
+cf traefik-file plex jellyfin -o /opt/traefik/cf.yml
+```
+
+---
+
+### cf config
+
+Manage configuration files.
+
+```bash
+cf config COMMAND
+```
+
+**Subcommands:**
+
+| Command | Description |
+|---------|-------------|
+| `init` | Create new config with examples |
+| `show` | Display config with highlighting |
+| `path` | Print config file path |
+| `validate` | Validate syntax and schema |
+| `edit` | Open in $EDITOR |
+| `symlink` | Create symlink from default location |
+
+**Options by subcommand:**
+
+| Subcommand | Options |
+|------------|---------|
+| `init` | `--path/-p PATH`, `--force/-f` |
+| `show` | `--path/-p PATH`, `--raw/-r` |
+| `edit` | `--path/-p PATH` |
+| `path` | `--path/-p PATH` |
+| `validate` | `--path/-p PATH` |
+| `symlink` | `--force/-f` |
+
+**Examples:**
+
+```bash
+# Create config at default location
+cf config init
+
+# Create config at custom path
+cf config init --path /opt/compose-farm/config.yaml
+
+# Show config with syntax highlighting
+cf config show
+
+# Show raw config (for copy-paste)
+cf config show --raw
+
+# Validate config
+cf config validate
+
+# Edit config in $EDITOR
+cf config edit
+
+# Print config path
+cf config path
+
+# Create symlink to local config
+cf config symlink
+
+# Create symlink to specific file
+cf config symlink /opt/compose-farm/config.yaml
+```
+
+---
+
+### cf ssh
+
+Manage SSH keys for passwordless authentication.
+
+```bash
+cf ssh COMMAND
+```
+
+**Subcommands:**
+
+| Command | Description |
+|---------|-------------|
+| `setup` | Generate key and copy to all hosts |
+| `status` | Show SSH key status and host connectivity |
+| `keygen` | Generate key without distributing |
+
+**Options for `cf ssh setup` and `cf ssh keygen`:**
+
+| Option | Description |
+|--------|-------------|
+| `--force, -f` | Regenerate key even if it exists |
+
+**Examples:**
+
+```bash
+# Set up SSH keys (generates and distributes)
+cf ssh setup
+
+# Check status and connectivity
+cf ssh status
+
+# Generate key only (don't distribute)
+cf ssh keygen
+```
+
+---
+
+## Server Commands
+
+### cf web
+
+Start the web UI server.
+
+```bash
+cf web [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--host, -H TEXT` | Host to bind to (default: 0.0.0.0) |
+| `--port, -p INTEGER` | Port to listen on (default: 8000) |
+| `--reload, -r` | Enable auto-reload for development |
+
+**Note:** Requires web dependencies: `pip install compose-farm[web]`
+
+**Examples:**
+
+```bash
+# Start on default port
+cf web
+
+# Start on custom port
+cf web --port 3000
+
+# Development mode with auto-reload
+cf web --reload
+```
+
+---
+
+## Common Patterns
+
+### Daily Operations
+
+```bash
+# Morning: check status
+cf ps
+cf stats --live
+
+# Update a specific stack
+cf update plex
+
+# View logs
+cf logs -f plex
+```
+
+### Maintenance
+
+```bash
+# Update all stacks
+cf update --all
+
+# Refresh state after manual changes
+cf refresh
+```
+
+### Migration
+
+```bash
+# Preview what would change
+cf apply --dry-run
+
+# Move a stack: edit config, then
+cf up plex  # auto-migrates
+
+# Or reconcile everything
+cf apply
+```
+
+### Troubleshooting
+
+```bash
+# Validate config
+cf check --local
+cf check
+
+# Check specific stack
+cf check jellyfin
+
+# Sync state
+cf refresh --dry-run
+cf refresh
+```
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -0,0 +1,419 @@
+---
+icon: lucide/settings
+---
+
+# Configuration Reference
+
+Compose Farm uses a YAML configuration file to define hosts and stack assignments.
+
+## Config File Location
+
+Compose Farm looks for configuration in this order:
+
+1. `-c` / `--config` flag (if provided)
+2. `CF_CONFIG` environment variable
+3. `./compose-farm.yaml` (current directory)
+4. `$XDG_CONFIG_HOME/compose-farm/compose-farm.yaml` (defaults to `~/.config`)
+
+Use `-c` / `--config` to specify a custom path:
+
+```bash
+cf ps -c /path/to/config.yaml
+```
+
+Or set the environment variable:
+
+```bash
+export CF_CONFIG=/path/to/config.yaml
+```
+
+## Examples
+
+### Single host (local-only)
+
+```yaml
+# Required: directory containing compose files
+compose_dir: /opt/stacks
+
+# Define local host
+hosts:
+  local: localhost
+
+# Map stacks to the local host
+stacks:
+  plex: local
+  sonarr: local
+  radarr: local
+```
+
+### Multi-host (full example)
+
+```yaml
+# Required: directory containing compose files (same path on all hosts)
+compose_dir: /opt/compose
+
+# Optional: auto-regenerate Traefik config
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+# Define Docker hosts
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+    user: admin
+
+# Map stacks to hosts
+stacks:
+  # Single-host stacks
+  plex: nuc
+  sonarr: nuc
+  radarr: hp
+
+  # Multi-host stacks
+  dozzle: all                    # Run on ALL hosts
+  node-exporter: [nuc, hp]       # Run on specific hosts
+```
+
+## Settings Reference
+
+### compose_dir (required)
+
+Directory containing your compose stack folders. Must be the same path on all hosts.
+
+```yaml
+compose_dir: /opt/compose
+```
+
+**Directory structure:**
+
+```
+/opt/compose/
+├── plex/
+│   ├── docker-compose.yml    # or compose.yaml
+│   └── .env                  # optional environment file
+├── sonarr/
+│   └── docker-compose.yml
+└── ...
+```
+
+Supported compose file names (checked in order):
+- `compose.yaml`
+- `compose.yml`
+- `docker-compose.yml`
+- `docker-compose.yaml`
+
+### traefik_file
+
+Path to auto-generated Traefik file-provider config. When set, Compose Farm regenerates this file after `up`, `down`, `restart`, and `update` commands.
+
+```yaml
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+```
+
+### traefik_stack
+
+Stack name running Traefik. Stacks on the same host are skipped in file-provider config (Traefik's docker provider handles them).
+
+```yaml
+traefik_stack: traefik
+```
+
+## Hosts Configuration
+
+### Basic Host
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+```
+
+### With SSH User
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+    user: docker
+```
+
+If `user` is omitted, the current user is used.
+
+### With Custom SSH Port
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+    user: docker
+    port: 2222  # SSH port (default: 22)
+```
+
+### Localhost
+
+For stacks running on the same machine where you invoke Compose Farm:
+
+```yaml
+hosts:
+  local: localhost
+```
+
+No SSH is used for localhost stacks.
+
+### Multiple Hosts
+
+```yaml
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+    user: admin
+  truenas:
+    address: 192.168.1.100
+  local: localhost
+```
+
+## Stacks Configuration
+
+### Single-Host Stack
+
+```yaml
+stacks:
+  plex: nuc
+  sonarr: nuc
+  radarr: hp
+```
+
+### Multi-Host Stack
+
+For stacks that need to run on every host (e.g., log shippers, monitoring agents):
+
+```yaml
+stacks:
+  # Run on ALL configured hosts
+  dozzle: all
+  promtail: all
+
+  # Run on specific hosts
+  node-exporter: [nuc, hp, truenas]
+```
+
+**Common multi-host stacks:**
+- **Dozzle** - Docker log viewer (needs local socket)
+- **Promtail/Alloy** - Log shipping (needs local socket)
+- **node-exporter** - Host metrics (needs /proc, /sys)
+- **AutoKuma** - Uptime Kuma monitors (needs local socket)
+
+### Stack Names
+
+Stack names must match directory names in `compose_dir`:
+
+```yaml
+compose_dir: /opt/compose
+stacks:
+  plex: nuc      # expects /opt/compose/plex/docker-compose.yml
+  my-app: hp     # expects /opt/compose/my-app/docker-compose.yml
+```
+
+## State File
+
+Compose Farm tracks deployment state in `compose-farm-state.yaml`, stored alongside the config file.
+
+For example, if your config is at `~/.config/compose-farm/compose-farm.yaml`, the state file will be at `~/.config/compose-farm/compose-farm-state.yaml`.
+
+```yaml
+deployed:
+  plex: nuc
+  sonarr: nuc
+```
+
+This file records which stacks are deployed and on which host.
+
+**Don't edit manually.** Use `cf refresh` to sync state with reality.
+
+## Environment Variables
+
+### In Compose Files
+
+Your compose files can use `.env` files as usual:
+
+```
+/opt/compose/plex/
+├── docker-compose.yml
+└── .env
+```
+
+Compose Farm runs `docker compose` which handles `.env` automatically.
+
+### In Traefik Labels
+
+When generating Traefik config, Compose Farm resolves `${VAR}` and `${VAR:-default}` from:
+
+1. The stack's `.env` file
+2. Current environment
+
+## Config Commands
+
+### Initialize Config
+
+```bash
+cf config init
+```
+
+Creates a new config file with documented examples.
+
+### Validate Config
+
+```bash
+cf config validate
+```
+
+Checks syntax and schema.
+
+### Show Config
+
+```bash
+cf config show
+```
+
+Displays current config with syntax highlighting.
+
+### Edit Config
+
+```bash
+cf config edit
+```
+
+Opens config in `$EDITOR`.
+
+### Show Config Path
+
+```bash
+cf config path
+```
+
+Prints the config file location (useful for scripting).
+
+### Create Symlink
+
+```bash
+cf config symlink                          # Link to ./compose-farm.yaml
+cf config symlink /path/to/my-config.yaml  # Link to specific file
+```
+
+Creates a symlink from the default location (`~/.config/compose-farm/compose-farm.yaml`) to your config file. Use `--force` to overwrite an existing symlink.
+
+## Validation
+
+### Local Validation
+
+Fast validation without SSH:
+
+```bash
+cf check --local
+```
+
+Checks:
+- Config syntax
+- Stack-to-host mappings
+- Compose file existence
+
+### Full Validation
+
+```bash
+cf check
+```
+
+Additional SSH-based checks:
+- Host connectivity
+- Mount point existence
+- Docker network existence
+- Traefik label validation
+
+### Stack-Specific Check
+
+```bash
+cf check jellyfin
+```
+
+Shows which hosts can run the stack (have required mounts/networks).
+
+## Example Configurations
+
+### Minimal
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  server: 192.168.1.10
+
+stacks:
+  myapp: server
+```
+
+### Home Lab
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  nas:
+    address: 192.168.1.100
+    user: admin
+
+stacks:
+  # Media
+  plex: nuc
+  sonarr: nuc
+  radarr: nuc
+
+  # Infrastructure
+  traefik: nuc
+  portainer: nuc
+
+  # Monitoring (on all hosts)
+  dozzle: all
+```
+
+### Production
+
+```yaml
+compose_dir: /opt/compose
+network: production
+traefik_file: /opt/traefik/dynamic.d/cf.yml
+traefik_stack: traefik
+
+hosts:
+  web-1:
+    address: 10.0.1.10
+    user: deploy
+  web-2:
+    address: 10.0.1.11
+    user: deploy
+  db:
+    address: 10.0.1.20
+    user: deploy
+
+stacks:
+  # Load balanced
+  api: [web-1, web-2]
+
+  # Single instance
+  postgres: db
+  redis: db
+
+  # Infrastructure
+  traefik: web-1
+
+  # Monitoring
+  promtail: all
+```
--- a/docs/demos/README.md
+++ b/docs/demos/README.md
@@ -0,0 +1,17 @@
+# Demo Recordings
+
+Demo recording infrastructure for Compose Farm documentation.
+
+## Structure
+
+```
+docs/demos/
+├── cli/        # VHS-based CLI terminal recordings
+└── web/        # Playwright-based web UI recordings
+```
+
+## Output
+
+All recordings output to `docs/assets/` as WebM (primary) and GIF (fallback).
+
+See subdirectory READMEs for usage.
--- a/docs/demos/cli/README.md
+++ b/docs/demos/cli/README.md
@@ -0,0 +1,32 @@
+# CLI Demo Recordings
+
+VHS-based terminal demo recordings for Compose Farm CLI.
+
+## Requirements
+
+- [VHS](https://github.com/charmbracelet/vhs): `go install github.com/charmbracelet/vhs@latest`
+
+## Usage
+
+```bash
+# Record all demos
+./docs/demos/cli/record.sh
+
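+# Record single demo (run from /opt/stacks; give the tape's path inside your compose-farm checkout)
+cd /opt/stacks && vhs /path/to/compose-farm/docs/demos/cli/quickstart.tape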
+```
+
+## Demos
+
+| Tape | Description |
+|------|-------------|
+| `install.tape` | Installing with `uv tool install` |
+| `quickstart.tape` | `cf ps`, `cf up`, `cf logs` |
+| `logs.tape` | Viewing logs |
+| `update.tape` | `cf update` |
+| `migration.tape` | Stack migration |
+| `apply.tape` | `cf apply` |
+
+## Output
+
+GIF and WebM files saved to `docs/assets/`.
--- a/docs/demos/cli/apply.tape
+++ b/docs/demos/cli/apply.tape
@@ -0,0 +1,39 @@
+# Apply Demo
+# Shows cf apply previewing and reconciling state
+
+Output docs/assets/apply.gif
+Output docs/assets/apply.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Preview what would change"
+Enter
+Sleep 500ms
+
+Type "cf apply --dry-run"
+Enter
+Wait
+
+Type "# Check current status"
+Enter
+Sleep 500ms
+
+Type "cf stats"
+Enter
+Wait+Screen /Summary/
+Sleep 2s
+
+Type "# Apply the changes"
+Enter
+Sleep 500ms
+
+Type "cf apply"
+Enter
+# Wait for shell prompt (command complete)
+Wait
+Sleep 4s
--- a/docs/demos/cli/install.tape
+++ b/docs/demos/cli/install.tape
@@ -0,0 +1,42 @@
+# Installation Demo
+# Shows installing compose-farm with uv
+
+Output docs/assets/install.gif
+Output docs/assets/install.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+Env FORCE_COLOR "1"
+
+Hide
+Type "export PATH=$HOME/.local/bin:$PATH && uv tool uninstall compose-farm 2>/dev/null; clear"
+Enter
+Show
+Type "# Install with uv (recommended)"
+Enter
+Sleep 500ms
+
+Type "uv tool install compose-farm"
+Enter
+Wait+Screen /Installed|already installed/
+
+Type "# Verify installation"
+Enter
+Sleep 500ms
+
+Type "cf --version"
+Enter
+Wait+Screen /compose-farm/
+Sleep 1s
+
+Type "cf --help | less"
+Enter
+Sleep 2s
+PageDown
+Sleep 2s
+Type "q"
+Sleep 2s
--- a/docs/demos/cli/logs.tape
+++ b/docs/demos/cli/logs.tape
@@ -0,0 +1,21 @@
+# Logs Demo
+# Shows viewing stack logs
+
+Output docs/assets/logs.gif
+Output docs/assets/logs.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 550
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# View recent logs"
+Enter
+Sleep 500ms
+
+Type "cf logs immich --tail 20"
+Enter
+Wait+Screen /immich/
+Sleep 2s
--- a/docs/demos/cli/migration.tape
+++ b/docs/demos/cli/migration.tape
@@ -0,0 +1,71 @@
+# Migration Demo
+# Shows automatic stack migration when host changes
+
+Output docs/assets/migration.gif
+Output docs/assets/migration.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 1000
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Current status: audiobookshelf on 'nas'"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Wait+Screen /PORTS/
+
+Type "# Edit config to move it to 'anton'"
+Enter
+Sleep 1s
+
+Type "nvim /opt/stacks/compose-farm.yaml"
+Enter
+Wait+Screen /stacks:/
+
+# Search for audiobookshelf
+Type "/audiobookshelf"
+Enter
+Sleep 1s
+
+# Move to the host value (nas) and change it
+Type "f:"
+Sleep 500ms
+Type "w"
+Sleep 500ms
+Type "ciw"
+Sleep 500ms
+Type "anton"
+Escape
+Sleep 1s
+
+# Save and quit
+Type ":wq"
+Enter
+Sleep 1s
+
+Type "# Run up - automatically migrates!"
+Enter
+Sleep 500ms
+
+Type "cf up audiobookshelf"
+Enter
+# Wait for migration phases: first the stop on old host
+Wait+Screen /Migrating|down/
+# Then wait for start on new host
+Wait+Screen /Starting|up/
+# Finally wait for completion
+Wait
+
+Type "# Verify: audiobookshelf now on 'anton'"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Wait+Screen /PORTS/
+Sleep 3s
--- a/docs/demos/cli/quickstart.tape
+++ b/docs/demos/cli/quickstart.tape
@@ -0,0 +1,91 @@
+# Quick Start Demo
+# Shows basic cf commands
+
+Output docs/assets/quickstart.gif
+Output docs/assets/quickstart.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set FontFamily "FiraCode Nerd Font"
+Set TypingSpeed 50ms
+Env BAT_PAGING "always"
+
+Type "# Config is just: stack → host"
+Enter
+Sleep 500ms
+
+Type "# First, define your hosts..."
+Enter
+Sleep 500ms
+
+Type "bat -r 1:11 compose-farm.yaml"
+Enter
+Sleep 3s
+Type "q"
+Sleep 500ms
+
+Type "# Then map each stack to a host"
+Enter
+Sleep 500ms
+
+Type "bat -r 13:30 compose-farm.yaml"
+Enter
+Sleep 3s
+Type "q"
+Sleep 500ms
+
+Type "# Check stack status"
+Enter
+Sleep 500ms
+
+Type "cf ps immich"
+Enter
+Wait+Screen /PORTS/
+
+Type "# Start a stack"
+Enter
+Sleep 500ms
+
+Type "cf up immich"
+Enter
+Wait
+
+Type "# View logs"
+Enter
+Sleep 500ms
+
+Type "cf logs immich --tail 5"
+Enter
+Wait+Screen /immich/
+Sleep 2s
+
+Type "# The magic: move between hosts (nas → anton)"
+Enter
+Sleep 500ms
+
+Type "# Change host in config (using sed)"
+Enter
+Sleep 500ms
+
+Type "sed -i 's/audiobookshelf: nas/audiobookshelf: anton/' compose-farm.yaml"
+Enter
+Sleep 500ms
+
+Type "# Apply changes - auto-migrates!"
+Enter
+Sleep 500ms
+
+Type "cf apply"
+Enter
+Sleep 15s
+
+Type "# Verify: now on anton"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Sleep 5s
--- a/docs/demos/cli/record.sh
+++ b/docs/demos/cli/record.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# Record all VHS demos
+# Run this on a Docker host with compose-farm configured
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DEMOS_DIR="$(dirname "$SCRIPT_DIR")"
+DOCS_DIR="$(dirname "$DEMOS_DIR")"
+REPO_DIR="$(dirname "$DOCS_DIR")"
+OUTPUT_DIR="$DOCS_DIR/assets"
+
+# Colors
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[0;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+# Check for VHS
+if ! command -v vhs &> /dev/null; then
+    echo "VHS not found. Install with:"
+    echo "  brew install vhs"
+    echo "  # or"
+    echo "  go install github.com/charmbracelet/vhs@latest"
+    exit 1
+fi
+
+# Ensure output directory exists
+mkdir -p "$OUTPUT_DIR"
+
+# Temp output dir (VHS runs from /opt/stacks, so relative paths go here)
+TEMP_OUTPUT="/opt/stacks/docs/assets"
+mkdir -p "$TEMP_OUTPUT"
+
+# Change to /opt/stacks so cf commands use installed version (not editable install)
+cd /opt/stacks
+
+# Ensure compose-farm.yaml has no uncommitted changes (safety check)
+if ! git diff --quiet compose-farm.yaml; then
+    echo -e "${RED}Error: compose-farm.yaml has uncommitted changes${NC}"
+    echo "Commit or stash your changes before recording demos"
+    exit 1
+fi
+
+echo -e "${BLUE}Recording VHS demos...${NC}"
+echo "Output directory: $OUTPUT_DIR"
+echo ""
+
+# Function to record a tape
+# Record a single tape with VHS and report the outcome.
+# Arguments:
+#   $1 - path to the .tape file
+# Returns:
+#   0 if vhs succeeded, 1 otherwise
+record_tape() {
+    local tape=$1
+    # Declare and assign separately: `local name=$(...)` would mask a failing
+    # command substitution because `local` itself always returns 0.
+    local name
+    name=$(basename "$tape" .tape)
+    echo -e "${GREEN}Recording:${NC} $name"
+    if vhs "$tape"; then
+        echo -e "${GREEN}  ✓ Done${NC}"
+    else
+        echo -e "${RED}  ✗ Failed${NC}"
+        return 1
+    fi
+}
+
+# Record demos in logical order
+echo -e "${YELLOW}=== Phase 1: Basic demos ===${NC}"
+record_tape "$SCRIPT_DIR/install.tape"
+record_tape "$SCRIPT_DIR/quickstart.tape"
+record_tape "$SCRIPT_DIR/logs.tape"
+
+echo -e "${YELLOW}=== Phase 2: Update demo ===${NC}"
+record_tape "$SCRIPT_DIR/update.tape"
+
+echo -e "${YELLOW}=== Phase 3: Migration demo ===${NC}"
+record_tape "$SCRIPT_DIR/migration.tape"
+git -C /opt/stacks checkout compose-farm.yaml  # Reset after migration
+
+echo -e "${YELLOW}=== Phase 4: Apply demo ===${NC}"
+record_tape "$SCRIPT_DIR/apply.tape"
+
+# Move GIFs and WebMs from temp location to repo
+echo ""
+echo -e "${BLUE}Moving recordings to repo...${NC}"
+mv "$TEMP_OUTPUT"/*.gif "$OUTPUT_DIR/" 2>/dev/null || true
+mv "$TEMP_OUTPUT"/*.webm "$OUTPUT_DIR/" 2>/dev/null || true
+rmdir "$TEMP_OUTPUT" 2>/dev/null || true
+rmdir "$(dirname "$TEMP_OUTPUT")" 2>/dev/null || true
+
+echo ""
+echo -e "${GREEN}Done!${NC} Recordings saved to $OUTPUT_DIR/"
+ls -la "$OUTPUT_DIR"/*.gif "$OUTPUT_DIR"/*.webm 2>/dev/null || echo "No recordings found (check for errors above)"
--- a/docs/demos/cli/update.tape
+++ b/docs/demos/cli/update.tape
@@ -0,0 +1,32 @@
+# Update Demo
+# Shows updating stacks (pull + build + down + up)
+
+Output docs/assets/update.gif
+Output docs/assets/update.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 500
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Update a single stack"
+Enter
+Sleep 500ms
+
+Type "cf update grocy"
+Enter
+# Wait for command to complete (chain waits for longer timeout)
+Wait+Screen /pull/
+Wait+Screen /grocy/
+Wait@60s
+
+Type "# Check current status"
+Enter
+Sleep 500ms
+
+Type "cf ps grocy"
+Enter
+Wait+Screen /PORTS/
+Sleep 1s
--- a/docs/demos/web/README.md
+++ b/docs/demos/web/README.md
@@ -0,0 +1,45 @@
+# Web UI Demo Recordings
+
+Playwright-based demo recording for Compose Farm web UI.
+
+## Requirements
+
+- Chromium: `playwright install chromium`
+- ffmpeg: `apt install ffmpeg` or `brew install ffmpeg`
+
+## Usage
+
+```bash
+# Record all demos
+python docs/demos/web/record.py
+
+# Record specific demo
+python docs/demos/web/record.py navigation
+```
+
+## Demos
+
+| Demo | Description |
+|------|-------------|
+| `navigation` | Command palette fuzzy search and navigation |
+| `stack` | Stack restart/logs via command palette |
+| `themes` | Theme switching with arrow key preview |
+| `workflow` | Full workflow: filter, navigate, logs, themes |
+| `console` | Console terminal running cf commands |
+| `shell` | Container shell exec with top |
+
+## Output
+
+WebM and GIF files saved to `docs/assets/web-{demo}.{webm,gif}`.
+
+## Files
+
+- `record.py` - Orchestration script
+- `conftest.py` - Playwright fixtures, helper functions
+- `demo_*.py` - Individual demo scripts
+
+## Notes
+
+- Uses real config at `/opt/stacks/compose-farm.yaml`
+- Adjust `pause(page, ms)` calls to control timing
+- Viewport: 1280x720
--- a/docs/demos/web/__init__.py
+++ b/docs/demos/web/__init__.py
@@ -0,0 +1 @@
+"""Web UI demo recording scripts."""
--- a/docs/demos/web/conftest.py
+++ b/docs/demos/web/conftest.py
@@ -0,0 +1,224 @@
+"""Shared fixtures for web UI demo recordings.
+
+Based on tests/web/test_htmx_browser.py patterns for consistency.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import socket
+import threading
+import time
+import urllib.request
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+from unittest.mock import patch
+
+import pytest
+import uvicorn
+
+from compose_farm.config import Config as CFConfig
+from compose_farm.config import load_config
+from compose_farm.state import load_state as _original_load_state
+from compose_farm.web.app import create_app
+from compose_farm.web.cdn import CDN_ASSETS, ensure_vendor_cache
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from playwright.sync_api import BrowserContext, Page, Route
+
+# Stacks to exclude from demo recordings (exact match)
+DEMO_EXCLUDE_STACKS = {"arr"}
+
+
+def _get_filtered_config() -> CFConfig:
+    """Load config but filter out excluded stacks."""
+    config = load_config()
+    filtered_stacks = {
+        name: host for name, host in config.stacks.items() if name not in DEMO_EXCLUDE_STACKS
+    }
+    return CFConfig(
+        compose_dir=config.compose_dir,
+        hosts=config.hosts,
+        stacks=filtered_stacks,
+        traefik_file=config.traefik_file,
+        traefik_stack=config.traefik_stack,
+        config_path=config.config_path,
+    )
+
+
+def _get_filtered_state(config: CFConfig) -> dict[str, str | list[str]]:
+    """Load state but filter out excluded stacks."""
+    state = _original_load_state(config)
+    return {name: host for name, host in state.items() if name not in DEMO_EXCLUDE_STACKS}
+
+
+@pytest.fixture(scope="session")
+def vendor_cache(request: pytest.FixtureRequest) -> Path:
+    """Download CDN assets once and cache to disk for faster recordings."""
+    cache_dir = Path(str(request.config.rootdir)) / ".pytest_cache" / "vendor"
+    return ensure_vendor_cache(cache_dir)
+
+
+@pytest.fixture(scope="session")
+def browser_type_launch_args() -> dict[str, str]:
+    """Configure Playwright to use system Chromium if available."""
+    for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
+        path = shutil.which(name)
+        if path:
+            return {"executable_path": path}
+    return {}
+
+
+# Path to real compose-farm config
+REAL_CONFIG_PATH = Path("/opt/stacks/compose-farm.yaml")
+
+
+@pytest.fixture(scope="module")
+def server_url() -> Generator[str, None, None]:
+    """Start demo server using real config (with filtered stacks) and return URL."""
+    os.environ["CF_CONFIG"] = str(REAL_CONFIG_PATH)
+
+    # Patch at source module level so all callers get filtered versions
+    patches = [
+        # Patch load_state at source - all functions calling it get filtered state
+        patch("compose_farm.state.load_state", _get_filtered_state),
+        # Patch get_config where imported
+        patch("compose_farm.web.routes.pages.get_config", _get_filtered_config),
+        patch("compose_farm.web.routes.api.get_config", _get_filtered_config),
+        patch("compose_farm.web.routes.actions.get_config", _get_filtered_config),
+        patch("compose_farm.web.app.get_config", _get_filtered_config),
+        patch("compose_farm.web.ws.get_config", _get_filtered_config),
+    ]
+
+    for p in patches:
+        p.start()
+
+    with socket.socket() as s:
+        s.bind(("127.0.0.1", 0))
+        port = s.getsockname()[1]
+
+    app = create_app()
+    uvicorn_config = uvicorn.Config(app, host="127.0.0.1", port=port, log_level="error")
+    server = uvicorn.Server(uvicorn_config)
+
+    thread = threading.Thread(target=server.run, daemon=True)
+    thread.start()
+
+    url = f"http://127.0.0.1:{port}"
+    server_ready = False
+    for _ in range(50):
+        try:
+            urllib.request.urlopen(url, timeout=0.5)  # noqa: S310
+            server_ready = True
+            break
+        except Exception:
+            time.sleep(0.1)
+
+    if not server_ready:
+        msg = f"Demo server failed to start on {url}"
+        raise RuntimeError(msg)
+
+    yield url
+
+    server.should_exit = True
+    thread.join(timeout=2)
+    os.environ.pop("CF_CONFIG", None)
+
+    for p in patches:
+        p.stop()
+
+
+@pytest.fixture(scope="module")
+def recording_output_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    """Directory for video recordings."""
+    return Path(tmp_path_factory.mktemp("recordings"))
+
+
+@pytest.fixture
+def recording_context(
+    browser: Any,  # pytest-playwright's browser fixture
+    vendor_cache: Path,
+    recording_output_dir: Path,
+) -> Generator[BrowserContext, None, None]:
+    """Browser context with video recording enabled."""
+    context = browser.new_context(
+        viewport={"width": 1280, "height": 720},
+        record_video_dir=str(recording_output_dir),
+        record_video_size={"width": 1280, "height": 720},
+    )
+
+    # Set up CDN interception
+    cache = {url: (vendor_cache / f, ct) for url, (f, ct) in CDN_ASSETS.items()}
+
+    def handle_cdn(route: Route) -> None:
+        url = route.request.url
+        for url_prefix, (filepath, content_type) in cache.items():
+            if url.startswith(url_prefix):
+                route.fulfill(status=200, content_type=content_type, body=filepath.read_bytes())
+                return
+        route.abort("failed")
+
+    context.route(re.compile(r"https://(cdn\.jsdelivr\.net|unpkg\.com)/.*"), handle_cdn)
+
+    yield context
+    context.close()
+
+
+@pytest.fixture
+def recording_page(recording_context: BrowserContext) -> Generator[Page, None, None]:
+    """Yield a new page opened in the recording context; the page is closed on teardown."""
+    page = recording_context.new_page()
+    yield page
+    page.close()
+
+
+# Demo helper functions
+
+
+def pause(page: Page, ms: int = 500) -> None:
+    """Pause for visibility in recording.
+
+    Args:
+        page: Page to wait on.
+        ms: Wait time handed to ``page.wait_for_timeout`` (milliseconds, per the name).
+    """
+    page.wait_for_timeout(ms)
+
+
+def slow_type(page: Page, selector: str, text: str, delay: int = 100) -> None:
+    """Type with visible delay between keystrokes.
+
+    Args:
+        page: Page to type into.
+        selector: Selector of the element that receives the text.
+        text: Text to type.
+        delay: Per-keystroke delay forwarded to ``page.type``.
+    """
+    page.type(selector, text, delay=delay)
+
+
+def open_command_palette(page: Page) -> None:
+    """Open command palette with Ctrl+K."""
+    page.keyboard.press("Control+k")
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 300)
+
+
+def close_command_palette(page: Page) -> None:
+    """Close command palette with Escape."""
+    page.keyboard.press("Escape")
+    page.wait_for_selector("#cmd-palette:not([open])", timeout=2000)
+    pause(page, 200)
+
+
+def wait_for_sidebar(page: Page) -> None:
+    """Wait for sidebar to load with stacks."""
+    page.wait_for_selector("#sidebar-stacks", timeout=5000)
+    pause(page, 300)
+
+
+def navigate_to_stack(page: Page, stack: str) -> None:
+    """Navigate to a stack page via sidebar click."""
+    page.locator("#sidebar-stacks a", has_text=stack).click()
+    page.wait_for_url(f"**/stack/{stack}", timeout=5000)
+    pause(page, 500)
+
+
+def select_command(page: Page, command: str) -> None:
+    """Filter and select a command from the palette."""
+    page.locator("#cmd-input").fill(command)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 200)
--- a/docs/demos/web/demo_console.py
+++ b/docs/demos/web/demo_console.py
@@ -0,0 +1,73 @@
+"""Demo: Console terminal.
+
+Records a ~30 second demo showing:
+- Navigating to Console page
+- Running cf commands in the terminal
+- Showing the Compose Farm config in Monaco editor
+
+Run: pytest docs/demos/web/demo_console.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_console(recording_page: Page, server_url: str) -> None:
+    """Record console terminal demo."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to Console page via sidebar menu
+    page.locator(".menu a", has_text="Console").click()
+    page.wait_for_url("**/console", timeout=5000)
+    pause(page, 1000)
+
+    # Wait for terminal to be ready (auto-connects)
+    page.wait_for_selector("#console-terminal .xterm", timeout=10000)
+    pause(page, 1500)
+
+    # Run fastfetch first
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2500)  # Wait for output
+
+    # Type cf stats command
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf stats", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 3000)  # Wait for output
+
+    # Type cf ps command
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf ps grocy", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2500)  # Wait for output
+
+    # Scroll down to show the Editor section with Compose Farm config
+    editor_section = page.locator(".collapse", has_text="Editor").first
+    editor_section.scroll_into_view_if_needed()
+    pause(page, 800)
+
+    # Wait for Monaco editor to load with config content
+    page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
+    pause(page, 2500)  # Let viewer see the Compose Farm config file
+
+    # Final pause
+    pause(page, 800)
--- a/docs/demos/web/demo_navigation.py
+++ b/docs/demos/web/demo_navigation.py
@@ -0,0 +1,74 @@
+"""Demo: Command palette navigation.
+
+Records a ~15 second demo showing:
+- Opening command palette with Ctrl+K
+- Fuzzy search filtering
+- Arrow key navigation
+- Stack and page navigation
+
+Run: pytest docs/demos/web/demo_navigation.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_navigation(recording_page: Page, server_url: str) -> None:
+    """Record command palette navigation demo."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 1000)  # Let viewer see dashboard
+
+    # Open command palette with keyboard shortcut
+    open_command_palette(page)
+    pause(page, 500)
+
+    # Type partial stack name for fuzzy search
+    slow_type(page, "#cmd-input", "grocy", delay=120)
+    pause(page, 800)
+
+    # Arrow down to show selection movement
+    page.keyboard.press("ArrowDown")
+    pause(page, 400)
+    page.keyboard.press("ArrowUp")
+    pause(page, 400)
+
+    # Press Enter to navigate to stack
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1500)  # Show stack page
+
+    # Open palette again to navigate elsewhere
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Navigate to another stack (immich) to show more navigation
+    slow_type(page, "#cmd-input", "imm", delay=120)
+    pause(page, 600)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/immich", timeout=5000)
+    pause(page, 1200)  # Show immich stack page
+
+    # Open palette one more time, navigate back to dashboard
+    open_command_palette(page)
+    slow_type(page, "#cmd-input", "dashb", delay=120)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    page.wait_for_url(server_url, timeout=5000)
+    pause(page, 1000)  # Final dashboard view
--- a/docs/demos/web/demo_shell.py
+++ b/docs/demos/web/demo_shell.py
@@ -0,0 +1,71 @@
+"""Demo: Container shell exec.
+
+Records a ~25 second demo showing:
+- Navigating to a stack page
+- Clicking Shell button on a container
+- Running top command inside the container
+
+Run: pytest docs/demos/web/demo_shell.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_shell(recording_page: Page, server_url: str) -> None:
+    """Record container shell demo."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to a stack with a running container (grocy)
+    page.locator("#sidebar-stacks a", has_text="grocy").click()
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1500)
+
+    # Wait for containers list to load (loaded via HTMX)
+    page.wait_for_selector("#containers-list button", timeout=10000)
+    pause(page, 800)
+
+    # Click Shell button on the first container
+    shell_btn = page.locator("#containers-list button", has_text="Shell").first
+    shell_btn.click()
+    pause(page, 1000)
+
+    # Wait for exec terminal to appear
+    page.wait_for_selector("#exec-terminal .xterm", timeout=10000)
+
+    # Scroll down to make the terminal visible
+    page.locator("#exec-terminal").scroll_into_view_if_needed()
+    pause(page, 2000)
+
+    # Run top command
+    slow_type(page, "#exec-terminal .xterm-helper-textarea", "top", delay=100)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 4000)  # Let top run for a bit
+
+    # Press q to quit top
+    page.keyboard.press("q")
+    pause(page, 1000)
+
+    # Run another command to show it's interactive
+    slow_type(page, "#exec-terminal .xterm-helper-textarea", "ps aux | head", delay=60)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2000)
--- a/docs/demos/web/demo_stack.py
+++ b/docs/demos/web/demo_stack.py
@@ -0,0 +1,96 @@
+"""Demo: Stack actions.
+
+Records a ~30 second demo showing:
+- Navigating to a stack page
+- Viewing compose file in Monaco editor
+- Triggering Restart action via command palette
+- Watching terminal output stream
+- Triggering Logs action
+
+Run: pytest docs/demos/web/demo_stack.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_stack(recording_page: Page, server_url: str) -> None:
+    """Record stack actions demo."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to grocy via command palette
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "grocy", delay=100)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1000)  # Show stack page
+
+    # Click on Compose File collapse to show the Monaco editor
+    # The collapse uses a checkbox input, click it via the parent collapse div
+    compose_collapse = page.locator(".collapse", has_text="Compose File").first
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Wait for Monaco editor to load and show content
+    page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the compose file
+
+    # Scroll down slightly to show more of the editor
+    page.locator("#compose-editor").scroll_into_view_if_needed()
+    pause(page, 1500)
+
+    # Close the compose file section
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Open command palette for stack actions
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Filter to Restart action
+    slow_type(page, "#cmd-input", "restart", delay=120)
+    pause(page, 600)
+
+    # Execute Restart
+    page.keyboard.press("Enter")
+    pause(page, 300)
+
+    # Wait for terminal to expand and show output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)  # Let viewer see terminal streaming
+
+    # Open palette again for Logs
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Filter to Logs action
+    slow_type(page, "#cmd-input", "logs", delay=120)
+    pause(page, 600)
+
+    # Execute Logs
+    page.keyboard.press("Enter")
+    pause(page, 300)
+
+    # Show log output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)  # Final view of logs
--- a/docs/demos/web/demo_themes.py
+++ b/docs/demos/web/demo_themes.py
@@ -0,0 +1,81 @@
+"""Demo: Theme switching.
+
+Records a ~15 second demo showing:
+- Opening theme picker via theme button
+- Live theme preview on arrow navigation
+- Selecting different themes
+- Theme persistence
+
+Run: pytest docs/demos/web/demo_themes.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_themes(recording_page: Page, server_url: str) -> None:
+    """Record theme switching demo."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 1000)  # Show initial theme
+
+    # Click theme button to open theme picker
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 600)
+
+    # Arrow through many themes to show live preview effect
+    for _ in range(12):
+        page.keyboard.press("ArrowDown")
+        pause(page, 350)  # Show each preview
+
+    # Go back up through a few (land on valentine, not cyberpunk)
+    for _ in range(4):
+        page.keyboard.press("ArrowUp")
+        pause(page, 350)
+
+    # Select current theme with Enter
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Close palette with Escape
+    page.keyboard.press("Escape")
+    pause(page, 800)
+
+    # Open again and use search to find specific theme
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 400)
+
+    # Type to filter to a light theme (theme button pre-populates "theme:")
+    slow_type(page, "#cmd-input", "cup", delay=100)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Close and return to dark
+    page.keyboard.press("Escape")
+    pause(page, 500)
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 300)
+
+    slow_type(page, "#cmd-input", "dark", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 800)
--- a/docs/demos/web/demo_workflow.py
+++ b/docs/demos/web/demo_workflow.py
@@ -0,0 +1,201 @@
+"""Demo: Full workflow.
+
+Records a comprehensive demo (~60 seconds) combining all major features:
+1. Console page: terminal with fastfetch, cf pull command
+2. Editor showing Compose Farm YAML config
+3. Command palette navigation to grocy stack
+4. Stack actions: up, logs
+5. Switch to mealie stack via command palette, run update
+6. Dashboard overview
+7. Theme cycling via command palette
+
+This demo is used on the homepage and Web UI page as the main showcase.
+
+Run: pytest docs/demos/web/demo_workflow.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import open_command_palette, pause, slow_type, wait_for_sidebar
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+def _demo_console_terminal(page: Page, server_url: str) -> None:
+    """Demo part 1: Console page with terminal and editor."""
+    # Start on dashboard briefly
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to Console page via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "cons", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/console", timeout=5000)
+    pause(page, 800)
+
+    # Wait for terminal to be ready
+    page.wait_for_selector("#console-terminal .xterm", timeout=10000)
+    pause(page, 1000)
+
+    # Run fastfetch first
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=60)
+    pause(page, 200)
+    page.keyboard.press("Enter")
+    pause(page, 2000)  # Wait for output
+
+    # Run cf pull on a stack to show Compose Farm in action
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf pull grocy", delay=60)
+    pause(page, 200)
+    page.keyboard.press("Enter")
+    pause(page, 3000)  # Wait for pull output
+
+
+def _demo_config_editor(page: Page) -> None:
+    """Demo part 2: Show the Compose Farm config in editor."""
+    # Smoothly scroll down to show the Editor section
+    # Use JavaScript for smooth scrolling animation
+    page.evaluate("""
+        const editor = document.getElementById('console-editor');
+        if (editor) {
+            editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)  # Wait for smooth scroll animation
+
+    # Wait for Monaco editor to load with config content
+    page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the Compose Farm config file
+
+
+def _demo_stack_actions(page: Page) -> None:
+    """Demo part 3: Navigate to stack and run actions."""
+    # Click on sidebar to take focus away from terminal, then use command palette
+    page.locator("#sidebar-stacks").click()
+    pause(page, 300)
+
+    # Navigate to grocy via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "grocy", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1000)
+
+    # Open Compose File editor to show the compose.yaml
+    compose_collapse = page.locator(".collapse", has_text="Compose File").first
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Wait for Monaco editor to load and show content
+    page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the compose file
+
+    # Close the compose file section
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Run Up action via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "up", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    # Wait for terminal output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+    # Show logs
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "logs", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+    # Switch to mealie via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "mealie", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/mealie", timeout=5000)
+    pause(page, 1000)
+
+    # Run update action
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "upda", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+
+def _demo_dashboard_and_themes(page: Page, server_url: str) -> None:
+    """Demo part 4: Dashboard and theme cycling."""
+    # Navigate to dashboard via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "dash", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url(server_url, timeout=5000)
+    pause(page, 800)
+
+    # Scroll to top of page to ensure dashboard is fully visible
+    page.evaluate("window.scrollTo(0, 0)")
+    pause(page, 600)
+
+    # Open theme picker and arrow down to Luxury (shows live preview)
+    # Theme order: light, dark, cupcake, bumblebee, emerald, corporate, synthwave,
+    # retro, cyberpunk, valentine, halloween, garden, forest, aqua, lofi, pastel,
+    # fantasy, wireframe, black, luxury (index 19)
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 400)
+
+    # Arrow down through themes with live preview until we reach Luxury
+    for _ in range(19):
+        page.keyboard.press("ArrowDown")
+        pause(page, 180)
+
+    # Select Luxury theme
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Return to dark theme
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", " dark", delay=80)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_workflow(recording_page: Page, server_url: str) -> None:
+    """Record full workflow demo."""
+    page = recording_page
+
+    _demo_console_terminal(page, server_url)
+    _demo_config_editor(page)
+    _demo_stack_actions(page)
+    _demo_dashboard_and_themes(page, server_url)
--- a/docs/demos/web/record.py
+++ b/docs/demos/web/record.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+"""Record all web UI demos.
+
+This script orchestrates recording of web UI demos using Playwright,
+then converts the WebM recordings to GIF format.
+
+Usage:
+    python docs/demos/web/record.py           # Record all demos
+    python docs/demos/web/record.py navigation  # Record specific demo
+
+Requirements:
+    - Playwright with Chromium: playwright install chromium
+    - ffmpeg for GIF conversion: apt install ffmpeg / brew install ffmpeg
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+# Shared Rich console used for all status output in this script.
+console = Console()
+
+# Directory containing this script; the demo_<name>.py files are looked up here.
+SCRIPT_DIR = Path(__file__).parent
+# Three levels above SCRIPT_DIR; used as the pytest working directory.
+REPO_DIR = SCRIPT_DIR.parent.parent.parent
+# Destination for the final .webm and .gif files.
+OUTPUT_DIR = REPO_DIR / "docs" / "assets"
+
+# Demo names accepted on the command line, in the order they are recorded by default.
+DEMOS = [
+    "navigation",
+    "stack",
+    "themes",
+    "workflow",
+    "console",
+    "shell",
+]
+
+# High-quality ffmpeg settings for VP8 encoding
+# See: https://github.com/microsoft/playwright/issues/10855
+# See: https://github.com/microsoft/playwright/issues/31424
+#
+# MAX_QUALITY: Lossless-like, largest files
+# BALANCED_QUALITY: ~43% file size, nearly indistinguishable quality
+MAX_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 0 -crf 0 -deadline best -speed 0 -b:v 0 -threads 0"
+BALANCED_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 10 -crf 4 -deadline best -speed 0 -b:v 0 -threads 0"
+
+# Choose which quality to use
+VIDEO_QUALITY_ARGS = MAX_QUALITY_ARGS
+
+
+def patch_playwright_video_quality() -> None:
+    """Patch Playwright's videoRecorder.js to use high-quality encoding settings."""
+    from playwright._impl._driver import compute_driver_executable  # noqa: PLC0415
+
+    # compute_driver_executable returns (node_path, cli_path)
+    result = compute_driver_executable()
+    node_path = result[0] if isinstance(result, tuple) else result
+    driver_path = Path(node_path).parent
+
+    video_recorder = driver_path / "package" / "lib" / "server" / "chromium" / "videoRecorder.js"
+
+    if not video_recorder.exists():
+        msg = f"videoRecorder.js not found at {video_recorder}"
+        raise FileNotFoundError(msg)
+
+    content = video_recorder.read_text()
+
+    # Check if already patched
+    if "deadline best" in content:
+        return  # Already patched
+
+    # Pattern to match the ffmpeg args line
+    pattern = (
+        r"-c:v vp8 -qmin \d+ -qmax \d+ -crf \d+ -deadline \w+ -speed \d+ -b:v \w+ -threads \d+"
+    )
+
+    if not re.search(pattern, content):
+        msg = "Could not find ffmpeg args pattern in videoRecorder.js"
+        raise ValueError(msg)
+
+    # Replace with high-quality settings
+    new_content = re.sub(pattern, VIDEO_QUALITY_ARGS, content)
+    video_recorder.write_text(new_content)
+    console.print("[green]Patched Playwright for high-quality video recording[/green]")
+
+
+def record_demo(name: str) -> Path | None:
+    """Run a single demo and return the video path."""
+    console.print(f"[green]Recording:[/green] web-{name}")
+
+    demo_file = SCRIPT_DIR / f"demo_{name}.py"
+    if not demo_file.exists():
+        console.print(f"[red]  Demo file not found: {demo_file}[/red]")
+        return None
+
+    # Create temp output dir for this recording
+    temp_dir = SCRIPT_DIR / ".recordings"
+    temp_dir.mkdir(exist_ok=True)
+
+    # Run pytest with video recording
+    # Set PYTHONPATH so conftest.py imports work
+    env = {**os.environ, "PYTHONPATH": str(SCRIPT_DIR)}
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "pytest",
+            str(demo_file),
+            "-v",
+            "--no-cov",
+            "-x",  # Stop on first failure
+            f"--basetemp={temp_dir}",
+        ],
+        check=False,
+        cwd=REPO_DIR,
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+
+    if result.returncode != 0:
+        console.print(f"[red]  Failed to record {name}[/red]")
+        console.print(result.stdout)
+        console.print(result.stderr)
+        return None
+
+    # Find the recorded video
+    videos = list(temp_dir.rglob("*.webm"))
+    if not videos:
+        console.print(f"[red]  No video found for {name}[/red]")
+        return None
+
+    # Use the most recent video
+    video = max(videos, key=lambda p: p.stat().st_mtime)
+    console.print(f"[green]  Recorded: {video.name}[/green]")
+    return video
+
+
+def convert_to_gif(webm_path: Path, output_name: str) -> Path:
+    """Convert WebM to GIF using ffmpeg with palette optimization."""
+    gif_path = OUTPUT_DIR / f"{output_name}.gif"
+    palette_path = webm_path.parent / "palette.png"
+
+    # Two-pass approach for better quality
+    # Pass 1: Generate palette
+    subprocess.run(
+        [  # noqa: S607
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(webm_path),
+            "-vf",
+            "fps=10,scale=1280:-1:flags=lanczos,palettegen=stats_mode=diff",
+            str(palette_path),
+        ],
+        check=True,
+        capture_output=True,
+    )
+
+    # Pass 2: Generate GIF with palette
+    subprocess.run(
+        [  # noqa: S607
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(webm_path),
+            "-i",
+            str(palette_path),
+            "-lavfi",
+            "fps=10,scale=1280:-1:flags=lanczos[x];[x][1:v]paletteuse=dither=bayer:bayer_scale=5:diff_mode=rectangle",
+            str(gif_path),
+        ],
+        check=True,
+        capture_output=True,
+    )
+
+    palette_path.unlink(missing_ok=True)
+    return gif_path
+
+
+def move_recording(video_path: Path, name: str) -> tuple[Path, Path]:
+    """Move WebM and convert to GIF, returning both paths."""
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    output_name = f"web-{name}"
+    webm_dest = OUTPUT_DIR / f"{output_name}.webm"
+
+    shutil.copy2(video_path, webm_dest)
+    console.print(f"[blue]  WebM: {webm_dest.relative_to(REPO_DIR)}[/blue]")
+
+    gif_path = convert_to_gif(video_path, output_name)
+    console.print(f"[blue]  GIF:  {gif_path.relative_to(REPO_DIR)}[/blue]")
+
+    return webm_dest, gif_path
+
+
+def cleanup() -> None:
+    """Clean up temporary recording files."""
+    temp_dir = SCRIPT_DIR / ".recordings"
+    if temp_dir.exists():
+        shutil.rmtree(temp_dir)
+
+
+def main() -> int:
+    """Record all web UI demos."""
+    console.print("[blue]Recording web UI demos...[/blue]")
+    console.print(f"Output directory: {OUTPUT_DIR}")
+    console.print()
+
+    # Patch Playwright for high-quality video recording
+    patch_playwright_video_quality()
+
+    # Determine which demos to record
+    if len(sys.argv) > 1:
+        demos_to_record = [d for d in sys.argv[1:] if d in DEMOS]
+        if not demos_to_record:
+            console.print(f"[red]Unknown demo(s). Available: {', '.join(DEMOS)}[/red]")
+            return 1
+    else:
+        demos_to_record = DEMOS
+
+    results: dict[str, tuple[Path | None, Path | None]] = {}
+
+    try:
+        for i, demo in enumerate(demos_to_record, 1):
+            console.print(f"[yellow]=== Demo {i}/{len(demos_to_record)}: {demo} ===[/yellow]")
+
+            video_path = record_demo(demo)
+            if video_path:
+                webm, gif = move_recording(video_path, demo)
+                results[demo] = (webm, gif)
+            else:
+                results[demo] = (None, None)
+            console.print()
+    finally:
+        cleanup()
+
+    # Summary
+    console.print("[blue]=== Summary ===[/blue]")
+    success_count = sum(1 for w, _ in results.values() if w is not None)
+    console.print(f"Recorded: {success_count}/{len(demos_to_record)} demos")
+    console.print()
+
+    for demo, (webm, gif) in results.items():  # type: ignore[assignment]
+        status = "[green]OK[/green]" if webm else "[red]FAILED[/red]"
+        console.print(f"  {demo}: {status}")
+        if webm:
+            console.print(f"    {webm.relative_to(REPO_DIR)}")
+        if gif:
+            console.print(f"    {gif.relative_to(REPO_DIR)}")
+
+    return 0 if success_count == len(demos_to_record) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/docs/dev/docker-swarm-network.md
+++ b/docs/dev/docker-swarm-network.md
@@ -1,90 +0,0 @@
-# Docker Swarm Overlay Networks with Compose Farm
-
-Notes from testing Docker Swarm's attachable overlay networks as a way to get cross-host container networking while still using `docker compose`.
-
-## The Idea
-
-Docker Swarm overlay networks can be made "attachable", allowing regular `docker compose` containers (not just swarm services) to join them. This would give us:
-
- Cross-host Docker DNS (containers find each other by name)
- No need to publish ports for inter-container communication
- Keep using `docker compose up` instead of `docker stack deploy`
-
-## Setup Steps
-
-```bash
-# On manager node
-docker swarm init --advertise-addr <manager-ip>
-
-# On worker nodes (use token from init output)
-docker swarm join --token <token> <manager-ip>:2377
-
-# Create attachable overlay network (on manager)
-docker network create --driver overlay --attachable my-network
-
-# In compose files, add the network
-networks:
-  my-network:
-    external: true
-```
-
-## Required Ports
-
-Docker Swarm requires these ports open **bidirectionally** between all nodes:
-
-| Port | Protocol | Purpose |
-|------|----------|---------|
-| 2377 | TCP | Cluster management |
-| 7946 | TCP + UDP | Node communication |
-| 4789 | UDP | Overlay network traffic (VXLAN) |
-
-## Test Results (2024-12-13)
-
- docker-debian (192.168.1.66) as manager
- dev-lxc (192.168.1.167) as worker
-
-### What worked
-
- Swarm init and join
- Overlay network creation
- Nodes showed as Ready
-
-### What failed
-
- Container on dev-lxc couldn't attach to overlay network
- Error: `attaching to network failed... context deadline exceeded`
- Cause: Port 7946 blocked from docker-debian → dev-lxc
-
-### Root cause
-
-Firewall on dev-lxc wasn't configured to allow swarm ports. Opening these ports requires sudo access on each node.
-
-## Conclusion
-
-Docker Swarm overlay networks are **not plug-and-play**. Requirements:
-
-1. Swarm init/join on all nodes
-2. Firewall rules on all nodes (needs sudo/root)
-3. All nodes must have bidirectional connectivity on 3 ports
-
-For a simpler alternative, consider:
-
- **Tailscale**: VPN mesh, containers use host's Tailscale IP
- **Host networking + published ports**: What compose-farm does today
- **Keep dependent services together**: Avoid cross-host networking entirely
-
-## Future Work
-
-If we decide to support overlay networks:
-
-1. Add a `compose-farm network create` command that:
-   - Initializes swarm if needed
-   - Creates attachable overlay network
-   - Documents required firewall rules
-
-2. Add network config to compose-farm.yaml:
-   ```yaml
-   overlay_network: compose-farm-net
-   ```
-
-3. Auto-inject network into compose files (or document manual setup)
--- a/docs/dev/future-improvements.md
+++ b/docs/dev/future-improvements.md
@@ -1,128 +0,0 @@
-# Future Improvements
-
-Low-priority improvements identified during code review. These are not currently causing issues but could be addressed if they become pain points.
-
-## 1. State Module Efficiency (LOW)
-
-**Current:** Every state operation reads and writes the entire file.
-
-```python
-def set_service_host(config, service, host):
-    state = load_state(config)   # Read file
-    state[service] = host
-    save_state(config, state)    # Write file
-```
-
-**Impact:** With 87 services, this is fine. With 1000+, it would be slow.
-
-**Potential fix:** Add batch operations:
-```python
-def update_state(config, updates: dict[str, str | None]) -> None:
-    """Batch update: set services to hosts, None means remove."""
-    state = load_state(config)
-    for service, host in updates.items():
-        if host is None:
-            state.pop(service, None)
-        else:
-            state[service] = host
-    save_state(config, state)
-```
-
-**When to do:** Only if state operations become noticeably slow.
-
---
-
-## 2. Remote-Aware Compose Path Resolution (LOW)
-
-**Current:** `config.get_compose_path()` checks if files exist on the local filesystem:
-
-```python
-def get_compose_path(self, service: str) -> Path:
-    for filename in ("compose.yaml", "compose.yml", ...):
-        candidate = service_dir / filename
-        if candidate.exists():  # Local check!
-            return candidate
-```
-
-**Why this works:** NFS/shared storage means local = remote.
-
-**Why it could break:** If running compose-farm from a machine without the NFS mount, it returns `compose.yaml` (the default) even if `docker-compose.yml` exists on the remote host.
-
-**Potential fix:** Query the remote host for file existence, or accept this limitation and document it.
-
-**When to do:** Only if users need to run compose-farm from non-NFS machines.
-
---
-
-## 3. Add Integration Tests for CLI Commands (MEDIUM)
-
-**Current:** No integration tests for the actual CLI commands. Tests cover the underlying functions but not the Typer commands themselves.
-
-**Potential fix:** Add integration tests using `CliRunner` from Typer:
-
-```python
-from typer.testing import CliRunner
-from compose_farm.cli import app
-
-runner = CliRunner()
-
-def test_check_command_validates_config():
-    result = runner.invoke(app, ["check", "--local"])
-    assert result.exit_code == 0
-```
-
-**When to do:** When CLI behavior becomes complex enough to warrant dedicated testing.
-
---
-
-## 4. Add Tests for operations.py (MEDIUM)
-
-**Current:** Operations module has 30% coverage. Most logic is tested indirectly through test_sync.py.
-
-**Potential fix:** Add dedicated tests for:
- `up_services()` with migration scenarios
- `preflight_check()`
- `check_host_compatibility()`
-
-**When to do:** When adding new operations or modifying migration logic.
-
---
-
-## 5. Consider Structured Logging (LOW)
-
-**Current:** Operations print directly to console using Rich. This couples the operations module to the Rich library.
-
-**Potential fix:** Use Python's logging module with a custom Rich handler:
-
-```python
-import logging
-
-logger = logging.getLogger(__name__)
-
-# In operations:
-logger.info("Migrating %s from %s to %s", service, old_host, new_host)
-
-# In cli.py - configure Rich handler:
-from rich.logging import RichHandler
-logging.basicConfig(handlers=[RichHandler()])
-```
-
-**Benefits:**
- Operations become testable without capturing stdout
- Logs can be redirected to files
- Log levels provide filtering
-
-**When to do:** Only if console output coupling becomes a problem for testing or extensibility.
-
---
-
-## Design Decisions to Keep
-
-These patterns are working well and should be preserved:
-
-1. **asyncio + asyncssh** - Solid async foundation
-2. **Pydantic models** - Clean validation
-3. **Rich for output** - Good UX
-4. **Test structure** - Good coverage
-5. **Module separation** - cli/operations/executor/compose pattern
-6. **KISS principle** - Don't over-engineer
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -0,0 +1,318 @@
+---
+icon: lucide/rocket
+---
+
+# Getting Started
+
+This guide walks you through installing Compose Farm and setting up your first multi-host deployment.
+
+## Prerequisites
+
+Before you begin, ensure you have:
+
+- **[uv](https://docs.astral.sh/uv/)** (recommended) or Python 3.11+
+- **SSH key-based authentication** to your Docker hosts
+- **Docker and Docker Compose** installed on all target hosts
+- **Shared storage** for compose files (NFS, Syncthing, etc.)
+
+## Installation
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/install.webm" type="video/webm">
+</video>
+
+### One-liner (recommended)
+
+```bash
+curl -fsSL https://compose-farm.nijho.lt/install | sh
+```
+
+This installs [uv](https://docs.astral.sh/uv/) if needed, then installs compose-farm.
+
+### Using uv
+
+If you already have [uv](https://docs.astral.sh/uv/) installed:
+
+```bash
+uv tool install compose-farm
+```
+
+### Using pip
+
+If you already have Python 3.11+ installed:
+
+```bash
+pip install compose-farm
+```
+
+### Using Docker
+
+```bash
+docker run --rm \
+  -v $SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent \
+  -v ./compose-farm.yaml:/root/.config/compose-farm/compose-farm.yaml:ro \
+  ghcr.io/basnijholt/compose-farm up --all
+```
+
+### Verify Installation
+
+```bash
+cf --version
+cf --help
+```
+
+## SSH Setup
+
+Compose Farm uses SSH to run commands on remote hosts. You need passwordless SSH access.
+
+### Option 1: SSH Agent (default)
+
+If you already have SSH keys loaded in your agent:
+
+```bash
+# Verify keys are loaded
+ssh-add -l
+
+# Test connection
+ssh user@192.168.1.10 "docker --version"
+```
+
+### Option 2: Dedicated Key (recommended for Docker)
+
+For persistent access when running in Docker:
+
+```bash
+# Generate and distribute key to all hosts
+cf ssh setup
+
+# Check status
+cf ssh status
+```
+
+This creates `~/.ssh/compose-farm/id_ed25519` and copies the public key to each host.
+
+## Shared Storage Setup
+
+Compose files must be accessible at the **same path** on all hosts. Common approaches:
+
+### NFS Mount
+
+```bash
+# On each Docker host
+sudo mount nas:/volume1/compose /opt/compose
+
+# Or add to /etc/fstab
+nas:/volume1/compose /opt/compose nfs defaults 0 0
+```
+
+### Directory Structure
+
+```
+/opt/compose/           # compose_dir in config
+├── plex/
+│   └── docker-compose.yml
+├── sonarr/
+│   └── docker-compose.yml
+├── radarr/
+│   └── docker-compose.yml
+└── jellyfin/
+    └── docker-compose.yml
+```
+
+## Configuration
+
+### Create Config File
+
+Create `compose-farm.yaml` in the directory where you'll run commands. For example, if your stacks are in `/opt/stacks`, place the config there too:
+
+```bash
+cd /opt/stacks
+cf config init
+```
+
+Alternatively, use `~/.config/compose-farm/compose-farm.yaml` for a global config. You can also symlink a working directory config to the global location:
+
+```bash
+# Create config in your stacks directory, symlink to ~/.config
+cf config symlink /opt/stacks/compose-farm.yaml
+```
+
+This way, `cf` commands work from anywhere while the config lives with your stacks.
+
+#### Single host example
+
+```yaml
+# Where compose files are located (one folder per stack)
+compose_dir: /opt/stacks
+
+hosts:
+  local: localhost
+
+stacks:
+  plex: local
+  sonarr: local
+  radarr: local
+```
+
+#### Multi-host example
+```yaml
+# Where compose files are located (same path on all hosts)
+compose_dir: /opt/compose
+
+# Define your Docker hosts
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker           # SSH user
+  hp:
+    address: 192.168.1.11
+    # user defaults to current user
+
+# Map stacks to hosts
+stacks:
+  plex: nuc
+  sonarr: nuc
+  radarr: hp
+```
+
+Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
+
+For cross-host HTTP routing, add Traefik labels and configure `traefik_file` (see [Traefik Integration](traefik.md)).
+### Validate Configuration
+
+```bash
+cf check --local
+```
+
+This validates syntax without SSH connections. For full validation:
+
+```bash
+cf check
+```
+
+## First Commands
+
+### Check Status
+
+```bash
+cf ps
+```
+
+Shows all configured stacks and their status.
+
+### Start All Stacks
+
+```bash
+cf up --all
+```
+
+Starts all stacks on their assigned hosts.
+
+### Start Specific Stacks
+
+```bash
+cf up plex sonarr
+```
+
+### Apply Configuration
+
+The most powerful command - reconciles reality with your config:
+
+```bash
+cf apply --dry-run   # Preview changes
+cf apply             # Execute changes
+```
+
+This will:
+1. Start stacks in config but not running
+2. Migrate stacks on wrong host
+3. Stop stacks removed from config
+
+## Docker Network Setup
+
+If your stacks use an external Docker network:
+
+```bash
+# Create network on all hosts
+cf init-network
+
+# Or specific hosts
+cf init-network nuc hp
+```
+
+Default network: `mynetwork` with subnet `172.20.0.0/16`
+
+## Example Workflow
+
+### 1. Add a New Stack
+
+Create the compose file:
+
+```bash
+# On any host (shared storage)
+mkdir -p /opt/compose/prowlarr
+cat > /opt/compose/prowlarr/docker-compose.yml << 'EOF'
+services:
+  prowlarr:
+    image: lscr.io/linuxserver/prowlarr:latest
+    container_name: prowlarr
+    environment:
+      - PUID=1000
+      - PGID=1000
+    volumes:
+      - /opt/config/prowlarr:/config
+    ports:
+      - "9696:9696"
+    restart: unless-stopped
+EOF
+```
+
+Add to config:
+
+```yaml
+stacks:
+  # ... existing stacks
+  prowlarr: nuc
+```
+
+Start the stack:
+
+```bash
+cf up prowlarr
+```
+
+### 2. Move a Stack to Another Host
+
+Edit `compose-farm.yaml`:
+
+```yaml
+stacks:
+  plex: hp  # Changed from nuc
+```
+
+Apply the change:
+
+```bash
+cf up plex
+# Automatically: down on nuc, up on hp
+```
+
+Or use apply to reconcile everything:
+
+```bash
+cf apply
+```
+
+### 3. Update All Stacks
+
+```bash
+cf update --all
+# Runs: pull + build + down + up for each stack
+```
+
+## Next Steps
+
+- [Configuration Reference](configuration.md) - All config options
+- [Commands Reference](commands.md) - Full CLI documentation
+- [Traefik Integration](traefik.md) - Multi-host routing
+- [Best Practices](best-practices.md) - Tips and limitations
--- a/docs/index.md
+++ b/docs/index.md
@@ -0,0 +1,167 @@
+---
+icon: lucide/server
+---
+
+# Compose Farm
+
+A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.
+
+## What is Compose Farm?
+
+Compose Farm lets you manage Docker Compose stacks across multiple machines from a single command line. Think [Dockge](https://dockge.kuma.pet/) but with a CLI and web interface, designed for multi-host deployments.
+
+Define which stacks run where in one YAML file, then use `cf apply` to make reality match your configuration.
+It also works great on a single host with one folder per stack; just map stacks to `localhost`.
+
+## Quick Demo
+
+**CLI:**
+<video autoplay loop muted playsinline>
+  <source src="/assets/quickstart.webm" type="video/webm">
+</video>
+
+**[Web UI](web-ui.md):**
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-workflow.webm" type="video/webm">
+</video>
+
+## Why Compose Farm?
+
+| Problem | Compose Farm Solution |
+|---------|----------------------|
+| 100+ containers on one machine | Distribute across multiple hosts |
+| Kubernetes too complex | Just SSH + docker compose |
+| Swarm in maintenance mode | Zero infrastructure changes |
+| Manual SSH for each host | Single command for all |
+
+**It's a convenience wrapper, not a new paradigm.** Your existing `docker-compose.yml` files work unchanged.
+
+## Quick Start
+
+### Single host
+
+No SSH, shared storage, or Traefik file-provider required.
+
+```yaml
+# compose-farm.yaml
+compose_dir: /opt/stacks
+
+hosts:
+  local: localhost
+
+stacks:
+  plex: local
+  jellyfin: local
+  traefik: local
+```
+
+```bash
+cf apply  # Start/stop stacks to match config
+```
+
+### Multi-host
+
+Requires SSH plus a shared `compose_dir` path on all hosts (NFS or sync).
+
+```yaml
+# compose-farm.yaml
+compose_dir: /opt/compose
+
+hosts:
+  server-1:
+    address: 192.168.1.10
+  server-2:
+    address: 192.168.1.11
+
+stacks:
+  plex: server-1
+  jellyfin: server-2
+  sonarr: server-1
+```
+
+```bash
+cf apply  # Stacks start, migrate, or stop as needed
+```
+
+Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
+
+For cross-host HTTP routing, add Traefik labels and configure `traefik_file` to generate file-provider config.
+### Installation
+
+```bash
+uv tool install compose-farm
+# or
+pip install compose-farm
+```
+
+### Configuration
+
+Create `compose-farm.yaml` in the directory where you'll run commands (e.g., `/opt/stacks`), or in `~/.config/compose-farm/`:
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+
+stacks:
+  plex: nuc
+  sonarr: nuc
+  radarr: hp
+```
+
+See [Configuration](configuration.md) for all options and the full search order.
+
+### Usage
+
+```bash
+# Make reality match config
+cf apply
+
+# Start specific stacks
+cf up plex sonarr
+
+# Check status
+cf ps
+
+# View logs
+cf logs -f plex
+```
+
+## Key Features
+
+- **Declarative configuration**: One YAML defines where everything runs
+- **Auto-migration**: Change a host assignment, run `cf up`, stack moves automatically
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/migration.webm" type="video/webm">
+</video>
+- **Parallel execution**: Multiple stacks start/stop concurrently
+- **State tracking**: Knows which stacks are running where
+- **Traefik integration**: Generate file-provider config for cross-host routing
+- **Zero changes**: Your compose files work as-is
+
+## Requirements
+
+- [uv](https://docs.astral.sh/uv/) (recommended) or Python 3.11+
+- SSH key-based authentication to your Docker hosts
+- Docker and Docker Compose on all target hosts
+- Shared storage (compose files at same path on all hosts)
+
+## Documentation
+
+- [Getting Started](getting-started.md) - Installation and first steps
+- [Configuration](configuration.md) - All configuration options
+- [Commands](commands.md) - CLI reference
+- [Web UI](web-ui.md) - Browser-based management interface
+- [Architecture](architecture.md) - How it works under the hood
+- [Traefik Integration](traefik.md) - Multi-host routing setup
+- [Best Practices](best-practices.md) - Tips and limitations
+
+## License
+
+MIT
--- a/docs/install
+++ b/docs/install
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Compose Farm bootstrap script
+# Usage: curl -fsSL https://compose-farm.nijho.lt/install | sh
+#
+# This script installs uv (if needed) and then installs compose-farm as a uv tool.
+
+set -e
+
+if ! command -v uv >/dev/null 2>&1; then
+    echo "uv is not installed. Installing..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    echo "uv installation complete!"
+    echo ""
+
+    if [ -x ~/.local/bin/uv ]; then
+        ~/.local/bin/uv tool install compose-farm
+    else
+        echo "Please restart your shell and run this script again"
+        echo ""
+        exit 0
+    fi
+else
+    uv tool install compose-farm
+fi
+
+echo ""
+echo "compose-farm is installed!"
+echo "Run 'cf --help' to get started."
+echo "If 'cf' is not found, restart your shell or run: source ~/.bashrc"
--- a/docs/javascripts/video-fix.js
+++ b/docs/javascripts/video-fix.js
@@ -0,0 +1,21 @@
+// Fix video autoplay issues (Chrome on initial load, Safari on instant navigation)
+(function() {
+  function initVideos() {
+    document.querySelectorAll('video[autoplay]').forEach(function(video) {
+      video.load();
+      video.play().catch(function() {});
+    });
+  }
+
+  // For initial page load (needed for Chrome)
+  if (document.readyState === 'loading') {
+    document.addEventListener('DOMContentLoaded', initVideos);
+  } else {
+    initVideos();
+  }
+
+  // For MkDocs instant navigation (needed for Safari)
+  if (typeof document$ !== 'undefined') {
+    document$.subscribe(initVideos);
+  }
+})();
--- a/docs/reddit-post.md
+++ b/docs/reddit-post.md
@@ -5,7 +5,7 @@
 - I made a CLI to run Docker Compose across multiple hosts without Kubernetes or Swarm
 ---

-I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread services across multiple hosts, but:
+I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread stacks across multiple hosts, but:

 - **Kubernetes** felt like overkill. I don't need pods, ingress controllers, or 10x more YAML.
 - **Docker Swarm** is basically in maintenance mode.
@@ -15,7 +15,7 @@ So I built **Compose Farm**, a simple CLI that runs `docker compose` commands ov

 ## How it works

-One YAML file maps services to hosts:
+One YAML file maps stacks to hosts:

 ```yaml
 compose_dir: /opt/stacks
@@ -24,7 +24,7 @@ hosts:
  nuc: 192.168.1.10
  hp: 192.168.1.11

-services:
+stacks:
  plex: nuc
  jellyfin: hp
  sonarr: nuc
@@ -43,7 +43,7 @@ cf ps             # shows status across all hosts

 ## Auto-migration

-Change a service's host in the config and run `cf up`. It stops the service on the old host and starts it on the new one. No manual SSH needed.
+Change a stack's host in the config and run `cf up`. It stops the stack on the old host and starts it on the new one. No manual SSH needed.

 ```yaml
 # Before
@@ -65,7 +65,7 @@ cf up plex  # migrates automatically

 ## What it doesn't do

-- No high availability (if a host goes down, services don't auto-migrate)
+- No high availability (if a host goes down, stacks don't auto-migrate)
 - No overlay networking (containers on different hosts can't talk via Docker DNS)
 - No health checks or automatic restarts

--- a/docs/traefik.md
+++ b/docs/traefik.md
@@ -0,0 +1,385 @@
+---
+icon: lucide/globe
+---
+
+# Traefik Integration
+
+Compose Farm can generate Traefik file-provider configuration for routing traffic across multiple hosts.
+
+## The Problem
+
+When you run Traefik on one host but stacks on others, Traefik's docker provider can't see remote containers. The file provider bridges this gap.
+
+```
+                    Internet
+                        │
+                        ▼
+┌─────────────────────────────────────────────────────────────┐
+│                     Host: nuc                                │
+│                                                             │
+│  ┌─────────┐                                                │
+│  │ Traefik │◄─── Docker provider sees local containers      │
+│  │         │                                                │
+│  │         │◄─── File provider sees remote stacks           │
+│  └────┬────┘     (from compose-farm.yml)                    │
+│       │                                                     │
+└───────┼─────────────────────────────────────────────────────┘
+        │
+        ├────────────────────┐
+        │                    │
+        ▼                    ▼
+┌───────────────┐    ┌───────────────┐
+│   Host: hp    │    │  Host: nas    │
+│               │    │               │
+│  plex:32400   │    │ jellyfin:8096 │
+└───────────────┘    └───────────────┘
+```
+
+## How It Works
+
+1. Your compose files have standard Traefik labels
+2. Compose Farm reads labels and generates file-provider config
+3. Traefik watches the generated file
+4. Traffic routes to remote stacks via host IP + published port
+
+## Setup
+
+### Step 1: Configure Traefik File Provider
+
+Add directory watching to your Traefik config:
+
+```yaml
+# traefik.yml or docker-compose.yml command
+providers:
+  file:
+    directory: /opt/traefik/dynamic.d
+    watch: true
+```
+
+Or via command line:
+
+```yaml
+services:
+  traefik:
+    command:
+      - --providers.file.directory=/dynamic.d
+      - --providers.file.watch=true
+    volumes:
+      - /opt/traefik/dynamic.d:/dynamic.d:ro
+```
+
+### Step 2: Add Traefik Labels to Services
+
+Your compose files use standard Traefik labels:
+
+```yaml
+# /opt/compose/plex/docker-compose.yml
+services:
+  plex:
+    image: lscr.io/linuxserver/plex
+    ports:
+      - "32400:32400"  # IMPORTANT: Must publish port!
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.routers.plex.entrypoints=websecure
+      - traefik.http.routers.plex.tls.certresolver=letsencrypt
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+```
+
+**Important:** Services must publish ports for cross-host routing. Traefik connects via `host_ip:published_port`.
+
+### Step 3: Generate File Provider Config
+
+```bash
+cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+```
+
+This generates:
+
+```yaml
+# /opt/traefik/dynamic.d/compose-farm.yml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: plex
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.11:32400
+```
+
+## Auto-Regeneration
+
+Configure automatic regeneration in `compose-farm.yaml`:
+
+```yaml
+compose_dir: /opt/compose
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+  hp:
+    address: 192.168.1.11
+
+stacks:
+  traefik: nuc      # Traefik runs here
+  plex: hp          # Routed via file-provider
+  sonarr: hp
+```
+
+With `traefik_file` set, these commands auto-regenerate the config:
+- `cf up`
+- `cf down`
+- `cf restart`
+- `cf update`
+- `cf apply`
+
+### traefik_stack Option
+
+When set, stacks on the **same host as Traefik** are skipped in file-provider output. Traefik's docker provider handles them directly.
+
+```yaml
+traefik_stack: traefik  # traefik runs on nuc
+stacks:
+  traefik: nuc            # NOT in file-provider (docker provider)
+  portainer: nuc          # NOT in file-provider (docker provider)
+  plex: hp                # IN file-provider (cross-host)
+```
+
+## Label Syntax
+
+### Routers
+
+```yaml
+labels:
+  # Basic router
+  - traefik.http.routers.myapp.rule=Host(`app.example.com`)
+  - traefik.http.routers.myapp.entrypoints=websecure
+
+  # With TLS
+  - traefik.http.routers.myapp.tls=true
+  - traefik.http.routers.myapp.tls.certresolver=letsencrypt
+
+  # With middleware
+  - traefik.http.routers.myapp.middlewares=auth@file
+```
+
+### Services
+
+```yaml
+labels:
+  # Load balancer port
+  - traefik.http.services.myapp.loadbalancer.server.port=8080
+
+  # Health check
+  - traefik.http.services.myapp.loadbalancer.healthcheck.path=/health
+```
+
+### Middlewares
+
+Middlewares should be defined in a separate file (not generated by Compose Farm):
+
+```yaml
+# /opt/traefik/dynamic.d/middlewares.yml
+http:
+  middlewares:
+    auth:
+      basicAuth:
+        users:
+          - "user:$apr1$..."
+```
+
+Reference in labels:
+
+```yaml
+labels:
+  - traefik.http.routers.myapp.middlewares=auth@file
+```
+
+## Variable Substitution
+
+Labels can use environment variables:
+
+```yaml
+labels:
+  - traefik.http.routers.myapp.rule=Host(`${DOMAIN}`)
+```
+
+Compose Farm resolves variables from:
+1. Stack's `.env` file
+2. Current environment
+
+```bash
+# /opt/compose/myapp/.env
+DOMAIN=app.example.com
+```
+
+## Port Resolution
+
+Compose Farm determines the target URL from published ports:
+
+```yaml
+ports:
+  - "8080:80"           # Uses 8080
+  - "192.168.1.11:8080:80"  # Uses 8080 on specific IP
+```
+
+If no suitable port is found, a warning is shown.
+
+## Complete Example
+
+### compose-farm.yaml
+
+```yaml
+compose_dir: /opt/compose
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+  hp:
+    address: 192.168.1.11
+  nas:
+    address: 192.168.1.100
+
+stacks:
+  traefik: nuc
+  plex: hp
+  jellyfin: nas
+  sonarr: nuc
+  radarr: nuc
+```
+
+### /opt/compose/plex/docker-compose.yml
+
+```yaml
+services:
+  plex:
+    image: lscr.io/linuxserver/plex
+    container_name: plex
+    ports:
+      - "32400:32400"
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.routers.plex.entrypoints=websecure
+      - traefik.http.routers.plex.tls.certresolver=letsencrypt
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+    # ... other config
+```
+
+### Generated compose-farm.yml
+
+```yaml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: plex
+    jellyfin:
+      rule: Host(`jellyfin.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: jellyfin
+
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.11:32400
+    jellyfin:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.100:8096
+```
+
+Note: `sonarr` and `radarr` are NOT in the file because they're on the same host as Traefik (`nuc`).
+
+## Combining with Existing Config
+
+If you have existing Traefik dynamic config:
+
+```bash
+# Move existing config to directory
+mkdir -p /opt/traefik/dynamic.d
+mv /opt/traefik/dynamic.yml /opt/traefik/dynamic.d/manual.yml
+
+# Generate Compose Farm config
+cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+
+# Update Traefik to watch directory
+# --providers.file.directory=/dynamic.d
+```
+
+Traefik merges all YAML files in the directory.
+
+## Troubleshooting
+
+### Stack Not Accessible
+
+1. **Check port is published:**
+   ```yaml
+   ports:
+     - "8080:80"  # Must be published, not just exposed
+   ```
+
+2. **Check label syntax:**
+   ```bash
+   cf check mystack
+   ```
+
+3. **Verify generated config:**
+   ```bash
+   cf traefik-file mystack
+   ```
+
+4. **Check Traefik logs:**
+   ```bash
+   docker logs traefik
+   ```
+
+### Config Not Regenerating
+
+1. **Verify traefik_file is set:**
+   ```bash
+   cf config show | grep traefik
+   ```
+
+2. **Check file permissions:**
+   ```bash
+   ls -la /opt/traefik/dynamic.d/
+   ```
+
+3. **Manually regenerate:**
+   ```bash
+   cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+   ```
+
+### Variable Not Resolved
+
+1. **Check .env file exists:**
+   ```bash
+   cat /opt/compose/mystack/.env
+   ```
+
+2. **Test variable resolution:**
+   ```bash
+   cd /opt/compose/mystack
+   docker compose config
+   ```
--- a/docs/web-ui.md
+++ b/docs/web-ui.md
@@ -0,0 +1,130 @@
+---
+icon: lucide/layout-dashboard
+---
+
+# Web UI
+
+Compose Farm includes a web interface for managing stacks from your browser. Start it with:
+
+```bash
+cf web
+```
+
+Then open [http://localhost:8000](http://localhost:8000).
+
+## Features
+
+### Full Workflow
+
+Console terminal, config editor, stack navigation, actions (up, logs, update), dashboard overview, and theme switching - all in one flow.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-workflow.webm" type="video/webm">
+</video>
+
+### Stack Actions
+
+Navigate to any stack and use the command palette to trigger actions like restart, pull, update, or view logs. Output streams in real-time via WebSocket.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-stack.webm" type="video/webm">
+</video>
+
+### Theme Switching
+
+35 themes available via the command palette. Type `theme:` to filter, then use arrow keys to preview themes live before selecting.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-themes.webm" type="video/webm">
+</video>
+
+### Command Palette
+
+Press `Ctrl+K` (or `Cmd+K` on macOS) to open the command palette. Use fuzzy search to quickly navigate, trigger actions, or change themes.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-navigation.webm" type="video/webm">
+</video>
+
+## Pages
+
+### Dashboard (`/`)
+
+- Stack overview with status indicators
+- Host statistics
+- Pending operations (migrations, orphaned stacks)
+- Quick actions via command palette
+
+### Stack Detail (`/stack/{name}`)
+
+- Compose file editor (Monaco)
+- Environment file editor
+- Action buttons: Up, Down, Restart, Update, Pull, Logs
+- Container shell access (exec into running containers)
+- Terminal output for running commands
+
+### Console (`/console`)
+
+- Full shell access to any host
+- File editor for remote files
+- Monaco editor with syntax highlighting
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-console.webm" type="video/webm">
+</video>
+
+### Container Shell
+
+Click the Shell button on any running container to exec into it directly from the browser.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-shell.webm" type="video/webm">
+</video>
+
+## Keyboard Shortcuts
+
+| Shortcut | Action |
+|----------|--------|
+| `Ctrl+K` / `Cmd+K` | Open command palette |
+| `Ctrl+S` / `Cmd+S` | Save editors |
+| `Escape` | Close command palette |
+| `Arrow keys` | Navigate command list |
+| `Enter` | Execute selected command |
+
+## Starting the Server
+
+```bash
+# Default: http://0.0.0.0:8000
+cf web
+
+# Custom port
+cf web --port 3000
+
+# Development mode with auto-reload
+cf web --reload
+
+# Bind to specific interface
+cf web --host 127.0.0.1
+```
+
+## Requirements
+
+The web UI requires additional dependencies:
+
+```bash
+# If installed via pip
+pip install "compose-farm[web]"
+
+# If installed via uv
+uv tool install "compose-farm[web]"
+```
+
+## Architecture
+
+The web UI uses:
+
+- **FastAPI** - Backend API and WebSocket handling
+- **HTMX** - Dynamic page updates without full reloads
+- **DaisyUI + Tailwind** - Theming and styling
+- **Monaco Editor** - Code editing for compose/env files
+- **xterm.js** - Terminal emulation for logs and shell access
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,9 +2,9 @@

 Real-world examples demonstrating compose-farm patterns for multi-host Docker deployments.

-## Services
+## Stacks

-| Service | Type | Demonstrates |
+| Stack | Type | Demonstrates |
 |---------|------|--------------|
 | [traefik](traefik/) | Infrastructure | Reverse proxy, Let's Encrypt, file-provider |
 | [mealie](mealie/) | Single container | Traefik labels, resource limits, environment vars |
@@ -16,7 +16,7 @@ Real-world examples demonstrating compose-farm patterns for multi-host Docker de

 ### External Network

-All services connect to a shared external network for inter-service communication:
+All stacks connect to a shared external network for inter-service communication:

 ```yaml
 networks:
@@ -32,12 +32,12 @@ compose-farm init-network --network mynetwork --subnet 172.20.0.0/16

 ### Traefik Labels (Dual Routes)

-Services expose two routes for different access patterns:
+Stacks expose two routes for different access patterns:

 1. **HTTPS route** (`websecure` entrypoint): For your custom domain with Let's Encrypt TLS
 2. **HTTP route** (`web` entrypoint): For `.local` domains on your LAN (no TLS needed)

-This pattern allows accessing services via:
+This pattern allows accessing stacks via:
 - `https://mealie.example.com` - from anywhere, with TLS
 - `http://mealie.local` - from your local network, no TLS overhead

@@ -57,7 +57,7 @@ labels:

 ### Environment Variables

-Each service has a `.env` file for secrets and domain configuration.
+Each stack has a `.env` file for secrets and domain configuration.
 Edit these files to set your domain and credentials:

 ```bash
@@ -76,15 +76,15 @@ volumes:
  - /mnt/data/myapp:/app/data
 ```

-This allows services to migrate between hosts without data loss.
+This allows stacks to migrate between hosts without data loss.

-### Multi-Host Services
+### Multi-Host Stacks

-Services that need to run on every host (e.g., monitoring agents):
+Stacks that need to run on every host (e.g., monitoring agents):

 ```yaml
 # In compose-farm.yaml
-services:
+stacks:
  autokuma: all  # Runs on every configured host
 ```

@@ -107,7 +107,7 @@ services:

 ### AutoKuma Labels (Optional)

-The autokuma example demonstrates compose-farm's **multi-host feature** - running the same service on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.
+The autokuma example demonstrates compose-farm's **multi-host feature** - running the same stack on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.

 [AutoKuma](https://github.com/BigBoot/AutoKuma) automatically creates Uptime Kuma monitors from Docker labels:

@@ -128,7 +128,7 @@ compose-farm init-network
 # 2. Start Traefik first (the reverse proxy)
 compose-farm up traefik

-# 3. Start other services
+# 3. Start other stacks
 compose-farm up mealie uptime-kuma

 # 4. Check status
@@ -148,24 +148,24 @@ compose-farm down --all

 The `compose-farm.yaml` shows a multi-host setup:

-- **primary** (192.168.1.10): Runs Traefik and heavy services
-- **secondary** (192.168.1.11): Runs lighter services
+- **primary** (192.168.1.10): Runs Traefik and heavy stacks
+- **secondary** (192.168.1.11): Runs lighter stacks
 - **autokuma**: Runs on ALL hosts to monitor local containers

-When Traefik runs on `primary` and a service runs on `secondary`, compose-farm
+When Traefik runs on `primary` and a stack runs on `secondary`, compose-farm
 automatically generates file-provider config so Traefik can route to it.

 ## Traefik File-Provider

-When services run on different hosts than Traefik, use `traefik-file` to generate routing config:
+When stacks run on different hosts than Traefik, use `traefik-file` to generate routing config:

 ```bash
-# Generate config for all services
+# Generate config for all stacks
 compose-farm traefik-file --all -o traefik/dynamic.d/compose-farm.yml

 # Or configure auto-generation in compose-farm.yaml:
 traefik_file: /opt/stacks/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik
+traefik_stack: traefik
 ```

 With `traefik_file` configured, compose-farm automatically regenerates the config after `up`, `down`, `restart`, and `update` commands.
--- a/examples/compose-farm.yaml
+++ b/examples/compose-farm.yaml
@@ -7,34 +7,34 @@ compose_dir: /opt/stacks/compose-farm/examples

 # Auto-regenerate Traefik file-provider config after up/down/restart/update
 traefik_file: /opt/stacks/compose-farm/examples/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik  # Skip Traefik's host in file-provider (docker provider handles it)
+traefik_stack: traefik  # Skip Traefik's host in file-provider (docker provider handles it)

 hosts:
-  # Primary server - runs Traefik and most services
+  # Primary server - runs Traefik and most stacks
  # Full form with all options
  primary:
    address: 192.168.1.10
    user: deploy
    port: 22

-  # Secondary server - runs some services for load distribution
+  # Secondary server - runs some stacks for load distribution
  # Short form (user defaults to current user, port defaults to 22)
  secondary: 192.168.1.11

  # Local execution (no SSH) - for testing or when running on the host itself
  local: localhost

-services:
+stacks:
  # Infrastructure (runs on primary where Traefik is)
  traefik: primary

-  # Multi-host services (runs on ALL hosts)
+  # Multi-host stacks (runs on ALL hosts)
  # AutoKuma monitors Docker containers on each host
  autokuma: all

-  # Primary server services
+  # Primary server stacks
  paperless-ngx: primary

-  # Secondary server services (distributed for performance)
+  # Secondary server stacks (distributed for performance)
  mealie: secondary
  uptime-kuma: secondary
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -133,6 +133,10 @@ disallow_untyped_decorators = false
 module = "compose_farm.web.*"
 disallow_untyped_decorators = false

+[[tool.mypy.overrides]]
+module = "docs.demos.web.*"
+disallow_untyped_decorators = false
+
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
 testpaths = ["tests"]
@@ -145,6 +149,9 @@ addopts = [
    "--no-cov-on-fail",
    "-v",
 ]
+markers = [
+    "browser: marks tests as browser tests (deselect with '-m \"not browser\"')",
+]

 [tool.coverage.run]
 omit = []
@@ -157,9 +164,19 @@ exclude_lines = [
    'if __name__ == "__main__":',
 ]

+[tool.ty.environment]
+python-version = "3.11"
+
+[tool.ty.src]
+exclude = [
+    "hatch_build.py",  # Build-time only, hatchling not in dev deps
+    "docs/demos/**",   # Demo scripts with local conftest imports
+]
+
 [dependency-groups]
 dev = [
    "mypy>=1.19.0",
+    "ty>=0.0.1a13",
    "pre-commit>=4.5.0",
    "pytest>=9.0.2",
    "pytest-asyncio>=1.3.0",
@@ -174,4 +191,8 @@ dev = [
    "websockets>=12.0",
    # For FastAPI TestClient
    "httpx>=0.28.0",
+    # For browser tests (use system chromium via nix-shell -p chromium)
+    "pytest-playwright>=0.7.0",
+    # For parallel test execution
+    "pytest-xdist>=3.0.0",
 ]
--- a/src/compose_farm/cli/__init__.py
+++ b/src/compose_farm/cli/__init__.py
@@ -8,6 +8,7 @@ from compose_farm.cli import (
    lifecycle,  # noqa: F401
    management,  # noqa: F401
    monitoring,  # noqa: F401
+    ssh,  # noqa: F401
    web,  # noqa: F401
 )

--- a/src/compose_farm/cli/app.py
+++ b/src/compose_farm/cli/app.py
@@ -23,6 +23,7 @@ app = typer.Typer(
    help="Compose Farm - run docker compose commands across multiple hosts",
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
+    rich_markup_mode="rich",
 )


--- a/src/compose_farm/cli/common.py
+++ b/src/compose_farm/cli/common.py
@@ -18,7 +18,15 @@ from rich.progress import (
    TimeElapsedColumn,
 )

-from compose_farm.console import console, err_console
+from compose_farm.console import (
+    MSG_HOST_NOT_FOUND,
+    MSG_STACK_NOT_FOUND,
+    console,
+    print_error,
+    print_hint,
+    print_success,
+    print_warning,
+)

 if TYPE_CHECKING:
    from collections.abc import Callable, Coroutine, Generator
@@ -27,16 +35,17 @@ if TYPE_CHECKING:
    from compose_farm.executor import CommandResult

 _T = TypeVar("_T")
+_R = TypeVar("_R")


 # --- Shared CLI Options ---
-ServicesArg = Annotated[
+StacksArg = Annotated[
    list[str] | None,
-    typer.Argument(help="Services to operate on"),
+    typer.Argument(help="Stacks to operate on"),
 ]
 AllOption = Annotated[
    bool,
-    typer.Option("--all", "-a", help="Run on all services"),
+    typer.Option("--all", "-a", help="Run on all stacks"),
 ]
 ConfigOption = Annotated[
    Path | None,
@@ -48,7 +57,11 @@ LogPathOption = Annotated[
 ]
 HostOption = Annotated[
    str | None,
-    typer.Option("--host", "-H", help="Filter to services on this host"),
+    typer.Option("--host", "-H", help="Filter to stacks on this host"),
+]
+ServiceOption = Annotated[
+    str | None,
+    typer.Option("--service", "-s", help="Target a specific service within the stack"),
 ]

 # --- Constants (internal) ---
@@ -56,6 +69,13 @@ _MISSING_PATH_PREVIEW_LIMIT = 2
 _STATS_PREVIEW_LIMIT = 3  # Max number of pending migrations to show by name


+def format_host(host: str | list[str]) -> str:
+    """Return host unchanged if it is a str, or its items joined with ", " if it is a list."""
+    if isinstance(host, list):
+        return ", ".join(host)
+    return host
+
+
 @contextlib.contextmanager
 def progress_bar(
    label: str, total: int, *, initial_description: str = "[dim]connecting...[/]"
@@ -81,6 +101,37 @@ def progress_bar(
        yield progress, task_id


+def run_parallel_with_progress(
+    label: str,
+    items: list[_T],
+    async_fn: Callable[[_T], Coroutine[None, None, _R]],
+) -> list[_R]:
+    """Run async_fn concurrently for each item, showing a progress bar.
+
+    Args:
+        label: Progress bar label (e.g., "Discovering", "Querying hosts")
+        items: Items to process; one asyncio task is created per item
+        async_fn: Async function called once per item; its result must be
+                  indexable, since result[0] becomes the progress description
+
+    Returns:
+        Results of async_fn in completion order, which may differ from input order.
+
+    """
+
+    async def gather() -> list[_R]:
+        with progress_bar(label, len(items)) as (progress, task_id):
+            tasks = [asyncio.create_task(async_fn(item)) for item in items]
+            results: list[_R] = []
+            for coro in asyncio.as_completed(tasks):
+                result = await coro
+                results.append(result)
+                progress.update(task_id, advance=1, description=f"[cyan]{result[0]}[/]")  # type: ignore[index]
+            return results
+
+    return asyncio.run(gather())
+
+
 def load_config_or_exit(config_path: Path | None) -> Config:
    """Load config or exit with a friendly error message."""
    # Lazy import: pydantic adds ~50ms to startup, only load when actually needed
@@ -89,37 +140,62 @@ def load_config_or_exit(config_path: Path | None) -> Config:
    try:
        return load_config(config_path)
    except FileNotFoundError as e:
-        err_console.print(f"[red]✗[/] {e}")
+        print_error(str(e))
        raise typer.Exit(1) from e


-def get_services(
-    services: list[str],
-    all_services: bool,
+def get_stacks(
+    stacks: list[str],
+    all_stacks: bool,
    config_path: Path | None,
+    *,
+    host: str | None = None,
+    default_all: bool = False,
 ) -> tuple[list[str], Config]:
-    """Resolve service list and load config.
+    """Resolve stack list and load config.
+
+    Handles three mutually exclusive selection methods:
+    - Explicit stack names
+    - --all flag
+    - --host filter
+
+    Args:
+        stacks: Explicit stack names
+        all_stacks: Whether --all was specified
+        config_path: Path to config file
+        host: Filter to stacks on this host
+        default_all: If True, default to all stacks when nothing specified (for ps)

    Supports "." as shorthand for the current directory name.
+
    """
+    validate_stack_selection(stacks, all_stacks, host)
    config = load_config_or_exit(config_path)

-    if all_services:
-        return list(config.services.keys()), config
-    if not services:
-        err_console.print("[red]✗[/] Specify services or use --all")
+    if host is not None:
+        validate_hosts(config, host)
+        stack_list = [s for s in config.stacks if host in config.get_hosts(s)]
+        if not stack_list:
+            print_warning(f"No stacks configured for host [magenta]{host}[/]")
+            raise typer.Exit(0)
+        return stack_list, config
+
+    if all_stacks:
+        return list(config.stacks.keys()), config
+
+    if not stacks:
+        if default_all:
+            return list(config.stacks.keys()), config
+        print_error("Specify stacks or use [bold]--all[/] / [bold]--host[/]")
        raise typer.Exit(1)

    # Resolve "." to current directory name
-    resolved = [Path.cwd().name if svc == "." else svc for svc in services]
+    resolved = [Path.cwd().name if stack == "." else stack for stack in stacks]

-    # Validate all services exist in config
-    unknown = [svc for svc in resolved if svc not in config.services]
-    if unknown:
-        for svc in unknown:
-            err_console.print(f"[red]✗[/] Unknown service: [cyan]{svc}[/]")
-        err_console.print("[dim]Hint: Add the service to compose-farm.yaml or use --all[/]")
-        raise typer.Exit(1)
+    # Validate all stacks exist in config
+    validate_stacks(
+        config, resolved, hint="Add the stack to compose-farm.yaml or use [bold]--all[/]"
+    )

    return resolved, config

@@ -143,21 +219,19 @@ def report_results(results: list[CommandResult]) -> None:
        console.print()  # Blank line before summary
        if failed:
            for r in failed:
-                err_console.print(
-                    f"[red]✗[/] [cyan]{r.service}[/] failed with exit code {r.exit_code}"
-                )
+                print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")
            console.print()
            console.print(
-                f"[green]✓[/] {len(succeeded)}/{len(results)} services succeeded, "
+                f"[green]✓[/] {len(succeeded)}/{len(results)} stacks succeeded, "
                f"[red]✗[/] {len(failed)} failed"
            )
        else:
-            console.print(f"[green]✓[/] All {len(results)} services succeeded")
+            print_success(f"All {len(results)} stacks succeeded")

    elif failed:
-        # Single service failed
+        # Single stack failed
        r = failed[0]
-        err_console.print(f"[red]✗[/] [cyan]{r.service}[/] failed with exit code {r.exit_code}")
+        print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")

    if failed:
        raise typer.Exit(1)
@@ -169,12 +243,12 @@ def maybe_regenerate_traefik(
 ) -> None:
    """Regenerate traefik config if traefik_file is configured.

-    If results are provided, skips regeneration if all services failed.
+    If results are provided, skips regeneration if all stacks failed.
    """
    if cfg.traefik_file is None:
        return

-    # Skip if all services failed
+    # Skip if all stacks failed
    if results and not any(r.success for r in results):
        return

@@ -185,7 +259,7 @@ def maybe_regenerate_traefik(
    )

    try:
-        dynamic, warnings = generate_traefik_config(cfg, list(cfg.services.keys()))
+        dynamic, warnings = generate_traefik_config(cfg, list(cfg.stacks.keys()))
        new_content = render_traefik_config(dynamic)

        # Check if content changed
@@ -197,47 +271,58 @@ def maybe_regenerate_traefik(
            cfg.traefik_file.parent.mkdir(parents=True, exist_ok=True)
            cfg.traefik_file.write_text(new_content)
            console.print()  # Ensure we're on a new line after streaming output
-            console.print(f"[green]✓[/] Traefik config updated: {cfg.traefik_file}")
+            print_success(f"Traefik config updated: {cfg.traefik_file}")

        for warning in warnings:
-            err_console.print(f"[yellow]![/] {warning}")
+            print_warning(warning)
    except (FileNotFoundError, ValueError) as exc:
-        err_console.print(f"[yellow]![/] Failed to update traefik config: {exc}")
+        print_warning(f"Failed to update traefik config: {exc}")


-def validate_host_for_service(cfg: Config, service: str, host: str) -> None:
-    """Validate that a host is valid for a service."""
-    if host not in cfg.hosts:
-        err_console.print(f"[red]✗[/] Host '{host}' not found in config")
+def validate_stacks(cfg: Config, stacks: list[str], *, hint: str | None = None) -> None:
+    """Validate that all stacks exist in config. Exits with error if any not found."""
+    invalid = [s for s in stacks if s not in cfg.stacks]
+    if invalid:
+        for svc in invalid:
+            print_error(MSG_STACK_NOT_FOUND.format(name=svc))
+        if hint:
+            print_hint(hint)
        raise typer.Exit(1)
-    allowed_hosts = cfg.get_hosts(service)
+
+
+def validate_hosts(cfg: Config, hosts: str | list[str]) -> None:
+    """Exit with code 1, printing each unknown name, unless every host is in cfg.hosts."""
+    host_list = [hosts] if isinstance(hosts, str) else hosts  # normalize single name to a list
+    invalid = [h for h in host_list if h not in cfg.hosts]
+    if invalid:
+        for h in invalid:
+            print_error(MSG_HOST_NOT_FOUND.format(name=h))
+        raise typer.Exit(1)
+
+
+def validate_host_for_stack(cfg: Config, stack: str, host: str) -> None:
+    """Validate that a host is valid for a stack."""
+    validate_hosts(cfg, host)
+    allowed_hosts = cfg.get_hosts(stack)
    if host not in allowed_hosts:
-        err_console.print(
-            f"[red]✗[/] Service '{service}' is not configured for host '{host}' "
+        print_error(
+            f"Stack [cyan]{stack}[/] is not configured for host [magenta]{host}[/] "
            f"(configured: {', '.join(allowed_hosts)})"
        )
        raise typer.Exit(1)


-def run_host_operation(
-    cfg: Config,
-    svc_list: list[str],
-    host: str,
-    command: str,
-    action_verb: str,
-    state_callback: Callable[[Config, str, str], None],
+def validate_stack_selection(
+    stacks: list[str] | None,
+    all_stacks: bool,
+    host: str | None,
 ) -> None:
-    """Run an operation on a specific host for multiple services."""
-    from compose_farm.executor import run_compose_on_host  # noqa: PLC0415
+    """Validate that only one stack selection method is used.

-    results: list[CommandResult] = []
-    for service in svc_list:
-        validate_host_for_service(cfg, service, host)
-        console.print(f"[cyan]\\[{service}][/] {action_verb} on [magenta]{host}[/]...")
-        result = run_async(run_compose_on_host(cfg, service, host, command, raw=True))
-        print()  # Newline after raw output
-        results.append(result)
-        if result.success:
-            state_callback(cfg, service, host)
-    maybe_regenerate_traefik(cfg, results)
-    report_results(results)
+    The three selection methods (explicit stacks, --all, --host) are mutually
+    exclusive. This ensures consistent behavior across all commands.
+    """
+    methods = sum([bool(stacks), all_stacks, host is not None])
+    if methods > 1:
+        print_error("Use only one of: stack names, [bold]--all[/], or [bold]--host[/]")
+        raise typer.Exit(1)
--- a/src/compose_farm/cli/config.py
+++ b/src/compose_farm/cli/config.py
@@ -14,8 +14,8 @@ from typing import Annotated
 import typer

 from compose_farm.cli.app import app
-from compose_farm.console import console, err_console
-from compose_farm.paths import config_search_paths, default_config_path
+from compose_farm.console import MSG_CONFIG_NOT_FOUND, console, print_error, print_success
+from compose_farm.paths import config_search_paths, default_config_path, find_config_path

 config_app = typer.Typer(
    name="config",
@@ -40,24 +40,12 @@ _RawOption = Annotated[


 def _get_editor() -> str:
-    """Get the user's preferred editor.
-
-    Checks $EDITOR, then $VISUAL, then falls back to platform defaults.
-    """
-    for env_var in ("EDITOR", "VISUAL"):
-        editor = os.environ.get(env_var)
-        if editor:
-            return editor
-
+    """Get the user's preferred editor ($EDITOR > $VISUAL > platform default)."""
+    if editor := os.environ.get("EDITOR") or os.environ.get("VISUAL"):
+        return editor
    if platform.system() == "Windows":
        return "notepad"
-
-    # Try common editors on Unix-like systems
-    for editor in ("nano", "vim", "vi"):
-        if shutil.which(editor):
-            return editor
-
-    return "vi"
+    return next((e for e in ("nano", "vim", "vi") if shutil.which(e)), "vi")


 def _generate_template() -> str:
@@ -66,8 +54,8 @@ def _generate_template() -> str:
        template_file = resources.files("compose_farm") / "example-config.yaml"
        return template_file.read_text(encoding="utf-8")
    except FileNotFoundError as e:
-        err_console.print("[red]Example config template is missing from the package.[/red]")
-        err_console.print("Reinstall compose-farm or report this issue.")
+        print_error("Example config template is missing from the package")
+        console.print("Reinstall compose-farm or report this issue.")
        raise typer.Exit(1) from e


@@ -76,18 +64,21 @@ def _get_config_file(path: Path | None) -> Path | None:
    if path:
        return path.expanduser().resolve()

-    # Check environment variable
-    if env_path := os.environ.get("CF_CONFIG"):
-        p = Path(env_path)
-        if p.exists():
-            return p.resolve()
+    config_path = find_config_path()
+    return config_path.resolve() if config_path else None

-    # Check standard locations
-    for p in config_search_paths():
-        if p.exists():
-            return p.resolve()

-    return None
+def _report_missing_config(explicit_path: Path | None = None) -> None:
+    """Print a not-found notice: the explicit path if given, else each searched location."""
+    console.print("[yellow]Config file not found.[/yellow]")
+    if explicit_path:
+        console.print(f"\nProvided path does not exist: [cyan]{explicit_path}[/cyan]")
+    else:
+        console.print("\nSearched locations:")
+        for p in config_search_paths():
+            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
+            console.print(f"  - {p} ({status})")
+    console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")


 @config_app.command("init")
@@ -117,7 +108,7 @@ def config_init(
    template_content = _generate_template()
    target_path.write_text(template_content, encoding="utf-8")

-    console.print(f"[green]✓[/] Config file created at: {target_path}")
+    print_success(f"Config file created at: {target_path}")
    console.print("\n[dim]Edit the file to customize your settings:[/dim]")
    console.print("  [cyan]cf config edit[/cyan]")

@@ -133,17 +124,11 @@ def config_edit(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            console.print(f"  - {p}")
+        _report_missing_config()
        raise typer.Exit(1)

    if not config_file.exists():
-        console.print("[yellow]Config file not found.[/yellow]")
-        console.print(f"\nProvided path does not exist: [cyan]{config_file}[/cyan]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config(config_file)
        raise typer.Exit(1)

    editor = _get_editor()
@@ -152,21 +137,21 @@ def config_edit(
    try:
        editor_cmd = shlex.split(editor, posix=os.name != "nt")
    except ValueError as e:
-        err_console.print("[red]Invalid editor command. Check $EDITOR/$VISUAL.[/red]")
+        print_error("Invalid editor command. Check [bold]$EDITOR[/]/[bold]$VISUAL[/]")
        raise typer.Exit(1) from e

    if not editor_cmd:
-        err_console.print("[red]Editor command is empty.[/red]")
+        print_error("Editor command is empty")
        raise typer.Exit(1)

    try:
        subprocess.run([*editor_cmd, str(config_file)], check=True)
    except FileNotFoundError:
-        err_console.print(f"[red]Editor '{editor_cmd[0]}' not found.[/red]")
-        err_console.print("Set $EDITOR environment variable to your preferred editor.")
+        print_error(f"Editor [cyan]{editor_cmd[0]}[/] not found")
+        console.print("Set [bold]$EDITOR[/] environment variable to your preferred editor.")
        raise typer.Exit(1) from None
    except subprocess.CalledProcessError as e:
-        err_console.print(f"[red]Editor exited with error code {e.returncode}[/red]")
+        print_error(f"Editor exited with error code {e.returncode}")
        raise typer.Exit(e.returncode) from None


@@ -179,18 +164,11 @@ def config_show(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
-            console.print(f"  - {p} ({status})")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config()
        raise typer.Exit(0)

    if not config_file.exists():
-        console.print("[yellow]Config file not found.[/yellow]")
-        console.print(f"\nProvided path does not exist: [cyan]{config_file}[/cyan]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config(config_file)
        raise typer.Exit(1)

    content = config_file.read_text(encoding="utf-8")
@@ -217,11 +195,7 @@ def config_path(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
-            console.print(f"  - {p} ({status})")
+        _report_missing_config()
        raise typer.Exit(1)

    # Just print the path for easy piping
@@ -236,7 +210,7 @@ def config_validate(
    config_file = _get_config_file(path)

    if config_file is None:
-        err_console.print("[red]✗[/] No config file found")
+        print_error(MSG_CONFIG_NOT_FOUND)
        raise typer.Exit(1)

    # Lazy import: pydantic adds ~50ms to startup, only load when actually needed
@@ -245,15 +219,15 @@ def config_validate(
    try:
        cfg = load_config(config_file)
    except FileNotFoundError as e:
-        err_console.print(f"[red]✗[/] {e}")
+        print_error(str(e))
        raise typer.Exit(1) from e
    except Exception as e:
-        err_console.print(f"[red]✗[/] Invalid config: {e}")
+        print_error(f"Invalid config: {e}")
        raise typer.Exit(1) from e

-    console.print(f"[green]✓[/] Valid config: {config_file}")
+    print_success(f"Valid config: {config_file}")
    console.print(f"  Hosts: {len(cfg.hosts)}")
-    console.print(f"  Services: {len(cfg.services)}")
+    console.print(f"  Stacks: {len(cfg.stacks)}")


 @config_app.command("symlink")
@@ -278,11 +252,11 @@ def config_symlink(
    target_path = (target or Path("compose-farm.yaml")).expanduser().resolve()

    if not target_path.exists():
-        err_console.print(f"[red]✗[/] Target config file not found: {target_path}")
+        print_error(f"Target config file not found: {target_path}")
        raise typer.Exit(1)

    if not target_path.is_file():
-        err_console.print(f"[red]✗[/] Target is not a file: {target_path}")
+        print_error(f"Target is not a file: {target_path}")
        raise typer.Exit(1)

    symlink_path = default_config_path()
@@ -292,7 +266,7 @@ def config_symlink(
        if symlink_path.is_symlink():
            current_target = symlink_path.resolve() if symlink_path.exists() else None
            if current_target == target_path:
-                console.print(f"[green]✓[/] Symlink already points to: {target_path}")
+                print_success(f"Symlink already points to: {target_path}")
                return
            # Update existing symlink
            if not force:
@@ -304,8 +278,8 @@ def config_symlink(
            symlink_path.unlink()
        else:
            # Regular file exists
-            err_console.print(f"[red]✗[/] A regular file exists at: {symlink_path}")
-            err_console.print("    Back it up or remove it first, then retry.")
+            print_error(f"A regular file exists at: {symlink_path}")
+            console.print("    Back it up or remove it first, then retry.")
            raise typer.Exit(1)

    # Create parent directories
@@ -314,7 +288,7 @@ def config_symlink(
    # Create symlink with absolute path
    symlink_path.symlink_to(target_path)

-    console.print("[green]✓[/] Created symlink:")
+    print_success("Created symlink:")
    console.print(f"    {symlink_path}")
    console.print(f"    -> {target_path}")

--- a/src/compose_farm/cli/lifecycle.py
+++ b/src/compose_farm/cli/lifecycle.py
@@ -2,242 +2,249 @@

 from __future__ import annotations

-from typing import TYPE_CHECKING, Annotated
+from pathlib import Path
+from typing import Annotated

 import typer

-if TYPE_CHECKING:
-    from compose_farm.config import Config
-
 from compose_farm.cli.app import app
 from compose_farm.cli.common import (
    AllOption,
    ConfigOption,
    HostOption,
-    ServicesArg,
-    get_services,
+    ServiceOption,
+    StacksArg,
+    format_host,
+    get_stacks,
    load_config_or_exit,
    maybe_regenerate_traefik,
    report_results,
    run_async,
-    run_host_operation,
+    validate_host_for_stack,
+    validate_stacks,
 )
-from compose_farm.console import console, err_console
-from compose_farm.executor import run_on_services, run_sequential_on_services
-from compose_farm.operations import stop_orphaned_services, up_services
+from compose_farm.console import MSG_DRY_RUN, console, print_error, print_success
+from compose_farm.executor import run_compose_on_host, run_on_stacks, run_sequential_on_stacks
+from compose_farm.operations import stop_orphaned_stacks, up_stacks
 from compose_farm.state import (
-    add_service_to_host,
-    get_orphaned_services,
-    get_service_host,
-    get_services_needing_migration,
-    get_services_not_in_state,
-    remove_service,
-    remove_service_from_host,
+    get_orphaned_stacks,
+    get_stack_host,
+    get_stacks_needing_migration,
+    get_stacks_not_in_state,
+    remove_stack,
 )


 @app.command(rich_help_panel="Lifecycle")
 def up(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    host: HostOption = None,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Start services (docker compose up -d). Auto-migrates if host changed."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-
-    # Per-host operation: run on specific host only
-    if host:
-        run_host_operation(cfg, svc_list, host, "up -d", "Starting", add_service_to_host)
-        return
-
-    # Normal operation: use up_services with migration logic
-    results = run_async(up_services(cfg, svc_list, raw=True))
+    """Start stacks (docker compose up -d). Auto-migrates if host changed."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
+    if service:
+        if len(stack_list) != 1:
+            print_error("--service requires exactly one stack")
+            raise typer.Exit(1)
+        # For service-level up, use run_on_stacks directly (no migration logic)
+        results = run_async(run_on_stacks(cfg, stack_list, f"up -d {service}", raw=True))
+    else:
+        results = run_async(up_stacks(cfg, stack_list, raw=True))
    maybe_regenerate_traefik(cfg, results)
    report_results(results)


 @app.command(rich_help_panel="Lifecycle")
 def down(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    orphaned: Annotated[
        bool,
-        typer.Option(
-            "--orphaned", help="Stop orphaned services (in state but removed from config)"
-        ),
+        typer.Option("--orphaned", help="Stop orphaned stacks (in state but removed from config)"),
    ] = False,
    host: HostOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Stop services (docker compose down)."""
-    # Handle --orphaned flag
+    """Stop stacks (docker compose down)."""
+    # Handle --orphaned flag (mutually exclusive with other selection methods)
    if orphaned:
-        if services or all_services or host:
-            err_console.print("[red]✗[/] Cannot use --orphaned with services, --all, or --host")
+        if stacks or all_stacks or host:
+            print_error(
+                "Cannot combine [bold]--orphaned[/] with stacks, [bold]--all[/], or [bold]--host[/]"
+            )
            raise typer.Exit(1)

        cfg = load_config_or_exit(config)
-        orphaned_services = get_orphaned_services(cfg)
+        orphaned_stacks = get_orphaned_stacks(cfg)

-        if not orphaned_services:
-            console.print("[green]✓[/] No orphaned services to stop")
+        if not orphaned_stacks:
+            print_success("No orphaned stacks to stop")
            return

        console.print(
-            f"[yellow]Stopping {len(orphaned_services)} orphaned service(s):[/] "
-            f"{', '.join(orphaned_services.keys())}"
+            f"[yellow]Stopping {len(orphaned_stacks)} orphaned stack(s):[/] "
+            f"{', '.join(orphaned_stacks.keys())}"
        )
-        results = run_async(stop_orphaned_services(cfg))
+        results = run_async(stop_orphaned_stacks(cfg))
        report_results(results)
        return

-    svc_list, cfg = get_services(services or [], all_services, config)
-
-    # Per-host operation: run on specific host only
-    if host:
-        run_host_operation(cfg, svc_list, host, "down", "Stopping", remove_service_from_host)
-        return
-
-    # Normal operation
-    raw = len(svc_list) == 1
-    results = run_async(run_on_services(cfg, svc_list, "down", raw=raw))
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
+    raw = len(stack_list) == 1
+    results = run_async(run_on_stacks(cfg, stack_list, "down", raw=raw))

    # Remove from state on success
-    # For multi-host services, result.service is "svc@host", extract base name
-    removed_services: set[str] = set()
+    # For multi-host stacks, result.stack is "stack@host", extract base name
+    removed_stacks: set[str] = set()
    for result in results:
        if result.success:
-            base_service = result.service.split("@")[0]
-            if base_service not in removed_services:
-                remove_service(cfg, base_service)
-                removed_services.add(base_service)
+            base_stack = result.stack.split("@")[0]
+            if base_stack not in removed_stacks:
+                remove_stack(cfg, base_stack)
+                removed_stacks.add(base_stack)

    maybe_regenerate_traefik(cfg, results)
    report_results(results)


+@app.command(rich_help_panel="Lifecycle")
+def stop(
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
+    config: ConfigOption = None,
+) -> None:
+    """Stop services without removing containers (docker compose stop)."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
+        raise typer.Exit(1)
+    cmd = f"stop {service}" if service else "stop"
+    raw = len(stack_list) == 1
+    results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=raw))
+    report_results(results)
+
+
 @app.command(rich_help_panel="Lifecycle")
 def pull(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
    """Pull latest images (docker compose pull)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(run_on_services(cfg, svc_list, "pull", raw=raw))
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
+        raise typer.Exit(1)
+    cmd = f"pull {service}" if service else "pull"
+    raw = len(stack_list) == 1
+    results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=raw))
    report_results(results)


 @app.command(rich_help_panel="Lifecycle")
 def restart(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Restart services (down + up)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(run_sequential_on_services(cfg, svc_list, ["down", "up -d"], raw=raw))
+    """Restart stacks (down + up). With --service, restarts just that service."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service:
+        if len(stack_list) != 1:
+            print_error("--service requires exactly one stack")
+            raise typer.Exit(1)
+        # For service-level restart, use docker compose restart (more efficient)
+        raw = True
+        results = run_async(run_on_stacks(cfg, stack_list, f"restart {service}", raw=raw))
+    else:
+        raw = len(stack_list) == 1
+        results = run_async(run_sequential_on_stacks(cfg, stack_list, ["down", "up -d"], raw=raw))
    maybe_regenerate_traefik(cfg, results)
    report_results(results)


@app.command(rich_help_panel="Lifecycle")
 def update(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Update services (pull + build + down + up)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(
-        run_sequential_on_services(
-            cfg, svc_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
+    """Update stacks (pull + build + down + up). With --service, updates just that service."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service:
+        if len(stack_list) != 1:
+            print_error("--service requires exactly one stack")
+            raise typer.Exit(1)
+        # For service-level update: pull + build + stop + up (stop instead of down)
+        raw = True
+        results = run_async(
+            run_sequential_on_stacks(
+                cfg,
+                stack_list,
+                [
+                    f"pull --ignore-buildable {service}",
+                    f"build {service}",
+                    f"stop {service}",
+                    f"up -d {service}",
+                ],
+                raw=raw,
+            )
+        )
+    else:
+        raw = len(stack_list) == 1
+        results = run_async(
+            run_sequential_on_stacks(
+                cfg, stack_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
+            )
        )
-    )
    maybe_regenerate_traefik(cfg, results)
    report_results(results)


-def _format_host(host: str | list[str]) -> str:
-    """Format a host value for display."""
-    if isinstance(host, list):
-        return ", ".join(host)
-    return host
-
-
-def _report_pending_migrations(cfg: Config, migrations: list[str]) -> None:
-    """Report services that need migration."""
-    console.print(f"[cyan]Services to migrate ({len(migrations)}):[/]")
-    for svc in migrations:
-        current = get_service_host(cfg, svc)
-        target = cfg.get_hosts(svc)[0]
-        console.print(f"  [cyan]{svc}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
-
-
-def _report_pending_orphans(orphaned: dict[str, str | list[str]]) -> None:
-    """Report orphaned services that will be stopped."""
-    console.print(f"[yellow]Orphaned services to stop ({len(orphaned)}):[/]")
-    for svc, hosts in orphaned.items():
-        console.print(f"  [cyan]{svc}[/] on [magenta]{_format_host(hosts)}[/]")
-
-
-def _report_pending_starts(cfg: Config, missing: list[str]) -> None:
-    """Report services that will be started."""
-    console.print(f"[green]Services to start ({len(missing)}):[/]")
-    for svc in missing:
-        target = _format_host(cfg.get_hosts(svc))
-        console.print(f"  [cyan]{svc}[/] on [magenta]{target}[/]")
-
-
-def _report_pending_refresh(cfg: Config, to_refresh: list[str]) -> None:
-    """Report services that will be refreshed."""
-    console.print(f"[blue]Services to refresh ({len(to_refresh)}):[/]")
-    for svc in to_refresh:
-        target = _format_host(cfg.get_hosts(svc))
-        console.print(f"  [cyan]{svc}[/] on [magenta]{target}[/]")
-
-
@app.command(rich_help_panel="Lifecycle")
-def apply(
+def apply(  # noqa: PLR0912 (multi-phase reconciliation needs these branches)
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", "-n", help="Show what would change without executing"),
    ] = False,
    no_orphans: Annotated[
        bool,
-        typer.Option("--no-orphans", help="Only migrate, don't stop orphaned services"),
+        typer.Option("--no-orphans", help="Only migrate, don't stop orphaned stacks"),
    ] = False,
    full: Annotated[
        bool,
-        typer.Option("--full", "-f", help="Also run up on all services to apply config changes"),
+        typer.Option("--full", "-f", help="Also run up on all stacks to apply config changes"),
    ] = False,
    config: ConfigOption = None,
 ) -> None:
    """Make reality match config (start, migrate, stop as needed).

-    This is the "reconcile" command that ensures running services match your
+    This is the "reconcile" command that ensures running stacks match your
    config file. It will:

-    1. Stop orphaned services (in state but removed from config)
-    2. Migrate services on wrong host (host in state ≠ host in config)
-    3. Start missing services (in config but not in state)
+    1. Stop orphaned stacks (in state but removed from config)
+    2. Migrate stacks on wrong host (host in state ≠ host in config)
+    3. Start missing stacks (in config but not in state)

    Use --dry-run to preview changes before applying.
-    Use --no-orphans to only migrate/start without stopping orphaned services.
-    Use --full to also run 'up' on all services (picks up compose/env changes).
+    Use --no-orphans to only migrate/start without stopping orphaned stacks.
+    Use --full to also run 'up' on all stacks (picks up compose/env changes).
    """
    cfg = load_config_or_exit(config)
-    orphaned = get_orphaned_services(cfg)
-    migrations = get_services_needing_migration(cfg)
-    missing = get_services_not_in_state(cfg)
+    orphaned = get_orphaned_stacks(cfg)
+    migrations = get_stacks_needing_migration(cfg)
+    missing = get_stacks_not_in_state(cfg)

-    # For --full: refresh all services not already being started/migrated
+    # For --full: refresh all stacks not already being started/migrated
    handled = set(migrations) | set(missing)
-    to_refresh = [svc for svc in cfg.services if svc not in handled] if full else []
+    to_refresh = [stack for stack in cfg.stacks if stack not in handled] if full else []

    has_orphans = bool(orphaned) and not no_orphans
    has_migrations = bool(migrations)
@@ -245,55 +252,122 @@ def apply(
    has_refresh = bool(to_refresh)

    if not has_orphans and not has_migrations and not has_missing and not has_refresh:
-        console.print("[green]✓[/] Nothing to apply - reality matches config")
+        print_success("Nothing to apply - reality matches config")
        return

    # Report what will be done
    if has_orphans:
-        _report_pending_orphans(orphaned)
+        console.print(f"[yellow]Orphaned stacks to stop ({len(orphaned)}):[/]")
+        for svc, hosts in orphaned.items():
+            console.print(f"  [cyan]{svc}[/] on [magenta]{format_host(hosts)}[/]")
    if has_migrations:
-        _report_pending_migrations(cfg, migrations)
+        console.print(f"[cyan]Stacks to migrate ({len(migrations)}):[/]")
+        for stack in migrations:
+            current = get_stack_host(cfg, stack)
+            target = cfg.get_hosts(stack)[0]
+            console.print(f"  [cyan]{stack}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
    if has_missing:
-        _report_pending_starts(cfg, missing)
+        console.print(f"[green]Stacks to start ({len(missing)}):[/]")
+        for stack in missing:
+            console.print(f"  [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")
    if has_refresh:
-        _report_pending_refresh(cfg, to_refresh)
+        console.print(f"[blue]Stacks to refresh ({len(to_refresh)}):[/]")
+        for stack in to_refresh:
+            console.print(f"  [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")

    if dry_run:
-        console.print("\n[dim](dry-run: no changes made)[/]")
+        console.print(f"\n{MSG_DRY_RUN}")
        return

    # Execute changes
    console.print()
    all_results = []

-    # 1. Stop orphaned services first
+    # 1. Stop orphaned stacks first
    if has_orphans:
-        console.print("[yellow]Stopping orphaned services...[/]")
-        all_results.extend(run_async(stop_orphaned_services(cfg)))
+        console.print("[yellow]Stopping orphaned stacks...[/]")
+        all_results.extend(run_async(stop_orphaned_stacks(cfg)))

-    # 2. Migrate services on wrong host
+    # 2. Migrate stacks on wrong host
    if has_migrations:
-        console.print("[cyan]Migrating services...[/]")
-        migrate_results = run_async(up_services(cfg, migrations, raw=True))
+        console.print("[cyan]Migrating stacks...[/]")
+        migrate_results = run_async(up_stacks(cfg, migrations, raw=True))
        all_results.extend(migrate_results)
        maybe_regenerate_traefik(cfg, migrate_results)

-    # 3. Start missing services (reuse up_services which handles state updates)
+    # 3. Start missing stacks (reuse up_stacks which handles state updates)
    if has_missing:
-        console.print("[green]Starting missing services...[/]")
-        start_results = run_async(up_services(cfg, missing, raw=True))
+        console.print("[green]Starting missing stacks...[/]")
+        start_results = run_async(up_stacks(cfg, missing, raw=True))
        all_results.extend(start_results)
        maybe_regenerate_traefik(cfg, start_results)

-    # 4. Refresh remaining services (--full: run up to apply config changes)
+    # 4. Refresh remaining stacks (--full: run up to apply config changes)
    if has_refresh:
-        console.print("[blue]Refreshing services...[/]")
-        refresh_results = run_async(up_services(cfg, to_refresh, raw=True))
+        console.print("[blue]Refreshing stacks...[/]")
+        refresh_results = run_async(up_stacks(cfg, to_refresh, raw=True))
        all_results.extend(refresh_results)
        maybe_regenerate_traefik(cfg, refresh_results)

    report_results(all_results)


+@app.command(
+    rich_help_panel="Lifecycle",
+    context_settings={"allow_interspersed_args": False},
+)
+def compose(
+    stack: Annotated[str, typer.Argument(help="Stack to operate on (use '.' for current dir)")],
+    command: Annotated[str, typer.Argument(help="Docker compose command")],
+    args: Annotated[list[str] | None, typer.Argument(help="Additional arguments")] = None,
+    host: HostOption = None,
+    config: ConfigOption = None,
+) -> None:
+    """Run any docker compose command on a stack.
+
+    Passthrough to docker compose for commands not wrapped by cf.
+    Options after COMMAND are passed to docker compose, not cf.
+    Stacks that run on multiple hosts require --host to pick one.
+
+    Examples:
+      cf compose mystack --help        - show docker compose help
+      cf compose mystack top           - view running processes
+      cf compose mystack images        - list images
+      cf compose mystack exec web bash - interactive shell
+      cf compose mystack config        - view parsed config
+
+    """
+    import shlex
+
+    cfg = load_config_or_exit(config)
+
+    # Resolve "." to current directory name
+    resolved_stack = Path.cwd().name if stack == "." else stack
+    validate_stacks(cfg, [resolved_stack])
+
+    # Handle multi-host stacks: the caller must say which host to target
+    hosts = cfg.get_hosts(resolved_stack)
+    if len(hosts) > 1:
+        if host is None:
+            print_error(
+                f"Stack [cyan]{resolved_stack}[/] runs on multiple hosts: {', '.join(hosts)}\n"
+                f"Use [bold]--host[/] to specify which host"
+            )
+            raise typer.Exit(1)
+        validate_host_for_stack(cfg, resolved_stack, host)
+        target_host = host
+    else:
+        target_host = hosts[0]
+
+    # Build the full compose command. Every token is shell-quoted so that an
+    # argument containing spaces or shell metacharacters (e.g. the script in
+    # `exec web sh -c "echo hi"`) stays a single argument rather than being
+    # re-split or interpreted when the command string is executed.
+    # NOTE(review): assumes the command string is ultimately parsed by a shell
+    # (or shlex-style splitter) - confirm against run_compose_on_host.
+    full_cmd = shlex.join([command, *(args or [])])
+
+    # Run with raw=True for proper TTY handling (progress bars, interactive)
+    result = run_async(run_compose_on_host(cfg, resolved_stack, target_host, full_cmd, raw=True))
+    print()  # Ensure newline after raw output
+
+    # Propagate docker compose's own exit status to the caller
+    if not result.success:
+        raise typer.Exit(result.exit_code)
+
+
+
 # Alias: cf a = cf apply
 app.command("a", hidden=True)(apply)
--- a/src/compose_farm/cli/management.py
+++ b/src/compose_farm/cli/management.py
@@ -8,7 +8,6 @@ from pathlib import Path  # noqa: TC003
 from typing import TYPE_CHECKING, Annotated

 import typer
-from rich.progress import Progress, TaskID  # noqa: TC002

 from compose_farm.cli.app import app
 from compose_farm.cli.common import (
@@ -16,17 +15,26 @@ from compose_farm.cli.common import (
    AllOption,
    ConfigOption,
    LogPathOption,
-    ServicesArg,
-    get_services,
+    StacksArg,
+    format_host,
+    get_stacks,
    load_config_or_exit,
-    progress_bar,
    run_async,
+    run_parallel_with_progress,
+    validate_hosts,
+    validate_stacks,
 )

 if TYPE_CHECKING:
    from compose_farm.config import Config

-from compose_farm.console import console, err_console
+from compose_farm.console import (
+    MSG_DRY_RUN,
+    console,
+    print_error,
+    print_success,
+    print_warning,
+)
 from compose_farm.executor import (
    CommandResult,
    is_local,
@@ -35,7 +43,7 @@ from compose_farm.executor import (
 from compose_farm.logs import (
    DEFAULT_LOG_PATH,
    SnapshotEntry,
-    collect_service_entries,
+    collect_stack_entries,
    isoformat,
    load_existing_entries,
    merge_entries,
@@ -43,70 +51,48 @@ from compose_farm.logs import (
 )
 from compose_farm.operations import (
    check_host_compatibility,
-    check_service_requirements,
-    discover_service_host,
+    check_stack_requirements,
+    discover_stack_host,
 )
-from compose_farm.state import get_orphaned_services, load_state, save_state
+from compose_farm.state import get_orphaned_stacks, load_state, save_state
 from compose_farm.traefik import generate_traefik_config, render_traefik_config

 # --- Sync helpers ---


-def _discover_services(cfg: Config) -> dict[str, str | list[str]]:
-    """Discover running services with a progress bar."""
-
-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID
-    ) -> dict[str, str | list[str]]:
-        tasks = [asyncio.create_task(discover_service_host(cfg, s)) for s in cfg.services]
-        discovered: dict[str, str | list[str]] = {}
-        for coro in asyncio.as_completed(tasks):
-            service, host = await coro
-            if host is not None:
-                discovered[service] = host
-            progress.update(task_id, advance=1, description=f"[cyan]{service}[/]")
-        return discovered
-
-    with progress_bar("Discovering", len(cfg.services)) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+def _discover_stacks(cfg: Config, stacks: list[str] | None = None) -> dict[str, str | list[str]]:
+    """Find where stacks are currently running, showing a progress bar.
+
+    Probes each stack in ``stacks`` (or every stack in ``cfg.stacks`` when
+    ``stacks`` is None) with ``discover_stack_host`` and returns a mapping of
+    stack name to the host value reported for it. Stacks whose probe reports
+    ``None`` are left out of the result.
+    """
+    targets = list(cfg.stacks) if stacks is None else stacks
+    probed = run_parallel_with_progress(
+        "Discovering",
+        targets,
+        lambda name: discover_stack_host(cfg, name),
+    )
+
+    running: dict[str, str | list[str]] = {}
+    for stack, host in probed:
+        if host is not None:
+            running[stack] = host
+    return running


-def _snapshot_services(
+def _snapshot_stacks(
    cfg: Config,
-    services: list[str],
+    stacks: list[str],
    log_path: Path | None,
 ) -> Path:
    """Capture image digests with a progress bar."""
-
-    async def collect_service(service: str, now: datetime) -> list[SnapshotEntry]:
-        try:
-            return await collect_service_entries(cfg, service, now=now)
-        except RuntimeError:
-            return []
-
-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID, now: datetime, svc_list: list[str]
-    ) -> list[SnapshotEntry]:
-        # Map tasks to service names so we can update description
-        task_to_service = {asyncio.create_task(collect_service(s, now)): s for s in svc_list}
-        all_entries: list[SnapshotEntry] = []
-        for coro in asyncio.as_completed(list(task_to_service.keys())):
-            entries = await coro
-            all_entries.extend(entries)
-            # Find which service just completed (by checking done tasks)
-            for t, svc in task_to_service.items():
-                if t.done() and not hasattr(t, "_reported"):
-                    t._reported = True  # type: ignore[attr-defined]
-                    progress.update(task_id, advance=1, description=f"[cyan]{svc}[/]")
-                    break
-        return all_entries
-
    effective_log_path = log_path or DEFAULT_LOG_PATH
    now_dt = datetime.now(UTC)
    now_iso = isoformat(now_dt)

-    with progress_bar("Capturing", len(services)) as (progress, task_id):
-        snapshot_entries = asyncio.run(gather_with_progress(progress, task_id, now_dt, services))
+    async def collect_stack(stack: str) -> tuple[str, list[SnapshotEntry]]:
+        try:
+            return stack, await collect_stack_entries(cfg, stack, now=now_dt)
+        except RuntimeError:
+            return stack, []
+
+    results = run_parallel_with_progress(
+        "Capturing",
+        stacks,
+        collect_stack,
+    )
+    snapshot_entries = [entry for _, entries in results for entry in entries]

    if not snapshot_entries:
        msg = "No image digests were captured"
@@ -119,11 +105,16 @@ def _snapshot_services(
    return effective_log_path


-def _format_host(host: str | list[str]) -> str:
-    """Format a host value for display."""
-    if isinstance(host, list):
-        return ", ".join(host)
-    return host
+def _merge_state(
+    current_state: dict[str, str | list[str]],
+    discovered: dict[str, str | list[str]],
+    removed: list[str],
+) -> dict[str, str | list[str]]:
+    """Combine existing state with freshly discovered stacks (partial refresh).
+
+    Returns a new mapping: entries from ``discovered`` override those in
+    ``current_state``, and every name listed in ``removed`` is dropped
+    (names not present are ignored). Neither input dict is modified.
+    """
+    merged = dict(current_state)
+    merged.update(discovered)
+    dropped = set(removed)
+    return {stack: host for stack, host in merged.items() if stack not in dropped}


 def _report_sync_changes(
@@ -135,25 +126,25 @@ def _report_sync_changes(
 ) -> None:
    """Report sync changes to the user."""
    if added:
-        console.print(f"\nNew services found ({len(added)}):")
-        for service in sorted(added):
-            host_str = _format_host(discovered[service])
-            console.print(f"  [green]+[/] [cyan]{service}[/] on [magenta]{host_str}[/]")
+        console.print(f"\nNew stacks found ({len(added)}):")
+        for stack in sorted(added):
+            host_str = format_host(discovered[stack])
+            console.print(f"  [green]+[/] [cyan]{stack}[/] on [magenta]{host_str}[/]")

    if changed:
-        console.print(f"\nServices on different hosts ({len(changed)}):")
-        for service, old_host, new_host in sorted(changed):
-            old_str = _format_host(old_host)
-            new_str = _format_host(new_host)
+        console.print(f"\nStacks on different hosts ({len(changed)}):")
+        for stack, old_host, new_host in sorted(changed):
+            old_str = format_host(old_host)
+            new_str = format_host(new_host)
            console.print(
-                f"  [yellow]~[/] [cyan]{service}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
+                f"  [yellow]~[/] [cyan]{stack}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
            )

    if removed:
-        console.print(f"\nServices no longer running ({len(removed)}):")
-        for service in sorted(removed):
-            host_str = _format_host(current_state[service])
-            console.print(f"  [red]-[/] [cyan]{service}[/] (was on [magenta]{host_str}[/])")
+        console.print(f"\nStacks no longer running ({len(removed)}):")
+        for stack in sorted(removed):
+            host_str = format_host(current_state[stack])
+            console.print(f"  [red]-[/] [cyan]{stack}[/] (was on [magenta]{host_str}[/])")


 # --- Check helpers ---
@@ -171,83 +162,77 @@ def _check_ssh_connectivity(cfg: Config) -> list[str]:

    async def check_host(host_name: str) -> tuple[str, bool]:
        host = cfg.hosts[host_name]
-        result = await run_command(host, "echo ok", host_name, stream=False)
-        return host_name, result.success
+        try:
+            result = await asyncio.wait_for(
+                run_command(host, "echo ok", host_name, stream=False),
+                timeout=5.0,
+            )
+            return host_name, result.success
+        except TimeoutError:
+            return host_name, False

-    async def gather_with_progress(progress: Progress, task_id: TaskID) -> list[str]:
-        tasks = [asyncio.create_task(check_host(h)) for h in remote_hosts]
-        unreachable: list[str] = []
-        for coro in asyncio.as_completed(tasks):
-            host_name, success = await coro
-            if not success:
-                unreachable.append(host_name)
-            progress.update(task_id, advance=1, description=f"[cyan]{host_name}[/]")
-        return unreachable
-
-    with progress_bar("Checking SSH connectivity", len(remote_hosts)) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+    results = run_parallel_with_progress(
+        "Checking SSH connectivity",
+        remote_hosts,
+        check_host,
+    )
+    return [host for host, success in results if not success]


-def _check_service_requirements(
+def _check_stack_requirements(
    cfg: Config,
-    services: list[str],
+    stacks: list[str],
 ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
-    """Check mounts, networks, and devices for all services with a progress bar.
+    """Check mounts, networks, and devices for all stacks with a progress bar.

    Returns (mount_errors, network_errors, device_errors) where each is a list of
-    (service, host, missing_item) tuples.
+    (stack, host, missing_item) tuples.
    """

-    async def check_service(
-        service: str,
+    async def check_stack(
+        stack: str,
    ) -> tuple[
        str,
        list[tuple[str, str, str]],
        list[tuple[str, str, str]],
        list[tuple[str, str, str]],
    ]:
-        """Check requirements for a single service on all its hosts."""
-        host_names = cfg.get_hosts(service)
+        """Check requirements for a single stack on all its hosts."""
+        host_names = cfg.get_hosts(stack)
        mount_errors: list[tuple[str, str, str]] = []
        network_errors: list[tuple[str, str, str]] = []
        device_errors: list[tuple[str, str, str]] = []

        for host_name in host_names:
-            missing_paths, missing_nets, missing_devs = await check_service_requirements(
-                cfg, service, host_name
+            missing_paths, missing_nets, missing_devs = await check_stack_requirements(
+                cfg, stack, host_name
            )
-            mount_errors.extend((service, host_name, p) for p in missing_paths)
-            network_errors.extend((service, host_name, n) for n in missing_nets)
-            device_errors.extend((service, host_name, d) for d in missing_devs)
+            mount_errors.extend((stack, host_name, p) for p in missing_paths)
+            network_errors.extend((stack, host_name, n) for n in missing_nets)
+            device_errors.extend((stack, host_name, d) for d in missing_devs)

-        return service, mount_errors, network_errors, device_errors
+        return stack, mount_errors, network_errors, device_errors

-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID
-    ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
-        tasks = [asyncio.create_task(check_service(s)) for s in services]
-        all_mount_errors: list[tuple[str, str, str]] = []
-        all_network_errors: list[tuple[str, str, str]] = []
-        all_device_errors: list[tuple[str, str, str]] = []
+    results = run_parallel_with_progress(
+        "Checking requirements",
+        stacks,
+        check_stack,
+    )

-        for coro in asyncio.as_completed(tasks):
-            service, mount_errs, net_errs, dev_errs = await coro
-            all_mount_errors.extend(mount_errs)
-            all_network_errors.extend(net_errs)
-            all_device_errors.extend(dev_errs)
-            progress.update(task_id, advance=1, description=f"[cyan]{service}[/]")
+    all_mount_errors: list[tuple[str, str, str]] = []
+    all_network_errors: list[tuple[str, str, str]] = []
+    all_device_errors: list[tuple[str, str, str]] = []
+    for _, mount_errs, net_errs, dev_errs in results:
+        all_mount_errors.extend(mount_errs)
+        all_network_errors.extend(net_errs)
+        all_device_errors.extend(dev_errs)

-        return all_mount_errors, all_network_errors, all_device_errors
-
-    with progress_bar(
-        "Checking requirements", len(services), initial_description="[dim]checking...[/]"
-    ) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+    return all_mount_errors, all_network_errors, all_device_errors


 def _report_config_status(cfg: Config) -> bool:
    """Check and report config vs disk status. Returns True if errors found."""
-    configured = set(cfg.services.keys())
+    configured = set(cfg.stacks.keys())
    on_disk = cfg.discover_compose_dirs()
    unmanaged = sorted(on_disk - configured)
    missing_from_disk = sorted(configured - on_disk)
@@ -263,86 +248,55 @@ def _report_config_status(cfg: Config) -> bool:
            console.print(f"  [red]-[/] [cyan]{name}[/]")

    if not unmanaged and not missing_from_disk:
-        console.print("[green]✓[/] Config matches disk")
+        print_success("Config matches disk")

    return bool(missing_from_disk)


-def _report_orphaned_services(cfg: Config) -> bool:
-    """Check for services in state but not in config. Returns True if orphans found."""
-    orphaned = get_orphaned_services(cfg)
+def _report_orphaned_stacks(cfg: Config) -> bool:
+    """Check for stacks in state but not in config. Returns True if orphans found."""
+    orphaned = get_orphaned_stacks(cfg)

    if orphaned:
-        console.print("\n[yellow]Orphaned services[/] (in state but not in config):")
+        console.print("\n[yellow]Orphaned stacks[/] (in state but not in config):")
        console.print(
-            "[dim]Run 'cf apply' to stop them, or 'cf down --orphaned' for just orphans.[/]"
+            "[dim]Run [bold]cf apply[/bold] to stop them, or [bold]cf down --orphaned[/bold] for just orphans.[/]"
        )
        for name, hosts in sorted(orphaned.items()):
-            host_str = ", ".join(hosts) if isinstance(hosts, list) else hosts
-            console.print(f"  [yellow]![/] [cyan]{name}[/] on [magenta]{host_str}[/]")
+            console.print(f"  [yellow]![/] [cyan]{name}[/] on [magenta]{format_host(hosts)}[/]")
        return True

    return False


-def _report_traefik_status(cfg: Config, services: list[str]) -> None:
+def _report_traefik_status(cfg: Config, stacks: list[str]) -> None:
    """Check and report traefik label status."""
    try:
-        _, warnings = generate_traefik_config(cfg, services, check_all=True)
+        _, warnings = generate_traefik_config(cfg, stacks, check_all=True)
    except (FileNotFoundError, ValueError):
        return

    if warnings:
        console.print(f"\n[yellow]Traefik issues[/] ({len(warnings)}):")
        for warning in warnings:
-            console.print(f"  [yellow]![/] {warning}")
+            print_warning(warning)
    else:
-        console.print("[green]✓[/] Traefik labels valid")
+        print_success("Traefik labels valid")


-def _report_mount_errors(mount_errors: list[tuple[str, str, str]]) -> None:
-    """Report mount errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, path in mount_errors:
-        by_service.setdefault(svc, []).append((host, path))
+def _report_requirement_errors(errors: list[tuple[str, str, str]], category: str) -> None:
+    """Report missing requirements (mounts, networks, devices) per stack and host.
+
+    ``errors`` is a list of (stack, host, missing_item) tuples and ``category``
+    is the plural noun used in the heading (e.g. "mounts").
+
+    Items are grouped by (stack, host) rather than by stack alone, so a stack
+    that runs on several hosts gets one section per host, each listing only
+    the items missing on that host.
+    """
+    by_target: dict[tuple[str, str], list[str]] = {}
+    for stack, host, item in errors:
+        by_target.setdefault((stack, host), []).append(item)

-    console.print(f"[red]Missing mounts[/] ({len(mount_errors)}):")
-    for svc, items in sorted(by_service.items()):
-        host = items[0][0]
+    console.print(f"[red]Missing {category}[/] ({len(errors)}):")
+    for (stack, host), missing in sorted(by_target.items()):
-        paths = [p for _, p in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for path in paths:
-            console.print(f"    [red]✗[/] {path}")
-
-
-def _report_network_errors(network_errors: list[tuple[str, str, str]]) -> None:
-    """Report network errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, net in network_errors:
-        by_service.setdefault(svc, []).append((host, net))
-
-    console.print(f"[red]Missing networks[/] ({len(network_errors)}):")
-    for svc, items in sorted(by_service.items()):
-        host = items[0][0]
-        networks = [n for _, n in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for net in networks:
-            console.print(f"    [red]✗[/] {net}")
-
-
-def _report_device_errors(device_errors: list[tuple[str, str, str]]) -> None:
-    """Report device errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, dev in device_errors:
-        by_service.setdefault(svc, []).append((host, dev))
-
-    console.print(f"[red]Missing devices[/] ({len(device_errors)}):")
-    for svc, items in sorted(by_service.items()):
-        host = items[0][0]
-        devices = [d for _, d in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for dev in devices:
-            console.print(f"    [red]✗[/] {dev}")
+        console.print(f"  [cyan]{stack}[/] on [magenta]{host}[/]:")
+        for item in missing:
+            console.print(f"    [red]✗[/] {item}")


 def _report_ssh_status(unreachable_hosts: list[str]) -> bool:
@@ -350,9 +304,9 @@ def _report_ssh_status(unreachable_hosts: list[str]) -> bool:
    if unreachable_hosts:
        console.print(f"[red]Unreachable hosts[/] ({len(unreachable_hosts)}):")
        for host in sorted(unreachable_hosts):
-            console.print(f"  [red]✗[/] [magenta]{host}[/]")
+            print_error(f"[magenta]{host}[/]")
        return True
-    console.print("[green]✓[/] All hosts reachable")
+    print_success("All hosts reachable")
    return False


@@ -360,7 +314,7 @@ def _report_host_compatibility(
    compat: dict[str, tuple[int, int, list[str]]],
    assigned_hosts: list[str],
 ) -> None:
-    """Report host compatibility for a service."""
+    """Report host compatibility for a stack."""
    for host_name, (found, total, missing) in sorted(compat.items()):
        is_assigned = host_name in assigned_hosts
        marker = " [dim](assigned)[/]" if is_assigned else ""
@@ -391,25 +345,25 @@ def _run_remote_checks(cfg: Config, svc_list: list[str], *, show_host_compat: bo
    console.print()  # Spacing before mounts/networks check

    # Check mounts, networks, and devices
-    mount_errors, network_errors, device_errors = _check_service_requirements(cfg, svc_list)
+    mount_errors, network_errors, device_errors = _check_stack_requirements(cfg, svc_list)

    if mount_errors:
-        _report_mount_errors(mount_errors)
+        _report_requirement_errors(mount_errors, "mounts")
        has_errors = True
    if network_errors:
-        _report_network_errors(network_errors)
+        _report_requirement_errors(network_errors, "networks")
        has_errors = True
    if device_errors:
-        _report_device_errors(device_errors)
+        _report_requirement_errors(device_errors, "devices")
        has_errors = True
    if not mount_errors and not network_errors and not device_errors:
-        console.print("[green]✓[/] All mounts, networks, and devices exist")
+        print_success("All mounts, networks, and devices exist")

    if show_host_compat:
-        for service in svc_list:
-            console.print(f"\n[bold]Host compatibility for[/] [cyan]{service}[/]:")
-            compat = run_async(check_host_compatibility(cfg, service))
-            assigned_hosts = cfg.get_hosts(service)
+        for stack in svc_list:
+            console.print(f"\n[bold]Host compatibility for[/] [cyan]{stack}[/]:")
+            compat = run_async(check_host_compatibility(cfg, stack))
+            assigned_hosts = cfg.get_hosts(stack)
            _report_host_compatibility(compat, assigned_hosts)

    return has_errors
@@ -423,8 +377,8 @@ _DEFAULT_NETWORK_GATEWAY = "172.20.0.1"

@app.command("traefik-file", rich_help_panel="Configuration")
 def traefik_file(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    output: Annotated[
        Path | None,
        typer.Option(
@@ -436,11 +390,11 @@ def traefik_file(
    config: ConfigOption = None,
 ) -> None:
    """Generate a Traefik file-provider fragment from compose Traefik labels."""
-    svc_list, cfg = get_services(services or [], all_services, config)
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
    try:
-        dynamic, warnings = generate_traefik_config(cfg, svc_list)
+        dynamic, warnings = generate_traefik_config(cfg, stack_list)
    except (FileNotFoundError, ValueError) as exc:
-        err_console.print(f"[red]✗[/] {exc}")
+        print_error(str(exc))
        raise typer.Exit(1) from exc

    rendered = render_traefik_config(dynamic)
@@ -448,16 +402,18 @@ def traefik_file(
    if output:
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(rendered)
-        console.print(f"[green]✓[/] Traefik config written to {output}")
+        print_success(f"Traefik config written to {output}")
    else:
        console.print(rendered)

    for warning in warnings:
-        err_console.print(f"[yellow]![/] {warning}")
+        print_warning(warning)


@app.command(rich_help_panel="Configuration")
 def refresh(
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    config: ConfigOption = None,
    log_path: LogPathOption = None,
    dry_run: Annotated[
@@ -465,22 +421,35 @@ def refresh(
        typer.Option("--dry-run", "-n", help="Show what would change without writing"),
    ] = False,
 ) -> None:
-    """Update local state from running services.
+    """Update local state from running stacks.

-    Discovers which services are running on which hosts, updates the state
+    Discovers which stacks are running on which hosts, updates the state
    file, and captures image digests. This is a read operation - it updates
    your local state to match reality, not the other way around.

+    Without arguments: refreshes all stacks (same as --all).
+    With stack names: refreshes only those stacks.
+
    Use 'cf apply' to make reality match your config (stop orphans, migrate).
    """
-    cfg = load_config_or_exit(config)
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, default_all=True)
+
+    # Partial refresh merges with existing state; full refresh replaces it
+    # Partial = specific stacks provided (not --all, not default)
+    partial_refresh = bool(stacks) and not all_stacks
+
    current_state = load_state(cfg)

-    discovered = _discover_services(cfg)
+    discovered = _discover_stacks(cfg, stack_list)

-    # Calculate changes
+    # Calculate changes (only for the stacks we're refreshing)
    added = [s for s in discovered if s not in current_state]
-    removed = [s for s in current_state if s not in discovered]
+    # A full refresh may drop any tracked stack; a partial one only the requested stacks
+    if partial_refresh:
+        # Requested stacks that are tracked in state but were not discovered count as removed
+        removed = [s for s in stack_list if s in current_state and s not in discovered]
+    else:
+        removed = [s for s in current_state if s not in discovered]
    changed = [
        (s, current_state[s], discovered[s])
        for s in discovered
@@ -492,29 +461,32 @@ def refresh(
    if state_changed:
        _report_sync_changes(added, removed, changed, discovered, current_state)
    else:
-        console.print("[green]✓[/] State is already in sync.")
+        print_success("State is already in sync.")

    if dry_run:
-        console.print("\n[dim](dry-run: no changes made)[/]")
+        console.print(f"\n{MSG_DRY_RUN}")
        return

    # Update state file
    if state_changed:
-        save_state(cfg, discovered)
-        console.print(f"\n[green]✓[/] State updated: {len(discovered)} services tracked.")
+        new_state = (
+            _merge_state(current_state, discovered, removed) if partial_refresh else discovered
+        )
+        save_state(cfg, new_state)
+        print_success(f"State updated: {len(new_state)} stacks tracked.")

-    # Capture image digests for running services
+    # Capture image digests for running stacks
    if discovered:
        try:
-            path = _snapshot_services(cfg, list(discovered.keys()), log_path)
-            console.print(f"[green]✓[/] Digests written to {path}")
+            path = _snapshot_stacks(cfg, list(discovered.keys()), log_path)
+            print_success(f"Digests written to {path}")
        except RuntimeError as exc:
-            err_console.print(f"[yellow]![/] {exc}")
+            print_warning(str(exc))


@app.command(rich_help_panel="Configuration")
 def check(
-    services: ServicesArg = None,
+    stacks: StacksArg = None,
    local: Annotated[
        bool,
        typer.Option("--local", help="Skip SSH-based checks (faster)"),
@@ -523,35 +495,31 @@ def check(
 ) -> None:
    """Validate configuration, traefik labels, mounts, and networks.

-    Without arguments: validates all services against configured hosts.
-    With service arguments: validates specific services and shows host compatibility.
+    Without arguments: validates all stacks against configured hosts.
+    With stack arguments: validates specific stacks and shows host compatibility.

    Use --local to skip SSH-based checks for faster validation.
    """
    cfg = load_config_or_exit(config)

-    # Determine which services to check and whether to show host compatibility
-    if services:
-        svc_list = list(services)
-        invalid = [s for s in svc_list if s not in cfg.services]
-        if invalid:
-            for svc in invalid:
-                err_console.print(f"[red]✗[/] Service '{svc}' not found in config")
-            raise typer.Exit(1)
+    # Determine which stacks to check and whether to show host compatibility
+    if stacks:
+        stack_list = list(stacks)
+        validate_stacks(cfg, stack_list)
        show_host_compat = True
    else:
-        svc_list = list(cfg.services.keys())
+        stack_list = list(cfg.stacks.keys())
        show_host_compat = False

    # Run checks
    has_errors = _report_config_status(cfg)
-    _report_traefik_status(cfg, svc_list)
+    _report_traefik_status(cfg, stack_list)

-    if not local and _run_remote_checks(cfg, svc_list, show_host_compat=show_host_compat):
+    if not local and _run_remote_checks(cfg, stack_list, show_host_compat=show_host_compat):
        has_errors = True

-    # Check for orphaned services (in state but removed from config)
-    if _report_orphaned_services(cfg):
+    # Check for orphaned stacks (in state but removed from config)
+    if _report_orphaned_stacks(cfg):
        has_errors = True

    if has_errors:
@@ -580,18 +548,14 @@ def init_network(
 ) -> None:
    """Create Docker network on hosts with consistent settings.

-    Creates an external Docker network that services can use for cross-host
+    Creates an external Docker network that stacks can use for cross-host
    communication. Uses the same subnet/gateway on all hosts to ensure
    consistent networking.
    """
    cfg = load_config_or_exit(config)

    target_hosts = list(hosts) if hosts else list(cfg.hosts.keys())
-    invalid = [h for h in target_hosts if h not in cfg.hosts]
-    if invalid:
-        for h in invalid:
-            err_console.print(f"[red]✗[/] Host '{h}' not found in config")
-        raise typer.Exit(1)
+    validate_hosts(cfg, target_hosts)

    async def create_network_on_host(host_name: str) -> CommandResult:
        host = cfg.hosts[host_name]
@@ -601,7 +565,7 @@ def init_network(

        if check_result.success:
            console.print(f"[cyan]\\[{host_name}][/] Network '{network}' already exists")
-            return CommandResult(service=host_name, exit_code=0, success=True)
+            return CommandResult(stack=host_name, exit_code=0, success=True)

        # Create the network
        create_cmd = (
@@ -616,9 +580,8 @@ def init_network(
        if result.success:
            console.print(f"[cyan]\\[{host_name}][/] [green]✓[/] Created network '{network}'")
        else:
-            err_console.print(
-                f"[cyan]\\[{host_name}][/] [red]✗[/] Failed to create network: "
-                f"{result.stderr.strip()}"
+            print_error(
+                f"[cyan]\\[{host_name}][/] Failed to create network: {result.stderr.strip()}"
            )

        return result
--- a/src/compose_farm/cli/monitoring.py
+++ b/src/compose_farm/cli/monitoring.py
@@ -2,12 +2,10 @@

 from __future__ import annotations

-import asyncio
 import contextlib
 from typing import TYPE_CHECKING, Annotated

 import typer
-from rich.progress import Progress, TaskID  # noqa: TC002
 from rich.table import Table

 from compose_farm.cli.app import app
@@ -16,50 +14,22 @@ from compose_farm.cli.common import (
    AllOption,
    ConfigOption,
    HostOption,
-    ServicesArg,
-    get_services,
+    ServiceOption,
+    StacksArg,
+    get_stacks,
    load_config_or_exit,
-    progress_bar,
    report_results,
    run_async,
+    run_parallel_with_progress,
 )
-from compose_farm.console import console, err_console
-from compose_farm.executor import run_command, run_on_services
-from compose_farm.state import get_services_needing_migration, load_state
+from compose_farm.console import console, print_error
+from compose_farm.executor import run_command, run_on_stacks
+from compose_farm.state import get_stacks_needing_migration, group_stacks_by_host, load_state

 if TYPE_CHECKING:
-    from collections.abc import Mapping
-
    from compose_farm.config import Config


-def _group_services_by_host(
-    services: dict[str, str | list[str]],
-    hosts: Mapping[str, object],
-    all_hosts: list[str] | None = None,
-) -> dict[str, list[str]]:
-    """Group services by their assigned host(s).
-
-    For multi-host services (list or "all"), the service appears in multiple host lists.
-    """
-    by_host: dict[str, list[str]] = {h: [] for h in hosts}
-    for service, host_value in services.items():
-        if isinstance(host_value, list):
-            # Explicit list of hosts
-            for host_name in host_value:
-                if host_name in by_host:
-                    by_host[host_name].append(service)
-        elif host_value == "all" and all_hosts:
-            # "all" keyword - add to all hosts
-            for host_name in all_hosts:
-                if host_name in by_host:
-                    by_host[host_name].append(service)
-        elif host_value in by_host:
-            # Single host
-            by_host[host_value].append(service)
-    return by_host
-
-
 def _get_container_counts(cfg: Config) -> dict[str, int]:
    """Get container counts from all hosts with a progress bar."""

@@ -72,23 +42,17 @@ def _get_container_counts(cfg: Config) -> dict[str, int]:
                count = int(result.stdout.strip())
        return host_name, count

-    async def gather_with_progress(progress: Progress, task_id: TaskID) -> dict[str, int]:
-        hosts = list(cfg.hosts.keys())
-        tasks = [asyncio.create_task(get_count(h)) for h in hosts]
-        results: dict[str, int] = {}
-        for coro in asyncio.as_completed(tasks):
-            host_name, count = await coro
-            results[host_name] = count
-            progress.update(task_id, advance=1, description=f"[cyan]{host_name}[/]")
-        return results
-
-    with progress_bar("Querying hosts", len(cfg.hosts)) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+    results = run_parallel_with_progress(
+        "Querying hosts",
+        list(cfg.hosts.keys()),
+        get_count,
+    )
+    return dict(results)


 def _build_host_table(
    cfg: Config,
-    services_by_host: dict[str, list[str]],
+    stacks_by_host: dict[str, list[str]],
    running_by_host: dict[str, list[str]],
    container_counts: dict[str, int],
    *,
@@ -105,7 +69,7 @@ def _build_host_table(

    for host_name in sorted(cfg.hosts.keys()):
        host = cfg.hosts[host_name]
-        configured = len(services_by_host[host_name])
+        configured = len(stacks_by_host[host_name])
        running = len(running_by_host[host_name])

        row = [
@@ -133,8 +97,8 @@ def _build_summary_table(
    table.add_column("Value", style="bold")

    table.add_row("Total hosts", str(len(cfg.hosts)))
-    table.add_row("Services (configured)", str(len(cfg.services)))
-    table.add_row("Services (tracked)", str(len(state)))
+    table.add_row("Stacks (configured)", str(len(cfg.stacks)))
+    table.add_row("Stacks (tracked)", str(len(state)))
    table.add_row("Compose files on disk", str(len(on_disk)))

    if pending:
@@ -152,9 +116,10 @@ def _build_summary_table(

@app.command(rich_help_panel="Monitoring")
 def logs(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    host: HostOption = None,
+    service: ServiceOption = None,
    follow: Annotated[bool, typer.Option("--follow", "-f", help="Follow logs")] = False,
    tail: Annotated[
        int | None,
@@ -162,43 +127,45 @@ def logs(
    ] = None,
    config: ConfigOption = None,
 ) -> None:
-    """Show service logs."""
-    if all_services and host is not None:
-        err_console.print("[red]✗[/] Cannot use --all and --host together")
+    """Show stack logs. With --service, shows logs for just that service."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
        raise typer.Exit(1)

-    cfg = load_config_or_exit(config)
-
-    # Determine service list based on options
-    if host is not None:
-        if host not in cfg.hosts:
-            err_console.print(f"[red]✗[/] Host '{host}' not found in config")
-            raise typer.Exit(1)
-        # Include services where host is in the list of configured hosts
-        svc_list = [s for s in cfg.services if host in cfg.get_hosts(s)]
-        if not svc_list:
-            err_console.print(f"[yellow]![/] No services configured for host '{host}'")
-            return
-    else:
-        svc_list, cfg = get_services(services or [], all_services, config)
-
-    # Default to fewer lines when showing multiple services
-    many_services = all_services or host is not None or len(svc_list) > 1
-    effective_tail = tail if tail is not None else (20 if many_services else 100)
+    # Default to fewer lines when showing multiple stacks
+    many_stacks = all_stacks or host is not None or len(stack_list) > 1
+    effective_tail = tail if tail is not None else (20 if many_stacks else 100)
    cmd = f"logs --tail {effective_tail}"
    if follow:
        cmd += " -f"
-    results = run_async(run_on_services(cfg, svc_list, cmd))
+    if service:
+        cmd += f" {service}"
+    results = run_async(run_on_stacks(cfg, stack_list, cmd))
    report_results(results)


@app.command(rich_help_panel="Monitoring")
 def ps(
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    host: HostOption = None,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Show status of all services."""
-    cfg = load_config_or_exit(config)
-    results = run_async(run_on_services(cfg, list(cfg.services.keys()), "ps"))
+    """Show status of stacks.
+
+    Without arguments: shows all stacks (same as --all).
+    With stack names: shows only those stacks.
+    With --host: shows stacks on that host.
+    With --service: filters to a specific service within the stack.
+    """
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host, default_all=True)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
+        raise typer.Exit(1)
+    cmd = f"ps {service}" if service else "ps"
+    results = run_async(run_on_stacks(cfg, stack_list, cmd))
    report_results(results)


@@ -210,25 +177,25 @@ def stats(
    ] = False,
    config: ConfigOption = None,
 ) -> None:
-    """Show overview statistics for hosts and services.
+    """Show overview statistics for hosts and stacks.

-    Without --live: Shows config/state info (hosts, services, pending migrations).
+    Without --live: Shows config/state info (hosts, stacks, pending migrations).
    With --live: Also queries Docker on each host for container counts.
    """
    cfg = load_config_or_exit(config)
    state = load_state(cfg)
-    pending = get_services_needing_migration(cfg)
+    pending = get_stacks_needing_migration(cfg)

    all_hosts = list(cfg.hosts.keys())
-    services_by_host = _group_services_by_host(cfg.services, cfg.hosts, all_hosts)
-    running_by_host = _group_services_by_host(state, cfg.hosts, all_hosts)
+    stacks_by_host = group_stacks_by_host(cfg.stacks, cfg.hosts, all_hosts)
+    running_by_host = group_stacks_by_host(state, cfg.hosts, all_hosts)

    container_counts: dict[str, int] = {}
    if live:
        container_counts = _get_container_counts(cfg)

    host_table = _build_host_table(
-        cfg, services_by_host, running_by_host, container_counts, show_containers=live
+        cfg, stacks_by_host, running_by_host, container_counts, show_containers=live
    )
    console.print(host_table)

--- a/src/compose_farm/cli/ssh.py
+++ b/src/compose_farm/cli/ssh.py
@@ -0,0 +1,282 @@
+"""SSH key management commands for compose-farm."""
+
+from __future__ import annotations
+
+import asyncio
+import subprocess
+from typing import TYPE_CHECKING, Annotated
+
+import typer
+
+from compose_farm.cli.app import app
+from compose_farm.cli.common import ConfigOption, load_config_or_exit, run_parallel_with_progress
+from compose_farm.console import console, err_console
+from compose_farm.executor import run_command
+
+if TYPE_CHECKING:
+    from compose_farm.config import Host
+
+from compose_farm.ssh_keys import (
+    SSH_KEY_PATH,
+    SSH_PUBKEY_PATH,
+    get_pubkey_content,
+    get_ssh_env,
+    key_exists,
+)
+
+_DEFAULT_SSH_PORT = 22
+_PUBKEY_DISPLAY_THRESHOLD = 60
+
+ssh_app = typer.Typer(
+    name="ssh",
+    help="Manage SSH keys for passwordless authentication.",
+    no_args_is_help=True,
+)
+
+_ForceOption = Annotated[
+    bool,
+    typer.Option("--force", "-f", help="Regenerate key even if it exists."),
+]
+
+
+def _generate_key(*, force: bool = False) -> bool:
+    """Generate an ED25519 SSH key with no passphrase.
+
+    Returns True if key was generated, False if skipped.
+    """
+    if key_exists() and not force:
+        console.print(f"[yellow]![/] SSH key already exists: {SSH_KEY_PATH}")
+        console.print("[dim]Use --force to regenerate[/]")
+        return False
+
+    # Create .ssh directory if it doesn't exist
+    SSH_KEY_PATH.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
+
+    # Remove existing key if forcing regeneration
+    if force:
+        SSH_KEY_PATH.unlink(missing_ok=True)
+        SSH_PUBKEY_PATH.unlink(missing_ok=True)
+
+    console.print(f"[dim]Generating SSH key at {SSH_KEY_PATH}...[/]")
+
+    try:
+        subprocess.run(
+            [  # noqa: S607
+                "ssh-keygen",
+                "-t",
+                "ed25519",
+                "-N",
+                "",  # No passphrase
+                "-f",
+                str(SSH_KEY_PATH),
+                "-C",
+                "compose-farm",
+            ],
+            check=True,
+            capture_output=True,
+        )
+    except subprocess.CalledProcessError as e:
+        err_console.print(f"[red]Failed to generate SSH key:[/] {e.stderr.decode()}")
+        return False
+    except FileNotFoundError:
+        err_console.print("[red]ssh-keygen not found. Is OpenSSH installed?[/]")
+        return False
+
+    # Set correct permissions
+    SSH_KEY_PATH.chmod(0o600)
+    SSH_PUBKEY_PATH.chmod(0o644)
+
+    console.print(f"[green]Generated SSH key:[/] {SSH_KEY_PATH}")
+    return True
+
+
+def _copy_key_to_host(host_name: str, address: str, user: str, port: int) -> bool:
+    """Copy public key to a host's authorized_keys.
+
+    Uses ssh-copy-id which handles agent vs password fallback automatically.
+    Returns True on success, False on failure.
+    """
+    target = f"{user}@{address}"
+    console.print(f"[dim]Copying key to {host_name} ({target})...[/]")
+
+    cmd = ["ssh-copy-id"]
+
+    # Disable strict host key checking (consistent with executor.py)
+    cmd.extend(["-o", "StrictHostKeyChecking=no"])
+    cmd.extend(["-o", "UserKnownHostsFile=/dev/null"])
+
+    if port != _DEFAULT_SSH_PORT:
+        cmd.extend(["-p", str(port)])
+
+    cmd.extend(["-i", str(SSH_PUBKEY_PATH), target])
+
+    try:
+        # Don't capture output so user can see password prompt
+        result = subprocess.run(cmd, check=False, env=get_ssh_env())
+        if result.returncode == 0:
+            console.print(f"[green]Key copied to {host_name}[/]")
+            return True
+        err_console.print(f"[red]Failed to copy key to {host_name}[/]")
+        return False
+    except FileNotFoundError:
+        err_console.print("[red]ssh-copy-id not found. Is OpenSSH installed?[/]")
+        return False
+
+
+@ssh_app.command("keygen")
+def ssh_keygen(
+    force: _ForceOption = False,
+) -> None:
+    """Generate SSH key (does not distribute to hosts).
+
+    Creates an ED25519 key at ~/.ssh/compose-farm/id_ed25519 with no passphrase.
+    Use 'cf ssh setup' to also distribute the key to all configured hosts.
+    """
+    success = _generate_key(force=force)
+    if not success and not key_exists():
+        raise typer.Exit(1)
+
+
+@ssh_app.command("setup")
+def ssh_setup(
+    config: ConfigOption = None,
+    force: _ForceOption = False,
+) -> None:
+    """Generate SSH key and distribute to all configured hosts.
+
+    Creates an ED25519 key at ~/.ssh/compose-farm/id_ed25519 (no passphrase)
+    and copies the public key to authorized_keys on each host.
+
+    For each host, tries SSH agent first. If agent is unavailable,
+    prompts for password.
+    """
+    cfg = load_config_or_exit(config)
+
+    # Skip localhost hosts
+    remote_hosts = {
+        name: host
+        for name, host in cfg.hosts.items()
+        if host.address.lower() not in ("localhost", "127.0.0.1")
+    }
+
+    if not remote_hosts:
+        console.print("[yellow]No remote hosts configured.[/]")
+        raise typer.Exit(0)
+
+    # Generate key if needed
+    if not key_exists() or force:
+        if not _generate_key(force=force):
+            raise typer.Exit(1)
+    else:
+        console.print(f"[dim]Using existing key: {SSH_KEY_PATH}[/]")
+
+    console.print()
+    console.print(f"[bold]Distributing key to {len(remote_hosts)} host(s)...[/]")
+    console.print()
+
+    # Copy key to each host
+    succeeded = 0
+    failed = 0
+
+    for host_name, host in remote_hosts.items():
+        if _copy_key_to_host(host_name, host.address, host.user, host.port):
+            succeeded += 1
+        else:
+            failed += 1
+
+    console.print()
+    if failed == 0:
+        console.print(
+            f"[green]Setup complete.[/] {succeeded}/{len(remote_hosts)} hosts configured."
+        )
+    else:
+        console.print(
+            f"[yellow]Setup partially complete.[/] {succeeded}/{len(remote_hosts)} hosts configured, "
+            f"[red]{failed} failed[/]."
+        )
+        raise typer.Exit(1)
+
+
+@ssh_app.command("status")
+def ssh_status(
+    config: ConfigOption = None,
+) -> None:
+    """Show SSH key status and host connectivity."""
+    from rich.table import Table  # noqa: PLC0415
+
+    cfg = load_config_or_exit(config)
+
+    # Key status
+    console.print("[bold]SSH Key Status[/]")
+    console.print()
+
+    if key_exists():
+        console.print(f"  [green]Key exists:[/] {SSH_KEY_PATH}")
+        pubkey = get_pubkey_content()
+        if pubkey:
+            # Show truncated public key
+            if len(pubkey) > _PUBKEY_DISPLAY_THRESHOLD:
+                console.print(f"  [dim]Public key:[/] {pubkey[:30]}...{pubkey[-20:]}")
+            else:
+                console.print(f"  [dim]Public key:[/] {pubkey}")
+    else:
+        console.print(f"  [yellow]No key found:[/] {SSH_KEY_PATH}")
+        console.print("  [dim]Run 'cf ssh setup' to generate and distribute a key[/]")
+
+    console.print()
+    console.print("[bold]Host Connectivity[/]")
+    console.print()
+
+    # Skip localhost hosts
+    remote_hosts = {
+        name: host
+        for name, host in cfg.hosts.items()
+        if host.address.lower() not in ("localhost", "127.0.0.1")
+    }
+
+    if not remote_hosts:
+        console.print("  [dim]No remote hosts configured[/]")
+        return
+
+    async def check_host(item: tuple[str, Host]) -> tuple[str, str, str]:
+        """Check connectivity to a single host."""
+        host_name, host = item
+        target = f"{host.user}@{host.address}"
+        if host.port != _DEFAULT_SSH_PORT:
+            target += f":{host.port}"
+
+        try:
+            result = await asyncio.wait_for(
+                run_command(host, "echo ok", host_name, stream=False),
+                timeout=5.0,
+            )
+            status = "[green]OK[/]" if result.success else "[red]Auth failed[/]"
+        except TimeoutError:
+            status = "[red]Timeout (5s)[/]"
+        except Exception as e:
+            status = f"[red]Error: {e}[/]"
+
+        return host_name, target, status
+
+    # Check connectivity in parallel with progress bar
+    results = run_parallel_with_progress(
+        "Checking hosts",
+        list(remote_hosts.items()),
+        check_host,
+    )
+
+    # Build table from results
+    table = Table(show_header=True, header_style="bold")
+    table.add_column("Host")
+    table.add_column("Address")
+    table.add_column("Status")
+
+    # Sort by host name for consistent order
+    for host_name, target, status in sorted(results, key=lambda r: r[0]):
+        table.add_row(host_name, target, status)
+
+    console.print(table)
+
+
+# Register ssh subcommand on the shared app
+app.add_typer(ssh_app, name="ssh", rich_help_panel="Configuration")
--- a/src/compose_farm/compose.py
+++ b/src/compose_farm/compose.py
@@ -7,14 +7,14 @@ from __future__ import annotations

 import os
 import re
+import stat
 from dataclasses import dataclass
+from pathlib import Path
 from typing import TYPE_CHECKING, Any

 import yaml

 if TYPE_CHECKING:
-    from pathlib import Path
-
    from .config import Config

 # Port parsing constants
@@ -141,32 +141,51 @@ def _resolve_host_path(host_path: str, compose_dir: Path) -> str | None:
    return None  # Named volume


+def _is_socket(path: str) -> bool:
+    """Check if a path is a socket (e.g., SSH agent socket)."""
+    try:
+        return stat.S_ISSOCK(Path(path).stat().st_mode)
+    except (FileNotFoundError, PermissionError, OSError):
+        return False
+
+
 def _parse_volume_item(
    item: str | dict[str, Any],
    env: dict[str, str],
    compose_dir: Path,
 ) -> str | None:
-    """Parse a single volume item and return host path if it's a bind mount."""
+    """Parse a single volume item and return host path if it's a bind mount.
+
+    Skips socket paths (e.g., SSH_AUTH_SOCK) since they're machine-local
+    and shouldn't be validated on remote hosts.
+    """
+    host_path: str | None = None
+
    if isinstance(item, str):
        interpolated = _interpolate(item, env)
        parts = interpolated.split(":")
        if len(parts) >= _MIN_VOLUME_PARTS:
-            return _resolve_host_path(parts[0], compose_dir)
+            host_path = _resolve_host_path(parts[0], compose_dir)
    elif isinstance(item, dict) and item.get("type") == "bind":
        source = item.get("source")
        if source:
            interpolated = _interpolate(str(source), env)
-            return _resolve_host_path(interpolated, compose_dir)
-    return None
+            host_path = _resolve_host_path(interpolated, compose_dir)
+
+    # Skip sockets - they're machine-local (e.g., SSH agent)
+    if host_path and _is_socket(host_path):
+        return None
+
+    return host_path


-def parse_host_volumes(config: Config, service: str) -> list[str]:
-    """Extract host bind mount paths from a service's compose file.
+def parse_host_volumes(config: Config, stack: str) -> list[str]:
+    """Extract host bind mount paths from a stack's compose file.

    Returns a list of absolute host paths used as volume mounts.
    Skips named volumes and resolves relative paths.
    """
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)
    if not compose_path.exists():
        return []

@@ -194,21 +213,15 @@ def parse_host_volumes(config: Config, service: str) -> list[str]:
                paths.append(host_path)

    # Return unique paths, preserving order
-    seen: set[str] = set()
-    unique: list[str] = []
-    for p in paths:
-        if p not in seen:
-            seen.add(p)
-            unique.append(p)
-    return unique
+    return list(dict.fromkeys(paths))


-def parse_devices(config: Config, service: str) -> list[str]:
-    """Extract host device paths from a service's compose file.
+def parse_devices(config: Config, stack: str) -> list[str]:
+    """Extract host device paths from a stack's compose file.

    Returns a list of host device paths (e.g., /dev/dri, /dev/dri/renderD128).
    """
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)
    if not compose_path.exists():
        return []

@@ -239,21 +252,15 @@ def parse_devices(config: Config, service: str) -> list[str]:
                    devices.append(host_path)

    # Return unique devices, preserving order
-    seen: set[str] = set()
-    unique: list[str] = []
-    for d in devices:
-        if d not in seen:
-            seen.add(d)
-            unique.append(d)
-    return unique
+    return list(dict.fromkeys(devices))


-def parse_external_networks(config: Config, service: str) -> list[str]:
-    """Extract external network names from a service's compose file.
+def parse_external_networks(config: Config, stack: str) -> list[str]:
+    """Extract external network names from a stack's compose file.

    Returns a list of network names marked as external: true.
    """
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)
    if not compose_path.exists():
        return []

--- a/src/compose_farm/config.py
+++ b/src/compose_farm/config.py
@@ -3,13 +3,15 @@
 from __future__ import annotations

 import getpass
-import os
 from pathlib import Path

 import yaml
 from pydantic import BaseModel, Field, model_validator

-from .paths import xdg_config_home
+from .paths import config_search_paths, find_config_path
+
+# Supported compose filenames, in priority order
+COMPOSE_FILENAMES = ("compose.yaml", "compose.yml", "docker-compose.yml", "docker-compose.yaml")


 class Host(BaseModel):
@@ -25,9 +27,9 @@ class Config(BaseModel):

    compose_dir: Path = Path("/opt/compose")
    hosts: dict[str, Host]
-    services: dict[str, str | list[str]]  # service_name -> host_name or list of hosts
+    stacks: dict[str, str | list[str]]  # stack_name -> host_name or list of hosts
    traefik_file: Path | None = None  # Auto-regenerate traefik config after up/down
-    traefik_service: str | None = None  # Service name for Traefik (skip its host in file-provider)
+    traefik_stack: str | None = None  # Stack name for Traefik (skip its host in file-provider)
    config_path: Path = Path()  # Set by load_config()

    def get_state_path(self) -> Path:
@@ -35,96 +37,79 @@ class Config(BaseModel):
        return self.config_path.parent / "compose-farm-state.yaml"

    @model_validator(mode="after")
-    def validate_hosts_and_services(self) -> Config:
-        """Validate host names and service configurations."""
+    def validate_hosts_and_stacks(self) -> Config:
+        """Validate host names and stack configurations."""
        # "all" is reserved keyword, cannot be used as host name
        if "all" in self.hosts:
            msg = "'all' is a reserved keyword and cannot be used as a host name"
            raise ValueError(msg)

-        for service, host_value in self.services.items():
+        for stack, host_value in self.stacks.items():
            # Validate list configurations
            if isinstance(host_value, list):
                if not host_value:
-                    msg = f"Service '{service}' has empty host list"
+                    msg = f"Stack '{stack}' has empty host list"
                    raise ValueError(msg)
                if len(host_value) != len(set(host_value)):
-                    msg = f"Service '{service}' has duplicate hosts in list"
+                    msg = f"Stack '{stack}' has duplicate hosts in list"
                    raise ValueError(msg)

            # Validate all referenced hosts exist
-            host_names = self.get_hosts(service)
+            host_names = self.get_hosts(stack)
            for host_name in host_names:
                if host_name not in self.hosts:
-                    msg = f"Service '{service}' references unknown host '{host_name}'"
+                    msg = f"Stack '{stack}' references unknown host '{host_name}'"
                    raise ValueError(msg)
        return self

-    def get_hosts(self, service: str) -> list[str]:
-        """Get list of host names for a service.
+    def get_hosts(self, stack: str) -> list[str]:
+        """Get list of host names for a stack.

        Supports:
        - Single host: "truenas-debian" -> ["truenas-debian"]
        - All hosts: "all" -> list of all configured hosts
        - Explicit list: ["host1", "host2"] -> ["host1", "host2"]
        """
-        if service not in self.services:
-            msg = f"Unknown service: {service}"
+        if stack not in self.stacks:
+            msg = f"Unknown stack: {stack}"
            raise ValueError(msg)
-        host_value = self.services[service]
+        host_value = self.stacks[stack]
        if isinstance(host_value, list):
            return host_value
        if host_value == "all":
            return list(self.hosts.keys())
        return [host_value]

-    def is_multi_host(self, service: str) -> bool:
-        """Check if a service runs on multiple hosts."""
-        return len(self.get_hosts(service)) > 1
+    def is_multi_host(self, stack: str) -> bool:
+        """Check if a stack runs on multiple hosts."""
+        return len(self.get_hosts(stack)) > 1

-    def get_host(self, service: str) -> Host:
-        """Get host config for a service (first host if multi-host)."""
-        if service not in self.services:
-            msg = f"Unknown service: {service}"
+    def get_host(self, stack: str) -> Host:
+        """Get host config for a stack (first host if multi-host)."""
+        if stack not in self.stacks:
+            msg = f"Unknown stack: {stack}"
            raise ValueError(msg)
-        host_names = self.get_hosts(service)
+        host_names = self.get_hosts(stack)
        return self.hosts[host_names[0]]

-    def get_compose_path(self, service: str) -> Path:
-        """Get compose file path for a service.
-
-        Tries compose.yaml first, then docker-compose.yml.
-        """
-        service_dir = self.compose_dir / service
-        for filename in (
-            "compose.yaml",
-            "compose.yml",
-            "docker-compose.yml",
-            "docker-compose.yaml",
-        ):
-            candidate = service_dir / filename
+    def get_compose_path(self, stack: str) -> Path:
+        """Get compose file path for a stack (tries compose.yaml first)."""
+        stack_dir = self.compose_dir / stack
+        for filename in COMPOSE_FILENAMES:
+            candidate = stack_dir / filename
            if candidate.exists():
                return candidate
        # Default to compose.yaml if none exist (will error later)
-        return service_dir / "compose.yaml"
+        return stack_dir / "compose.yaml"

    def discover_compose_dirs(self) -> set[str]:
        """Find all directories in compose_dir that contain a compose file."""
-        compose_filenames = {
-            "compose.yaml",
-            "compose.yml",
-            "docker-compose.yml",
-            "docker-compose.yaml",
-        }
        found: set[str] = set()
        if not self.compose_dir.exists():
            return found
        for subdir in self.compose_dir.iterdir():
-            if subdir.is_dir():
-                for filename in compose_filenames:
-                    if (subdir / filename).exists():
-                        found.add(subdir.name)
-                        break
+            if subdir.is_dir() and any((subdir / f).exists() for f in COMPOSE_FILENAMES):
+                found.add(subdir.name)
        return found


@@ -137,7 +122,11 @@ def _parse_hosts(raw_hosts: dict[str, str | dict[str, str | int]]) -> dict[str,
            hosts[name] = Host(address=value)
        else:
            # Full form: hostname: {address: ..., user: ..., port: ...}
-            hosts[name] = Host(**value)
+            hosts[name] = Host(
+                address=str(value.get("address", "")),
+                user=str(value["user"]) if "user" in value else getpass.getuser(),
+                port=int(value["port"]) if "port" in value else 22,
+            )
    return hosts


@@ -150,24 +139,10 @@ def load_config(path: Path | None = None) -> Config:
    3. ./compose-farm.yaml
    4. $XDG_CONFIG_HOME/compose-farm/compose-farm.yaml (defaults to ~/.config)
    """
-    search_paths = [
-        Path("compose-farm.yaml"),
-        xdg_config_home() / "compose-farm" / "compose-farm.yaml",
-    ]
-
-    if path:
-        config_path = path
-    elif env_path := os.environ.get("CF_CONFIG"):
-        config_path = Path(env_path)
-    else:
-        config_path = None
-        for p in search_paths:
-            if p.exists():
-                config_path = p
-                break
+    config_path = path or find_config_path()

    if config_path is None or not config_path.exists():
-        msg = f"Config file not found. Searched: {', '.join(str(p) for p in search_paths)}"
+        msg = f"Config file not found. Searched: {', '.join(str(p) for p in config_search_paths())}"
        raise FileNotFoundError(msg)

    if config_path.is_dir():
--- a/src/compose_farm/console.py
+++ b/src/compose_farm/console.py
@@ -4,3 +4,35 @@ from rich.console import Console

 console = Console(highlight=False)
 err_console = Console(stderr=True, highlight=False)
+
+
+# --- Message Constants ---
+# Standardized message templates for consistent user-facing output
+
+MSG_STACK_NOT_FOUND = "Stack [cyan]{name}[/] not found in config"
+MSG_HOST_NOT_FOUND = "Host [magenta]{name}[/] not found in config"
+MSG_CONFIG_NOT_FOUND = "Config file not found"
+MSG_DRY_RUN = "[dim](dry-run: no changes made)[/]"
+
+
+# --- Message Helper Functions ---
+
+
+def print_error(msg: str) -> None:
+    """Report a failure: write ``msg`` to stderr behind a red ✗ marker."""
+    rendered = f"[red]✗[/] {msg}"
+    err_console.print(rendered)
+
+
+def print_success(msg: str) -> None:
+    """Report success: write ``msg`` to stdout behind a green ✓ marker."""
+    rendered = f"[green]✓[/] {msg}"
+    console.print(rendered)
+
+
+def print_warning(msg: str) -> None:
+    """Report a warning: write ``msg`` to stderr behind a yellow ! marker."""
+    rendered = f"[yellow]![/] {msg}"
+    err_console.print(rendered)
+
+
+def print_hint(msg: str) -> None:
+    """Show a hint: write ``msg`` to stdout in dim style after a "Hint: " label."""
+    rendered = f"[dim]Hint: {msg}[/]"
+    console.print(rendered)
--- a/src/compose_farm/example-config.yaml
+++ b/src/compose_farm/example-config.yaml
@@ -1,7 +1,7 @@
 # Compose Farm configuration
 # Documentation: https://github.com/basnijholt/compose-farm
 #
-# This file configures compose-farm to manage Docker Compose services
+# This file configures compose-farm to manage Docker Compose stacks
 # across multiple hosts via SSH.
 #
 # Place this file at:
@@ -11,7 +11,7 @@
 #   - Or set CF_CONFIG environment variable

 # ------------------------------------------------------------------------------
-# compose_dir: Directory containing service subdirectories with compose files
+# compose_dir: Directory containing stack subdirectories with compose files
 # ------------------------------------------------------------------------------
 # Each subdirectory should contain a compose.yaml (or docker-compose.yml).
 # This path must be the same on all hosts (NFS mount recommended).
@@ -48,28 +48,28 @@ hosts:
    port: 2222

 # ------------------------------------------------------------------------------
-# services: Map service names to their target host(s)
+# stacks: Map stack names to their target host(s)
 # ------------------------------------------------------------------------------
-# Each service name must match a subdirectory in compose_dir.
+# Each stack name must match a subdirectory in compose_dir.
 #
 # Single host:
-#   service-name: hostname
+#   stack-name: hostname
 #
 # Multiple hosts (explicit list):
-#   service-name: [host1, host2]
+#   stack-name: [host1, host2]
 #
 # All hosts:
-#   service-name: all
+#   stack-name: all
 #
-services:
-  # Example: service runs on a single host
+stacks:
+  # Example: stack runs on a single host
  nginx: server1
  postgres: server2

-  # Example: service runs on multiple specific hosts
+  # Example: stack runs on multiple specific hosts
  # prometheus: [server1, server2]

-  # Example: service runs on ALL hosts (e.g., monitoring agents)
+  # Example: stack runs on ALL hosts (e.g., monitoring agents)
  # node-exporter: all

 # ------------------------------------------------------------------------------
@@ -81,9 +81,9 @@ services:
 # traefik_file: /opt/compose/traefik/dynamic.d/compose-farm.yml

 # ------------------------------------------------------------------------------
-# traefik_service: (optional) Service name running Traefik
+# traefik_stack: (optional) Stack name running Traefik
 # ------------------------------------------------------------------------------
-# When generating traefik_file, services on the same host as Traefik are
+# When generating traefik_file, stacks on the same host as Traefik are
 # skipped (they're handled by Traefik's Docker provider directly).
 #
-# traefik_service: traefik
+# traefik_stack: traefik
--- a/src/compose_farm/executor.py
+++ b/src/compose_farm/executor.py
@@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Any
 from rich.markup import escape

 from .console import console, err_console
+from .ssh_keys import get_key_path, get_ssh_auth_sock, get_ssh_env

 if TYPE_CHECKING:
    from collections.abc import Callable
@@ -22,6 +23,85 @@ LOCAL_ADDRESSES = frozenset({"local", "localhost", "127.0.0.1", "::1"})
 _DEFAULT_SSH_PORT = 22


+def _print_compose_command(
+    host_name: str,
+    compose_dir: str,
+    compose_path: str,
+    compose_cmd: str,
+) -> None:
+    """Print the docker compose command being executed.
+
+    Shows the host and a simplified command in dim style. The compose file is
+    shown relative to ``compose_dir`` when it lies inside that directory, and
+    unchanged otherwise.
+
+    Args:
+        host_name: Name of the host the command runs on
+        compose_dir: Base compose directory, as a string
+        compose_path: Path of the compose file, as a string
+        compose_cmd: Arguments that follow ``docker compose -f <file>``
+
+    """
+    # Match on a directory boundary: a plain startswith() would treat
+    # "/opt/compose-old/app" as living inside "/opt/compose".
+    dir_prefix = compose_dir.rstrip("/") + "/"
+    if compose_path.startswith(dir_prefix):
+        rel_path = compose_path[len(dir_prefix) :].lstrip("/")
+    else:
+        rel_path = compose_path
+
+    # Escape the dynamic parts so brackets in a path or command (e.g. "[/x]")
+    # are printed literally instead of being parsed as Rich markup.
+    console.print(
+        f"[dim][magenta]{escape(host_name)}[/magenta]: "
+        f"docker compose -f {escape(rel_path)} {escape(compose_cmd)}[/dim]"
+    )
+
+
+async def _stream_output_lines(
+    reader: Any,
+    prefix: str,
+    *,
+    is_stderr: bool = False,
+) -> None:
+    """Forward each line produced by ``reader`` to the console.
+
+    Lines may arrive as bytes (decoded here) or as str (used as-is).
+    Whitespace-only lines are dropped. A non-empty ``prefix`` is shown in
+    cyan brackets before each line; an empty one prints the line bare.
+    Output goes to the stderr console when ``is_stderr`` is true.
+    """
+    sink = console
+    if is_stderr:
+        sink = err_console
+
+    async for chunk in reader:
+        if isinstance(chunk, bytes):
+            chunk = chunk.decode()
+        if not chunk.strip():
+            continue
+        # Escape the payload so brackets in command output are not parsed as markup
+        rendered = escape(chunk)
+        if prefix:
+            rendered = f"[cyan]\\[{prefix}][/] {rendered}"
+        # Lines keep their own trailing newline, so suppress the console's
+        sink.print(rendered, end="")
+
+
+def build_ssh_command(host: Host, command: str, *, tty: bool = False) -> list[str]:
+    """Assemble the argv for running ``command`` on ``host`` through the ssh client.
+
+    Args:
+        host: Host configuration supplying address, port and user
+        command: Command string handed to the remote side as a single argument
+        tty: When true, pass ``-tt`` to force TTY allocation
+
+    Returns:
+        Argument list starting with ``ssh``, suitable for subprocess
+
+    """
+    argv = ["ssh"]
+    if tty:
+        # Must directly follow the program name, ahead of the -o options
+        argv.append("-tt")
+
+    # Host key checking is turned off, nothing is recorded in a known_hosts
+    # file, and the client only logs errors.
+    for option in (
+        "StrictHostKeyChecking=no",
+        "UserKnownHostsFile=/dev/null",
+        "LogLevel=ERROR",
+    ):
+        argv += ["-o", option]
+
+    identity = get_key_path()
+    if identity:
+        argv += ["-i", str(identity)]
+
+    # Only pass -p when the port differs from the default
+    if host.port != _DEFAULT_SSH_PORT:
+        argv += ["-p", str(host.port)]
+
+    argv += [f"{host.user}@{host.address}", command]
+    return argv
+
+
@lru_cache(maxsize=1)
 def _get_local_ips() -> frozenset[str]:
    """Get all IP addresses of the current machine."""
@@ -46,7 +126,7 @@ def _get_local_ips() -> frozenset[str]:
 class CommandResult:
    """Result of a command execution."""

-    service: str
+    stack: str
    exit_code: int
    success: bool
    stdout: str = ""
@@ -71,12 +151,32 @@ def is_local(host: Host) -> bool:
    return addr in _get_local_ips()


+def ssh_connect_kwargs(host: Host) -> dict[str, Any]:
+    """Build the keyword arguments for asyncssh.connect() from a Host config.
+
+    Always sets host, port, username and ``known_hosts=None``. ``agent_path``
+    and ``client_keys`` are added only when the corresponding helper returns
+    a truthy value.
+    """
+    options: dict[str, Any] = dict(
+        host=host.address,
+        port=host.port,
+        username=host.user,
+        known_hosts=None,
+    )
+    # SSH agent socket, if one can be located
+    if agent_socket := get_ssh_auth_sock():
+        options["agent_path"] = agent_socket
+    # Key file, offered alongside (or instead of) the agent
+    if identity := get_key_path():
+        options["client_keys"] = [str(identity)]
+    return options
+
+
 async def _run_local_command(
    command: str,
-    service: str,
+    stack: str,
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str = "",
 ) -> CommandResult:
    """Run a command locally with streaming output."""
    try:
@@ -89,7 +189,7 @@ async def _run_local_command(
            )
            await proc.wait()
            return CommandResult(
-                service=service,
+                stack=stack,
                exit_code=proc.returncode or 0,
                success=proc.returncode == 0,
            )
@@ -101,25 +201,9 @@ async def _run_local_command(
        )

        if stream and proc.stdout and proc.stderr:
-
-            async def read_stream(
-                reader: asyncio.StreamReader,
-                prefix: str,
-                *,
-                is_stderr: bool = False,
-            ) -> None:
-                out = err_console if is_stderr else console
-                while True:
-                    line = await reader.readline()
-                    if not line:
-                        break
-                    text = line.decode()
-                    if text.strip():  # Skip empty lines
-                        out.print(f"[cyan]\\[{prefix}][/] {escape(text)}", end="")
-
            await asyncio.gather(
-                read_stream(proc.stdout, service),
-                read_stream(proc.stderr, service, is_stderr=True),
+                _stream_output_lines(proc.stdout, prefix),
+                _stream_output_lines(proc.stderr, prefix, is_stderr=True),
            )

        stdout_data = b""
@@ -130,36 +214,39 @@ async def _run_local_command(
            await proc.wait()

        return CommandResult(
-            service=service,
+            stack=stack,
            exit_code=proc.returncode or 0,
            success=proc.returncode == 0,
            stdout=stdout_data.decode() if stdout_data else "",
            stderr=stderr_data.decode() if stderr_data else "",
        )
    except OSError as e:
-        err_console.print(f"[cyan]\\[{service}][/] [red]Local error:[/] {e}")
-        return CommandResult(service=service, exit_code=1, success=False)
+        err_console.print(f"[cyan]\\[{stack}][/] [red]Local error:[/] {e}")
+        return CommandResult(stack=stack, exit_code=1, success=False)


 async def _run_ssh_command(
    host: Host,
    command: str,
-    service: str,
+    stack: str,
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str = "",
 ) -> CommandResult:
    """Run a command on a remote host via SSH with streaming output."""
    if raw:
        # Use native ssh with TTY for proper progress bar rendering
-        ssh_args = ["ssh", "-t"]
-        if host.port != _DEFAULT_SSH_PORT:
-            ssh_args.extend(["-p", str(host.port)])
-        ssh_args.extend([f"{host.user}@{host.address}", command])
+        ssh_args = build_ssh_command(host, command, tty=True)
+
+        def run_ssh() -> subprocess.CompletedProcess[bytes]:
+            return subprocess.run(ssh_args, check=False, env=get_ssh_env())
+
        # Run in thread to avoid blocking the event loop
-        result = await asyncio.to_thread(subprocess.run, ssh_args, check=False)
+        # Use get_ssh_env() to auto-detect SSH agent socket
+        result = await asyncio.to_thread(run_ssh)
        return CommandResult(
-            service=service,
+            stack=stack,
            exit_code=result.returncode,
            success=result.returncode == 0,
        )
@@ -168,29 +255,12 @@ async def _run_ssh_command(

    proc: asyncssh.SSHClientProcess[Any]
    try:
-        async with asyncssh.connect(  # noqa: SIM117 - conn needed before create_process
-            host.address,
-            port=host.port,
-            username=host.user,
-            known_hosts=None,
-        ) as conn:
+        async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:  # noqa: SIM117
            async with conn.create_process(command) as proc:
                if stream:
-
-                    async def read_stream(
-                        reader: Any,
-                        prefix: str,
-                        *,
-                        is_stderr: bool = False,
-                    ) -> None:
-                        out = err_console if is_stderr else console
-                        async for line in reader:
-                            if line.strip():  # Skip empty lines
-                                out.print(f"[cyan]\\[{prefix}][/] {escape(line)}", end="")
-
                    await asyncio.gather(
-                        read_stream(proc.stdout, service),
-                        read_stream(proc.stderr, service, is_stderr=True),
+                        _stream_output_lines(proc.stdout, prefix),
+                        _stream_output_lines(proc.stderr, prefix, is_stderr=True),
                    )

                stdout_data = ""
@@ -201,131 +271,155 @@ async def _run_ssh_command(

                await proc.wait()
                return CommandResult(
-                    service=service,
+                    stack=stack,
                    exit_code=proc.exit_status or 0,
                    success=proc.exit_status == 0,
                    stdout=stdout_data,
                    stderr=stderr_data,
                )
    except (OSError, asyncssh.Error) as e:
-        err_console.print(f"[cyan]\\[{service}][/] [red]SSH error:[/] {e}")
-        return CommandResult(service=service, exit_code=1, success=False)
+        err_console.print(f"[cyan]\\[{stack}][/] [red]SSH error:[/] {e}")
+        return CommandResult(stack=stack, exit_code=1, success=False)


 async def run_command(
    host: Host,
    command: str,
-    service: str,
+    stack: str,
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str | None = None,
 ) -> CommandResult:
    """Run a command on a host (locally or via SSH).

    Args:
        host: Host configuration
        command: Command to run
-        service: Service name (used as prefix in output)
+        stack: Stack name (stored in result)
        stream: Whether to stream output (default True)
        raw: Whether to use raw mode with TTY (default False)
+        prefix: Output prefix. None=use stack name, ""=no prefix.

    """
+    output_prefix = stack if prefix is None else prefix
    if is_local(host):
-        return await _run_local_command(command, service, stream=stream, raw=raw)
-    return await _run_ssh_command(host, command, service, stream=stream, raw=raw)
+        return await _run_local_command(
+            command, stack, stream=stream, raw=raw, prefix=output_prefix
+        )
+    return await _run_ssh_command(
+        host, command, stack, stream=stream, raw=raw, prefix=output_prefix
+    )


 async def run_compose(
    config: Config,
-    service: str,
+    stack: str,
    compose_cmd: str,
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str | None = None,
 ) -> CommandResult:
-    """Run a docker compose command for a service."""
-    host = config.get_host(service)
-    compose_path = config.get_compose_path(service)
+    """Run a docker compose command for a stack."""
+    host_name = config.get_hosts(stack)[0]
+    host = config.hosts[host_name]
+    compose_path = config.get_compose_path(stack)
+
+    _print_compose_command(host_name, str(config.compose_dir), str(compose_path), compose_cmd)

    command = f"docker compose -f {compose_path} {compose_cmd}"
-    return await run_command(host, command, service, stream=stream, raw=raw)
+    return await run_command(host, command, stack, stream=stream, raw=raw, prefix=prefix)


 async def run_compose_on_host(
    config: Config,
-    service: str,
+    stack: str,
    host_name: str,
    compose_cmd: str,
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str | None = None,
 ) -> CommandResult:
-    """Run a docker compose command for a service on a specific host.
+    """Run a docker compose command for a stack on a specific host.

    Used for migration - running 'down' on the old host before 'up' on new host.
    """
    host = config.hosts[host_name]
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)
+
+    _print_compose_command(host_name, str(config.compose_dir), str(compose_path), compose_cmd)

    command = f"docker compose -f {compose_path} {compose_cmd}"
-    return await run_command(host, command, service, stream=stream, raw=raw)
+    return await run_command(host, command, stack, stream=stream, raw=raw, prefix=prefix)


-async def run_on_services(
+async def run_on_stacks(
    config: Config,
-    services: list[str],
+    stacks: list[str],
    compose_cmd: str,
    *,
    stream: bool = True,
    raw: bool = False,
 ) -> list[CommandResult]:
-    """Run a docker compose command on multiple services in parallel.
+    """Run a docker compose command on multiple stacks in parallel.

-    For multi-host services, runs on all configured hosts.
-    Note: raw=True only makes sense for single-service operations.
+    For multi-host stacks, runs on all configured hosts.
+    Note: raw=True only makes sense for single-stack operations.
    """
-    return await run_sequential_on_services(config, services, [compose_cmd], stream=stream, raw=raw)
+    return await run_sequential_on_stacks(config, stacks, [compose_cmd], stream=stream, raw=raw)


-async def _run_sequential_commands(
+async def _run_sequential_stack_commands(
    config: Config,
-    service: str,
+    stack: str,
    commands: list[str],
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str | None = None,
 ) -> CommandResult:
-    """Run multiple compose commands sequentially for a service."""
+    """Run multiple compose commands sequentially for a stack."""
    for cmd in commands:
-        result = await run_compose(config, service, cmd, stream=stream, raw=raw)
+        result = await run_compose(config, stack, cmd, stream=stream, raw=raw, prefix=prefix)
        if not result.success:
            return result
-    return CommandResult(service=service, exit_code=0, success=True)
+    return CommandResult(stack=stack, exit_code=0, success=True)


-async def _run_sequential_commands_multi_host(
+async def _run_sequential_stack_commands_multi_host(
    config: Config,
-    service: str,
+    stack: str,
    commands: list[str],
    *,
    stream: bool = True,
    raw: bool = False,
+    prefix: str | None = None,
 ) -> list[CommandResult]:
-    """Run multiple compose commands sequentially for a multi-host service.
+    """Run multiple compose commands sequentially for a multi-host stack.

    Commands are run sequentially, but each command runs on all hosts in parallel.
+    For multi-host stacks, prefix defaults to stack@host format.
    """
-    host_names = config.get_hosts(service)
-    compose_path = config.get_compose_path(service)
+    host_names = config.get_hosts(stack)
+    compose_path = config.get_compose_path(stack)
    final_results: list[CommandResult] = []

    for cmd in commands:
        command = f"docker compose -f {compose_path} {cmd}"
        tasks = []
        for host_name in host_names:
+            _print_compose_command(host_name, str(config.compose_dir), str(compose_path), cmd)
            host = config.hosts[host_name]
-            label = f"{service}@{host_name}" if len(host_names) > 1 else service
-            tasks.append(run_command(host, command, label, stream=stream, raw=raw))
+            # For multi-host stacks, always use stack@host prefix to distinguish output
+            label = f"{stack}@{host_name}" if len(host_names) > 1 else stack
+            # Multi-host stacks always need prefixes to distinguish output from different hosts
+            # (ignore empty prefix from single-stack batches - we still need to distinguish hosts)
+            effective_prefix = label if len(host_names) > 1 else prefix
+            tasks.append(
+                run_command(host, command, label, stream=stream, raw=raw, prefix=effective_prefix)
+            )

        results = await asyncio.gather(*tasks)
        final_results = list(results)
@@ -337,33 +431,38 @@ async def _run_sequential_commands_multi_host(
    return final_results


-async def run_sequential_on_services(
+async def run_sequential_on_stacks(
    config: Config,
-    services: list[str],
+    stacks: list[str],
    commands: list[str],
    *,
    stream: bool = True,
    raw: bool = False,
 ) -> list[CommandResult]:
-    """Run sequential commands on multiple services in parallel.
+    """Run sequential commands on multiple stacks in parallel.

-    For multi-host services, runs on all configured hosts.
-    Note: raw=True only makes sense for single-service operations.
+    For multi-host stacks, runs on all configured hosts.
+    Note: raw=True only makes sense for single-stack operations.
    """
-    # Separate multi-host and single-host services for type-safe gathering
+    # Skip prefix for single-stack operations (command line already shows context)
+    prefix: str | None = "" if len(stacks) == 1 else None
+
+    # Separate multi-host and single-host stacks for type-safe gathering
    multi_host_tasks = []
    single_host_tasks = []

-    for service in services:
-        if config.is_multi_host(service):
+    for stack in stacks:
+        if config.is_multi_host(stack):
            multi_host_tasks.append(
-                _run_sequential_commands_multi_host(
-                    config, service, commands, stream=stream, raw=raw
+                _run_sequential_stack_commands_multi_host(
+                    config, stack, commands, stream=stream, raw=raw, prefix=prefix
                )
            )
        else:
            single_host_tasks.append(
-                _run_sequential_commands(config, service, commands, stream=stream, raw=raw)
+                _run_sequential_stack_commands(
+                    config, stack, commands, stream=stream, raw=raw, prefix=prefix
+                )
            )

    # Gather results separately to maintain type safety
@@ -381,18 +480,18 @@ async def run_sequential_on_services(
    return flat_results


-async def check_service_running(
+async def check_stack_running(
    config: Config,
-    service: str,
+    stack: str,
    host_name: str,
 ) -> bool:
-    """Check if a service has running containers on a specific host."""
+    """Check if a stack has running containers on a specific host."""
    host = config.hosts[host_name]
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)

    # Use ps --status running to check for running containers
    command = f"docker compose -f {compose_path} ps --status running -q"
-    result = await run_command(host, command, service, stream=False)
+    result = await run_command(host, command, stack, stream=False)

    # If command succeeded and has output, containers are running
    return result.success and bool(result.stdout.strip())
--- a/src/compose_farm/logs.py
+++ b/src/compose_farm/logs.py
@@ -25,9 +25,9 @@ _DIGEST_HEX_LENGTH = 64

@dataclass(frozen=True)
 class SnapshotEntry:
-    """Normalized image snapshot for a single service."""
+    """Normalized image snapshot for a single stack."""

-    service: str
+    stack: str
    host: str
    compose_file: Path
    image: str
@@ -37,7 +37,7 @@ class SnapshotEntry:
    def as_dict(self, first_seen: str, last_seen: str) -> dict[str, str]:
        """Render snapshot as a TOML-friendly dict."""
        return {
-            "service": self.service,
+            "stack": self.stack,
            "host": self.host,
            "compose_file": str(self.compose_file),
            "image": self.image,
@@ -103,24 +103,24 @@ def _extract_image_fields(record: dict[str, Any]) -> tuple[str, str]:
    return image, digest


-async def collect_service_entries(
+async def collect_stack_entries(
    config: Config,
-    service: str,
+    stack: str,
    *,
    now: datetime,
    run_compose_fn: Callable[..., Awaitable[CommandResult]] = run_compose,
 ) -> list[SnapshotEntry]:
-    """Run `docker compose images` for a service and normalize results."""
-    result = await run_compose_fn(config, service, "images --format json", stream=False)
+    """Run `docker compose images` for a stack and normalize results."""
+    result = await run_compose_fn(config, stack, "images --format json", stream=False)
    if not result.success:
        msg = result.stderr or f"compose images exited with {result.exit_code}"
-        error = f"[{service}] Unable to read images: {msg}"
+        error = f"[{stack}] Unable to read images: {msg}"
        raise RuntimeError(error)

    records = _parse_images_output(result.stdout)
-    # Use first host for snapshots (multi-host services use same images on all hosts)
-    host_name = config.get_hosts(service)[0]
-    compose_path = config.get_compose_path(service)
+    # Use first host for snapshots (multi-host stacks use same images on all hosts)
+    host_name = config.get_hosts(stack)[0]
+    compose_path = config.get_compose_path(stack)

    entries: list[SnapshotEntry] = []
    for record in records:
@@ -129,7 +129,7 @@ async def collect_service_entries(
            continue
        entries.append(
            SnapshotEntry(
-                service=service,
+                stack=stack,
                host=host_name,
                compose_file=compose_path,
                image=image,
@@ -145,7 +145,14 @@ def load_existing_entries(log_path: Path) -> list[dict[str, str]]:
    if not log_path.exists():
        return []
    data = tomllib.loads(log_path.read_text())
-    return list(data.get("entries", []))
+    entries = list(data.get("entries", []))
+    normalized: list[dict[str, str]] = []
+    for entry in entries:
+        normalized_entry = dict(entry)
+        if "stack" not in normalized_entry and "service" in normalized_entry:
+            normalized_entry["stack"] = normalized_entry.pop("service")
+        normalized.append(normalized_entry)
+    return normalized


 def merge_entries(
@@ -156,11 +163,11 @@ def merge_entries(
 ) -> list[dict[str, str]]:
    """Merge new snapshot entries with existing ones, preserving first_seen timestamps."""
    merged: dict[tuple[str, str, str], dict[str, str]] = {
-        (e["service"], e["host"], e["digest"]): dict(e) for e in existing
+        (e["stack"], e["host"], e["digest"]): dict(e) for e in existing
    }

    for entry in new_entries:
-        key = (entry.service, entry.host, entry.digest)
+        key = (entry.stack, entry.host, entry.digest)
        first_seen = merged.get(key, {}).get("first_seen", now_iso)
        merged[key] = entry.as_dict(first_seen, now_iso)

@@ -175,10 +182,10 @@ def write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str,
    if entries:
        lines.append("")

-    for entry in sorted(entries, key=lambda e: (e["service"], e["host"], e["digest"])):
+    for entry in sorted(entries, key=lambda e: (e["stack"], e["host"], e["digest"])):
        lines.append("[[entries]]")
        for field in [
-            "service",
+            "stack",
            "host",
            "compose_file",
            "image",
--- a/src/compose_farm/operations.py
+++ b/src/compose_farm/operations.py
@@ -10,22 +10,22 @@ import asyncio
 from typing import TYPE_CHECKING, NamedTuple

 from .compose import parse_devices, parse_external_networks, parse_host_volumes
-from .console import console, err_console
+from .console import console, err_console, print_error, print_success, print_warning
 from .executor import (
    CommandResult,
    check_networks_exist,
    check_paths_exist,
-    check_service_running,
+    check_stack_running,
    run_command,
    run_compose,
    run_compose_on_host,
 )
 from .state import (
-    get_orphaned_services,
-    get_service_host,
-    remove_service,
-    set_multi_host_service,
-    set_service_host,
+    get_orphaned_stacks,
+    get_stack_host,
+    remove_stack,
+    set_multi_host_stack,
+    set_stack_host,
 )

 if TYPE_CHECKING:
@@ -37,7 +37,7 @@ class OperationInterruptedError(Exception):


 class PreflightResult(NamedTuple):
-    """Result of pre-flight checks for a service on a host."""
+    """Result of pre-flight checks for a stack on a host."""

    missing_paths: list[str]
    missing_networks: list[str]
@@ -51,7 +51,7 @@ class PreflightResult(NamedTuple):

 async def _run_compose_step(
    cfg: Config,
-    service: str,
+    stack: str,
    command: str,
    *,
    raw: bool,
@@ -59,9 +59,9 @@ async def _run_compose_step(
 ) -> CommandResult:
    """Run a compose command, handle raw output newline, and check for interrupts."""
    if host:
-        result = await run_compose_on_host(cfg, service, host, command, raw=raw)
+        result = await run_compose_on_host(cfg, stack, host, command, raw=raw)
    else:
-        result = await run_compose(cfg, service, command, raw=raw)
+        result = await run_compose(cfg, stack, command, raw=raw)
    if raw:
        print()  # Ensure newline after raw output
    if result.interrupted:
@@ -69,63 +69,61 @@ async def _run_compose_step(
    return result


-def get_service_paths(cfg: Config, service: str) -> list[str]:
-    """Get all required paths for a service (compose_dir + volumes)."""
+def get_stack_paths(cfg: Config, stack: str) -> list[str]:
+    """Get all required paths for a stack (compose_dir + volumes)."""
    paths = [str(cfg.compose_dir)]
-    paths.extend(parse_host_volumes(cfg, service))
+    paths.extend(parse_host_volumes(cfg, stack))
    return paths


-async def discover_service_host(cfg: Config, service: str) -> tuple[str, str | list[str] | None]:
-    """Discover where a service is running.
+async def discover_stack_host(cfg: Config, stack: str) -> tuple[str, str | list[str] | None]:
+    """Discover where a stack is running.

-    For multi-host services, checks all assigned hosts in parallel.
+    For multi-host stacks, checks all assigned hosts in parallel.
    For single-host, checks assigned host first, then others.

-    Returns (service_name, host_or_hosts_or_none).
+    Returns (stack_name, host_or_hosts_or_none).
    """
-    assigned_hosts = cfg.get_hosts(service)
+    assigned_hosts = cfg.get_hosts(stack)

-    if cfg.is_multi_host(service):
+    if cfg.is_multi_host(stack):
        # Check all assigned hosts in parallel
-        checks = await asyncio.gather(
-            *[check_service_running(cfg, service, h) for h in assigned_hosts]
-        )
+        checks = await asyncio.gather(*[check_stack_running(cfg, stack, h) for h in assigned_hosts])
        running = [h for h, is_running in zip(assigned_hosts, checks, strict=True) if is_running]
-        return service, running if running else None
+        return stack, running if running else None

    # Single-host: check assigned host first, then others
-    if await check_service_running(cfg, service, assigned_hosts[0]):
-        return service, assigned_hosts[0]
+    if await check_stack_running(cfg, stack, assigned_hosts[0]):
+        return stack, assigned_hosts[0]
    for host in cfg.hosts:
-        if host != assigned_hosts[0] and await check_service_running(cfg, service, host):
-            return service, host
-    return service, None
+        if host != assigned_hosts[0] and await check_stack_running(cfg, stack, host):
+            return stack, host
+    return stack, None


-async def check_service_requirements(
+async def check_stack_requirements(
    cfg: Config,
-    service: str,
+    stack: str,
    host_name: str,
 ) -> PreflightResult:
-    """Check if a service can run on a specific host.
+    """Check if a stack can run on a specific host.

    Verifies that all required paths (volumes), networks, and devices exist.
    """
    # Check mount paths
-    paths = get_service_paths(cfg, service)
+    paths = get_stack_paths(cfg, stack)
    path_exists = await check_paths_exist(cfg, host_name, paths)
    missing_paths = [p for p, found in path_exists.items() if not found]

    # Check external networks
-    networks = parse_external_networks(cfg, service)
+    networks = parse_external_networks(cfg, stack)
    missing_networks: list[str] = []
    if networks:
        net_exists = await check_networks_exist(cfg, host_name, networks)
        missing_networks = [n for n, found in net_exists.items() if not found]

    # Check devices
-    devices = parse_devices(cfg, service)
+    devices = parse_devices(cfg, stack)
    missing_devices: list[str] = []
    if devices:
        dev_exists = await check_paths_exist(cfg, host_name, devices)
@@ -136,7 +134,7 @@ async def check_service_requirements(

 async def _cleanup_and_rollback(
    cfg: Config,
-    service: str,
+    stack: str,
    target_host: str,
    current_host: str,
    prefix: str,
@@ -145,65 +143,59 @@ async def _cleanup_and_rollback(
    raw: bool = False,
 ) -> None:
    """Clean up failed start and attempt rollback to old host if it was running."""
-    err_console.print(
-        f"{prefix} [yellow]![/] Cleaning up failed start on [magenta]{target_host}[/]"
-    )
-    await run_compose(cfg, service, "down", raw=raw)
+    print_warning(f"{prefix} Cleaning up failed start on [magenta]{target_host}[/]")
+    await run_compose(cfg, stack, "down", raw=raw)

    if not was_running:
        err_console.print(
-            f"{prefix} [dim]Service was not running on [magenta]{current_host}[/], skipping rollback[/]"
+            f"{prefix} [dim]Stack was not running on [magenta]{current_host}[/], skipping rollback[/]"
        )
        return

-    err_console.print(f"{prefix} [yellow]![/] Rolling back to [magenta]{current_host}[/]...")
-    rollback_result = await run_compose_on_host(cfg, service, current_host, "up -d", raw=raw)
+    print_warning(f"{prefix} Rolling back to [magenta]{current_host}[/]...")
+    rollback_result = await run_compose_on_host(cfg, stack, current_host, "up -d", raw=raw)
    if rollback_result.success:
-        console.print(f"{prefix} [green]✓[/] Rollback succeeded on [magenta]{current_host}[/]")
+        print_success(f"{prefix} Rollback succeeded on [magenta]{current_host}[/]")
    else:
-        err_console.print(f"{prefix} [red]✗[/] Rollback failed - service is down")
+        print_error(f"{prefix} Rollback failed - stack is down")


 def _report_preflight_failures(
-    service: str,
+    stack: str,
    target_host: str,
    preflight: PreflightResult,
 ) -> None:
    """Report pre-flight check failures."""
-    err_console.print(
-        f"[cyan]\\[{service}][/] [red]✗[/] Cannot start on [magenta]{target_host}[/]:"
-    )
+    print_error(f"[cyan]\\[{stack}][/] Cannot start on [magenta]{target_host}[/]:")
    for path in preflight.missing_paths:
-        err_console.print(f"  [red]✗[/] missing path: {path}")
+        print_error(f"  missing path: {path}")
    for net in preflight.missing_networks:
-        err_console.print(f"  [red]✗[/] missing network: {net}")
+        print_error(f"  missing network: {net}")
    if preflight.missing_networks:
-        err_console.print(f"  [dim]hint: cf init-network {target_host}[/]")
+        err_console.print(f"  [dim]Hint: cf init-network {target_host}[/]")
    for dev in preflight.missing_devices:
-        err_console.print(f"  [red]✗[/] missing device: {dev}")
+        print_error(f"  missing device: {dev}")


-async def _up_multi_host_service(
+async def _up_multi_host_stack(
    cfg: Config,
-    service: str,
+    stack: str,
    prefix: str,
    *,
    raw: bool = False,
 ) -> list[CommandResult]:
-    """Start a multi-host service on all configured hosts."""
-    host_names = cfg.get_hosts(service)
+    """Start a multi-host stack on all configured hosts."""
+    host_names = cfg.get_hosts(stack)
    results: list[CommandResult] = []
-    compose_path = cfg.get_compose_path(service)
+    compose_path = cfg.get_compose_path(stack)
    command = f"docker compose -f {compose_path} up -d"

    # Pre-flight checks on all hosts
    for host_name in host_names:
-        preflight = await check_service_requirements(cfg, service, host_name)
+        preflight = await check_stack_requirements(cfg, stack, host_name)
        if not preflight.ok:
-            _report_preflight_failures(service, host_name, preflight)
-            results.append(
-                CommandResult(service=f"{service}@{host_name}", exit_code=1, success=False)
-            )
+            _report_preflight_failures(stack, host_name, preflight)
+            results.append(CommandResult(stack=f"{stack}@{host_name}", exit_code=1, success=False))
            return results

    # Start on all hosts
@@ -213,7 +205,7 @@ async def _up_multi_host_service(
    succeeded_hosts: list[str] = []
    for host_name in host_names:
        host = cfg.hosts[host_name]
-        label = f"{service}@{host_name}"
+        label = f"{stack}@{host_name}"
        result = await run_command(host, command, label, stream=not raw, raw=raw)
        if raw:
            print()  # Ensure newline after raw output
@@ -223,92 +215,89 @@ async def _up_multi_host_service(

    # Update state with hosts that succeeded (partial success is tracked)
    if succeeded_hosts:
-        set_multi_host_service(cfg, service, succeeded_hosts)
+        set_multi_host_stack(cfg, stack, succeeded_hosts)

    return results


-async def _migrate_service(
+async def _migrate_stack(
    cfg: Config,
-    service: str,
+    stack: str,
    current_host: str,
    target_host: str,
    prefix: str,
    *,
    raw: bool = False,
 ) -> CommandResult | None:
-    """Migrate a service from current_host to target_host.
+    """Migrate a stack from current_host to target_host.

-    Pre-pulls/builds images on target, then stops service on current host.
+    Pre-pulls/builds images on target, then stops stack on current host.
    Returns failure result if migration prep fails, None on success.
    """
    console.print(
        f"{prefix} Migrating from [magenta]{current_host}[/] → [magenta]{target_host}[/]..."
    )

-    # Prepare images on target host before stopping old service to minimize downtime.
-    # Pull handles image-based services; build handles Dockerfile-based services.
+    # Prepare images on target host before stopping old stack to minimize downtime.
+    # Pull handles image-based compose services; build handles Dockerfile-based ones.
    # --ignore-buildable makes pull skip images that have build: defined.
    for cmd, label in [("pull --ignore-buildable", "Pull"), ("build", "Build")]:
-        result = await _run_compose_step(cfg, service, cmd, raw=raw)
+        result = await _run_compose_step(cfg, stack, cmd, raw=raw)
        if not result.success:
-            err_console.print(
-                f"{prefix} [red]✗[/] {label} failed on [magenta]{target_host}[/], "
-                "leaving service on current host"
+            print_error(
+                f"{prefix} {label} failed on [magenta]{target_host}[/], "
+                "leaving stack on current host"
            )
            return result

    # Stop on current host
-    down_result = await _run_compose_step(cfg, service, "down", raw=raw, host=current_host)
+    down_result = await _run_compose_step(cfg, stack, "down", raw=raw, host=current_host)
    return down_result if not down_result.success else None


-async def _up_single_service(
+async def _up_single_stack(
    cfg: Config,
-    service: str,
+    stack: str,
    prefix: str,
    *,
    raw: bool,
 ) -> CommandResult:
-    """Start a single-host service with migration support."""
-    target_host = cfg.get_hosts(service)[0]
-    current_host = get_service_host(cfg, service)
+    """Start a single-host stack with migration support."""
+    target_host = cfg.get_hosts(stack)[0]
+    current_host = get_stack_host(cfg, stack)

    # Pre-flight check: verify paths, networks, and devices exist on target
-    preflight = await check_service_requirements(cfg, service, target_host)
+    preflight = await check_stack_requirements(cfg, stack, target_host)
    if not preflight.ok:
-        _report_preflight_failures(service, target_host, preflight)
-        return CommandResult(service=service, exit_code=1, success=False)
+        _report_preflight_failures(stack, target_host, preflight)
+        return CommandResult(stack=stack, exit_code=1, success=False)

-    # If service is deployed elsewhere, migrate it
+    # If stack is deployed elsewhere, migrate it
    did_migration = False
    was_running = False
    if current_host and current_host != target_host:
        if current_host in cfg.hosts:
-            was_running = await check_service_running(cfg, service, current_host)
-            failure = await _migrate_service(
-                cfg, service, current_host, target_host, prefix, raw=raw
-            )
+            was_running = await check_stack_running(cfg, stack, current_host)
+            failure = await _migrate_stack(cfg, stack, current_host, target_host, prefix, raw=raw)
            if failure:
                return failure
            did_migration = True
        else:
-            err_console.print(
-                f"{prefix} [yellow]![/] was on "
-                f"[magenta]{current_host}[/] (not in config), skipping down"
+            print_warning(
+                f"{prefix} was on [magenta]{current_host}[/] (not in config), skipping down"
            )

    # Start on target host
    console.print(f"{prefix} Starting on [magenta]{target_host}[/]...")
-    up_result = await _run_compose_step(cfg, service, "up -d", raw=raw)
+    up_result = await _run_compose_step(cfg, stack, "up -d", raw=raw)

    # Update state on success, or rollback on failure
    if up_result.success:
-        set_service_host(cfg, service, target_host)
+        set_stack_host(cfg, stack, target_host)
    elif did_migration and current_host:
        await _cleanup_and_rollback(
            cfg,
-            service,
+            stack,
            target_host,
            current_host,
            prefix,
@@ -319,24 +308,24 @@ async def _up_single_service(
    return up_result


-async def up_services(
+async def up_stacks(
    cfg: Config,
-    services: list[str],
+    stacks: list[str],
    *,
    raw: bool = False,
 ) -> list[CommandResult]:
-    """Start services with automatic migration if host changed."""
+    """Start stacks with automatic migration if host changed."""
    results: list[CommandResult] = []
-    total = len(services)
+    total = len(stacks)

    try:
-        for idx, service in enumerate(services, 1):
-            prefix = f"[dim][{idx}/{total}][/] [cyan]\\[{service}][/]"
+        for idx, stack in enumerate(stacks, 1):
+            prefix = f"[dim][{idx}/{total}][/] [cyan]\\[{stack}][/]"

-            if cfg.is_multi_host(service):
-                results.extend(await _up_multi_host_service(cfg, service, prefix, raw=raw))
+            if cfg.is_multi_host(stack):
+                results.extend(await _up_multi_host_stack(cfg, stack, prefix, raw=raw))
            else:
-                results.append(await _up_single_service(cfg, service, prefix, raw=raw))
+                results.append(await _up_single_stack(cfg, stack, prefix, raw=raw))
    except OperationInterruptedError:
        raise KeyboardInterrupt from None

@@ -345,22 +334,22 @@ async def up_services(

 async def check_host_compatibility(
    cfg: Config,
-    service: str,
+    stack: str,
 ) -> dict[str, tuple[int, int, list[str]]]:
-    """Check which hosts can run a service based on paths, networks, and devices.
+    """Check which hosts can run a stack based on paths, networks, and devices.

    Returns dict of host_name -> (found_count, total_count, missing_items).
    """
    # Get total requirements count
-    paths = get_service_paths(cfg, service)
-    networks = parse_external_networks(cfg, service)
-    devices = parse_devices(cfg, service)
+    paths = get_stack_paths(cfg, stack)
+    networks = parse_external_networks(cfg, stack)
+    devices = parse_devices(cfg, stack)
    total = len(paths) + len(networks) + len(devices)

    results: dict[str, tuple[int, int, list[str]]] = {}

    for host_name in cfg.hosts:
-        preflight = await check_service_requirements(cfg, service, host_name)
+        preflight = await check_stack_requirements(cfg, stack, host_name)
        all_missing = (
            preflight.missing_paths + preflight.missing_networks + preflight.missing_devices
        )
@@ -370,72 +359,68 @@ async def check_host_compatibility(
    return results


-async def stop_orphaned_services(cfg: Config) -> list[CommandResult]:
-    """Stop orphaned services (in state but not in config).
+async def stop_orphaned_stacks(cfg: Config) -> list[CommandResult]:
+    """Stop orphaned stacks (in state but not in config).

-    Runs docker compose down on each service on its tracked host(s).
+    Runs docker compose down on each stack on its tracked host(s).
    Only removes from state on successful stop.

-    Returns list of CommandResults for each service@host.
+    Returns list of CommandResults for each stack@host.
    """
-    orphaned = get_orphaned_services(cfg)
+    orphaned = get_orphaned_stacks(cfg)
    if not orphaned:
        return []

    results: list[CommandResult] = []
    tasks: list[tuple[str, str, asyncio.Task[CommandResult]]] = []

-    # Build list of (service, host, task) for all orphaned services
-    for service, hosts in orphaned.items():
+    # Build list of (stack, host, task) for all orphaned stacks
+    for stack, hosts in orphaned.items():
        host_list = hosts if isinstance(hosts, list) else [hosts]
        for host in host_list:
            # Skip hosts no longer in config
            if host not in cfg.hosts:
-                console.print(
-                    f"  [yellow]![/] {service}@{host}: host no longer in config, skipping"
-                )
+                print_warning(f"{stack}@{host}: host no longer in config, skipping")
                results.append(
                    CommandResult(
-                        service=f"{service}@{host}",
+                        stack=f"{stack}@{host}",
                        exit_code=1,
                        success=False,
                        stderr="host no longer in config",
                    )
                )
                continue
-            coro = run_compose_on_host(cfg, service, host, "down")
-            tasks.append((service, host, asyncio.create_task(coro)))
+            coro = run_compose_on_host(cfg, stack, host, "down")
+            tasks.append((stack, host, asyncio.create_task(coro)))

    # Run all down commands in parallel
    if tasks:
-        for service, host, task in tasks:
+        for stack, host, task in tasks:
            try:
                result = await task
                results.append(result)
                if result.success:
-                    console.print(f"  [green]✓[/] {service}@{host}: stopped")
+                    print_success(f"{stack}@{host}: stopped")
                else:
-                    console.print(f"  [red]✗[/] {service}@{host}: {result.stderr or 'failed'}")
+                    print_error(f"{stack}@{host}: {result.stderr or 'failed'}")
            except Exception as e:
-                console.print(f"  [red]✗[/] {service}@{host}: {e}")
+                print_error(f"{stack}@{host}: {e}")
                results.append(
                    CommandResult(
-                        service=f"{service}@{host}",
+                        stack=f"{stack}@{host}",
                        exit_code=1,
                        success=False,
                        stderr=str(e),
                    )
                )

-    # Remove from state only for services where ALL hosts succeeded
-    for service, hosts in orphaned.items():
+    # Remove from state only for stacks where ALL hosts succeeded
+    for stack, hosts in orphaned.items():
        host_list = hosts if isinstance(hosts, list) else [hosts]
        all_succeeded = all(
-            r.success
-            for r in results
-            if r.service.startswith(f"{service}@") or r.service == service
+            r.success for r in results if r.stack.startswith(f"{stack}@") or r.stack == stack
        )
        if all_succeeded:
-            remove_service(cfg, service)
+            remove_stack(cfg, stack)

    return results
--- a/src/compose_farm/paths.py
+++ b/src/compose_farm/paths.py
@@ -19,3 +19,15 @@ def default_config_path() -> Path:
 def config_search_paths() -> list[Path]:
    """Get search paths for config files."""
    return [Path("compose-farm.yaml"), default_config_path()]
+
+
+def find_config_path() -> Path | None:
+    """Return the first existing config file: ``CF_CONFIG`` first, then the search paths."""
+    candidates = config_search_paths()
+    if env_path := os.environ.get("CF_CONFIG"):
+        # An explicit override is tried before the default locations.
+        candidates = [Path(env_path), *candidates]
+    for candidate in candidates:
+        if candidate.is_file():  # False for missing paths, so no separate exists() check
+            return candidate
+    return None
--- a/src/compose_farm/ssh_keys.py
+++ b/src/compose_farm/ssh_keys.py
@@ -0,0 +1,84 @@
+"""SSH key utilities for compose-farm."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+# Default key paths for compose-farm SSH key
+# Keys are stored in a subdirectory for cleaner docker volume mounting
+SSH_KEY_DIR = Path.home() / ".ssh" / "compose-farm"
+SSH_KEY_PATH = SSH_KEY_DIR / "id_ed25519"
+SSH_PUBKEY_PATH = SSH_KEY_PATH.with_suffix(".pub")
+
+
+def _socket_mtime(path: Path) -> float | None:
+    """Return the mtime of *path* if it is a socket, else None.
+
+    A candidate may be a dangling entry or vanish while the directory is being
+    scanned, so any OSError from the checks is treated as "not a usable socket".
+    """
+    try:
+        if not path.is_socket():
+            return None
+        return path.stat().st_mtime
+    except OSError:
+        return None
+
+
+def get_ssh_auth_sock() -> str | None:
+    """Get SSH_AUTH_SOCK, auto-detecting forwarded agent if needed.
+
+    Checks in order:
+    1. SSH_AUTH_SOCK environment variable (if socket exists)
+    2. Forwarded agent sockets in ~/.ssh/agent/ (most recent first)
+
+    Returns the socket path or None if no valid socket found.
+    """
+    sock = os.environ.get("SSH_AUTH_SOCK")
+    if sock and Path(sock).is_socket():
+        return sock
+
+    # Try to find a forwarded SSH agent socket
+    agent_dir = Path.home() / ".ssh" / "agent"
+    if not agent_dir.is_dir():
+        return None
+
+    newest: Path | None = None
+    newest_mtime = float("-inf")
+    for candidate in agent_dir.glob("s.*.sshd.*"):
+        mtime = _socket_mtime(candidate)
+        # Strict ">" keeps the first socket seen when modification times tie
+        if mtime is not None and mtime > newest_mtime:
+            newest, newest_mtime = candidate, mtime
+    return str(newest) if newest is not None else None
+
+
+def get_ssh_env() -> dict[str, str]:
+    """Get environment dict for SSH subprocess with auto-detected agent.
+
+    Returns a copy of the current environment with SSH_AUTH_SOCK set
+    to the auto-detected agent socket (if found).
+    """
+    env = os.environ.copy()
+    sock = get_ssh_auth_sock()
+    if sock:
+        env["SSH_AUTH_SOCK"] = sock
+    return env
+
+
+def key_exists() -> bool:
+    """Check if the compose-farm SSH key pair exists."""
+    return SSH_KEY_PATH.exists() and SSH_PUBKEY_PATH.exists()
+
+
+def get_key_path() -> Path | None:
+    """Get the SSH key path if it exists, None otherwise."""
+    return SSH_KEY_PATH if key_exists() else None
+
+
+def get_pubkey_content() -> str | None:
+    """Get the public key content if it exists, None otherwise."""
+    if not SSH_PUBKEY_PATH.exists():
+        return None
+    return SSH_PUBKEY_PATH.read_text().strip()
--- a/src/compose_farm/state.py
+++ b/src/compose_farm/state.py
@@ -1,4 +1,4 @@
-"""State tracking for deployed services."""
+"""State tracking for deployed stacks."""

 from __future__ import annotations

@@ -8,16 +8,49 @@ from typing import TYPE_CHECKING, Any
 import yaml

 if TYPE_CHECKING:
-    from collections.abc import Generator
+    from collections.abc import Generator, Mapping

    from .config import Config


+def group_stacks_by_host(
+    stacks: dict[str, str | list[str]],
+    hosts: Mapping[str, object],
+    all_hosts: list[str] | None = None,
+) -> dict[str, list[str]]:
+    """Map each host in ``hosts`` to the stacks assigned to it.
+
+    A stack with a list of hosts (or "all", when ``all_hosts`` is given) is
+    listed under every one of those hosts; hosts not in ``hosts`` are ignored.
+    """
+    by_host: dict[str, list[str]] = {h: [] for h in hosts}
+    for stack, host_value in stacks.items():
+        if isinstance(host_value, list):
+            targets = host_value
+        elif host_value == "all" and all_hosts:
+            targets = all_hosts
+        else:
+            targets = [host_value]
+        for host_name in targets:
+            if host_name in by_host:
+                by_host[host_name].append(stack)
+    return by_host
+
+
+def group_running_stacks_by_host(
+    state: dict[str, str | list[str]],
+    hosts: Mapping[str, object],
+) -> dict[str, list[str]]:
+    """Group the stacks in ``state`` by host, omitting hosts that have no stacks."""
+    grouped = group_stacks_by_host(state, hosts)
+    return {host: stacks for host, stacks in grouped.items() if stacks}
+
+
 def load_state(config: Config) -> dict[str, str | list[str]]:
    """Load the current deployment state.

-    Returns a dict mapping service names to host name(s).
-    Multi-host services store a list of hosts.
+    Returns a dict mapping stack names to host name(s).
+    Multi-host stacks store a list of hosts.
    """
    state_path = config.get_state_path()
    if not state_path.exists():
@@ -50,13 +83,13 @@ def _modify_state(config: Config) -> Generator[dict[str, str | list[str]], None,
    save_state(config, state)


-def get_service_host(config: Config, service: str) -> str | None:
-    """Get the host where a service is currently deployed.
+def get_stack_host(config: Config, stack: str) -> str | None:
+    """Get the host where a stack is currently deployed.

-    For multi-host services, returns the first host or None.
+    For multi-host stacks, returns the first host or None.
    """
    state = load_state(config)
-    value = state.get(service)
+    value = state.get(stack)
    if value is None:
        return None
    if isinstance(value, list):
@@ -64,103 +97,59 @@ def get_service_host(config: Config, service: str) -> str | None:
    return value


-def set_service_host(config: Config, service: str, host: str) -> None:
-    """Record that a service is deployed on a host."""
+def set_stack_host(config: Config, stack: str, host: str) -> None:
+    """Record that a stack is deployed on a host."""
    with _modify_state(config) as state:
-        state[service] = host
+        state[stack] = host


-def set_multi_host_service(config: Config, service: str, hosts: list[str]) -> None:
-    """Record that a multi-host service is deployed on multiple hosts."""
+def set_multi_host_stack(config: Config, stack: str, hosts: list[str]) -> None:
+    """Record that a multi-host stack is deployed on multiple hosts."""
    with _modify_state(config) as state:
-        state[service] = hosts
+        state[stack] = hosts


-def remove_service(config: Config, service: str) -> None:
-    """Remove a service from the state (after down)."""
+def remove_stack(config: Config, stack: str) -> None:
+    """Remove a stack from the state (after down)."""
    with _modify_state(config) as state:
-        state.pop(service, None)
+        state.pop(stack, None)


-def add_service_to_host(config: Config, service: str, host: str) -> None:
-    """Add a specific host to a service's state.
+def get_stacks_needing_migration(config: Config) -> list[str]:
+    """Get stacks where current host differs from configured host.

-    For multi-host services, adds the host to the list if not present.
-    For single-host services, sets the host.
-    """
-    with _modify_state(config) as state:
-        current = state.get(service)
-
-        if config.is_multi_host(service):
-            # Multi-host: add to list if not present
-            if isinstance(current, list):
-                if host not in current:
-                    state[service] = [*current, host]
-            else:
-                state[service] = [host]
-        else:
-            # Single-host: just set it
-            state[service] = host
-
-
-def remove_service_from_host(config: Config, service: str, host: str) -> None:
-    """Remove a specific host from a service's state.
-
-    For multi-host services, removes just that host from the list.
-    For single-host services, removes the service entirely if host matches.
-    """
-    with _modify_state(config) as state:
-        current = state.get(service)
-        if current is None:
-            return
-
-        if isinstance(current, list):
-            # Multi-host: remove this host from list
-            remaining = [h for h in current if h != host]
-            if remaining:
-                state[service] = remaining
-            else:
-                state.pop(service, None)
-        elif current == host:
-            # Single-host: remove if matches
-            state.pop(service, None)
-
-
-def get_services_needing_migration(config: Config) -> list[str]:
-    """Get services where current host differs from configured host.
-
-    Multi-host services are never considered for migration.
+    Multi-host stacks are never considered for migration.
    """
    needs_migration = []
-    for service in config.services:
-        # Skip multi-host services
-        if config.is_multi_host(service):
+    for stack in config.stacks:
+        # Skip multi-host stacks
+        if config.is_multi_host(stack):
            continue

-        configured_host = config.get_hosts(service)[0]
-        current_host = get_service_host(config, service)
+        configured_host = config.get_hosts(stack)[0]
+        current_host = get_stack_host(config, stack)
        if current_host and current_host != configured_host:
-            needs_migration.append(service)
+            needs_migration.append(stack)
    return needs_migration


-def get_orphaned_services(config: Config) -> dict[str, str | list[str]]:
-    """Get services that are in state but not in config.
+def get_orphaned_stacks(config: Config) -> dict[str, str | list[str]]:
+    """Get stacks that are in state but not in config.

-    These are services that were previously deployed but have been
+    These are stacks that were previously deployed but have been
    removed from the config file (e.g., commented out).

-    Returns a dict mapping service name to host(s) where it's deployed.
+    Returns a dict mapping stack name to host(s) where it's deployed.
    """
    state = load_state(config)
-    return {service: hosts for service, hosts in state.items() if service not in config.services}
+    return {stack: hosts for stack, hosts in state.items() if stack not in config.stacks}


-def get_services_not_in_state(config: Config) -> list[str]:
-    """Get services that are in config but not in state.
+def get_stacks_not_in_state(config: Config) -> list[str]:
+    """Get stacks that are in config but not in state.

-    These are services that should be running but aren't tracked
+    These are stacks that should be running but aren't tracked
    (e.g., newly added to config, or previously stopped as orphans).
    """
    state = load_state(config)
-    return [service for service in config.services if service not in state]
+    return [stack for stack in config.stacks if stack not in state]
--- a/src/compose_farm/traefik.py
+++ b/src/compose_farm/traefik.py
@@ -311,7 +311,7 @@ def _process_service_labels(

 def generate_traefik_config(
    config: Config,
-    services: list[str],
+    stacks: list[str],
    *,
    check_all: bool = False,
 ) -> tuple[dict[str, Any], list[str]]:
@@ -319,8 +319,8 @@ def generate_traefik_config(

    Args:
        config: The compose-farm config.
-        services: List of service names to process.
-        check_all: If True, check all services for warnings (ignore host filtering).
+        stacks: List of stack names to process.
+        check_all: If True, check all stacks for warnings (ignore host filtering).
                   Used by the check command to validate all traefik labels.

    Returns (config_dict, warnings).
@@ -332,14 +332,14 @@ def generate_traefik_config(

    # Determine Traefik's host from service assignment
    traefik_host = None
-    if config.traefik_service and not check_all:
-        traefik_host = config.services.get(config.traefik_service)
+    if config.traefik_stack and not check_all:
+        traefik_host = config.stacks.get(config.traefik_stack)

-    for stack in services:
+    for stack in stacks:
        raw_services, env, host_address = load_compose_services(config, stack)
-        stack_host = config.services.get(stack)
+        stack_host = config.stacks.get(stack)

-        # Skip services on Traefik's host - docker provider handles them directly
+        # Skip stacks on Traefik's host - docker provider handles them directly
        # (unless check_all is True, for validation purposes)
        if not check_all:
            if host_address.lower() in LOCAL_ADDRESSES:
@@ -370,7 +370,7 @@ _TRAEFIK_CONFIG_HEADER = """\
 # Auto-generated by compose-farm
 # https://github.com/basnijholt/compose-farm
 #
-# This file routes traffic to services running on hosts other than Traefik's host.
+# This file routes traffic to stacks running on hosts other than Traefik's host.
 # Services on Traefik's host use the Docker provider directly.
 #
 # Regenerate with: compose-farm traefik-file --all -o <this-file>
--- a/src/compose_farm/web/app.py
+++ b/src/compose_farm/web/app.py
@@ -2,34 +2,65 @@

 from __future__ import annotations

+import asyncio
+import logging
 import sys
-from contextlib import asynccontextmanager
+from contextlib import asynccontextmanager, suppress
 from typing import TYPE_CHECKING

 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+from pydantic import ValidationError
+from rich.logging import RichHandler

 from compose_farm.web.deps import STATIC_DIR, get_config
 from compose_farm.web.routes import actions, api, pages
+from compose_farm.web.streaming import TASK_TTL_SECONDS, cleanup_stale_tasks
+
+# Configure the root logger with a Rich handler (basicConfig is a no-op if handlers exist)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(message)s",
+    datefmt="[%X]",
+    handlers=[RichHandler(rich_tracebacks=True, show_path=False)],
+)
+# Set our web modules to INFO level (uvicorn handles its own logging)
+logging.getLogger("compose_farm.web").setLevel(logging.INFO)

 if TYPE_CHECKING:
    from collections.abc import AsyncGenerator


+async def _task_cleanup_loop() -> None:
+    """Call cleanup_stale_tasks() every TASK_TTL_SECONDS // 2 seconds until cancelled."""
+    while True:
+        await asyncio.sleep(TASK_TTL_SECONDS // 2)  # Half the task TTL between sweeps
+        cleanup_stale_tasks()
+
+
@asynccontextmanager
 async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan handler."""
-    # Startup: pre-load config
-    get_config()
+    # Startup: pre-load config (ignore errors - handled per-request)
+    with suppress(ValidationError, FileNotFoundError):
+        get_config()
+
+    # Start background cleanup task
+    cleanup_task = asyncio.create_task(_task_cleanup_loop())
+
    yield
-    # Shutdown: nothing to clean up
+
+    # Shutdown: cancel cleanup task
+    cleanup_task.cancel()
+    with suppress(asyncio.CancelledError):
+        await cleanup_task


 def create_app() -> FastAPI:
    """Create and configure the FastAPI application."""
    app = FastAPI(
        title="Compose Farm",
-        description="Web UI for managing Docker Compose services across multiple hosts",
+        description="Web UI for managing Docker Compose stacks across multiple hosts",
        lifespan=lifespan,
    )

--- a/src/compose_farm/web/cdn.py
+++ b/src/compose_farm/web/cdn.py
@@ -0,0 +1,108 @@
+"""CDN asset definitions and caching for tests and demo recordings.
+
+This module provides a single source of truth for CDN asset URLs used in
+browser tests and demo recordings. Assets are intercepted and served from
+a local cache to eliminate network variability.
+
+Note: The canonical list of CDN assets for production is in base.html
+(with data-vendor attributes). This module includes those plus dynamically
+loaded assets (like Monaco editor modules loaded by app.js).
+"""
+
+from __future__ import annotations
+
+import subprocess
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+# CDN assets to cache locally for tests/demos
+# Format: URL -> (local_filename, content_type)
+#
+# If tests fail with "Uncached CDN request", add the URL here.
+CDN_ASSETS: dict[str, tuple[str, str]] = {
+    # From base.html (data-vendor attributes)
+    "https://cdn.jsdelivr.net/npm/daisyui@5/themes.css": ("daisyui-themes.css", "text/css"),
+    "https://cdn.jsdelivr.net/npm/daisyui@5": ("daisyui.css", "text/css"),
+    "https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4": (
+        "tailwind.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/css/xterm.css": ("xterm.css", "text/css"),
+    "https://unpkg.com/htmx.org@2.0.4": ("htmx.js", "application/javascript"),
+    "https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/lib/xterm.js": (
+        "xterm.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/@xterm/addon-fit@0.10.0/lib/addon-fit.js": (
+        "xterm-fit.js",
+        "application/javascript",
+    ),
+    # Monaco editor - dynamically loaded by app.js
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/loader.js": (
+        "monaco-loader.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/editor/editor.main.js": (
+        "monaco-editor-main.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/editor/editor.main.css": (
+        "monaco-editor-main.css",
+        "text/css",
+    ),
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/base/worker/workerMain.js": (
+        "monaco-workerMain.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/basic-languages/yaml/yaml.js": (
+        "monaco-yaml.js",
+        "application/javascript",
+    ),
+    "https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/base/browser/ui/codicons/codicon/codicon.ttf": (
+        "monaco-codicon.ttf",
+        "font/ttf",
+    ),
+}
+
+
+def download_url(url: str) -> bytes | None:
+    """Download URL content using curl; return None if curl is missing or fails."""
+    try:
+        result = subprocess.run(
+            ["curl", "-fsSL", "--max-time", "30", url],  # noqa: S607
+            capture_output=True,
+            check=True,
+        )
+        return bytes(result.stdout)
+    except (subprocess.SubprocessError, OSError):
+        return None
+
+
+def ensure_vendor_cache(cache_dir: Path) -> Path:
+    """Download CDN assets to cache directory if not already present.
+
+    Args:
+        cache_dir: Directory to store cached assets.
+
+    Returns:
+        The cache directory path.
+
+    Raises:
+        RuntimeError: If any asset fails to download.
+
+    """
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    for url, (filename, _content_type) in CDN_ASSETS.items():
+        filepath = cache_dir / filename
+        if filepath.exists():
+            continue
+        content = download_url(url)
+        if not content:
+            msg = f"Failed to download {url} - check network/curl"
+            raise RuntimeError(msg)
+        filepath.write_bytes(content)
+
+    return cache_dir
--- a/src/compose_farm/web/deps.py
+++ b/src/compose_farm/web/deps.py
@@ -10,6 +10,9 @@ from pathlib import Path
 from typing import TYPE_CHECKING

 from fastapi.templating import Jinja2Templates
+from pydantic import ValidationError
+
+from compose_farm.executor import is_local

 if TYPE_CHECKING:
    from compose_farm.config import Config
@@ -30,3 +33,18 @@ def get_config() -> Config:
 def get_templates() -> Jinja2Templates:
    """Get Jinja2 templates instance."""
    return Jinja2Templates(directory=str(TEMPLATES_DIR))
+
+
+def extract_config_error(exc: Exception) -> str:
+    """Return a user-facing message: joined pydantic error messages, else str(exc)."""
+    if isinstance(exc, ValidationError):
+        return "; ".join(err.get("msg", str(err)) for err in exc.errors())
+    return str(exc)
+
+
+def get_local_host(config: Config) -> str | None:
+    """Return the name of the first configured host that is_local() accepts, or None."""
+    for name, host in config.hosts.items():
+        if is_local(host):
+            return name
+    return None
--- a/src/compose_farm/web/routes/actions.py
+++ b/src/compose_farm/web/routes/actions.py
@@ -1,4 +1,4 @@
-"""Action routes for service operations."""
+"""Action routes for stack operations."""

 from __future__ import annotations

@@ -32,56 +32,47 @@ def _start_task(coro_factory: Callable[[str], Coroutine[Any, Any, None]]) -> str
    return task_id


-async def _run_service_action(name: str, command: str) -> dict[str, Any]:
-    """Run a compose command for a service."""
-    config = get_config()
+# Allowed stack commands
+ALLOWED_COMMANDS = {"up", "down", "restart", "pull", "update", "logs", "stop"}

-    if name not in config.services:
-        raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
+# Allowed service-level commands (no 'down' - use 'stop' for individual services)
+ALLOWED_SERVICE_COMMANDS = {"logs", "pull", "restart", "up", "stop"}
+
+
+@router.post("/stack/{name}/{command}")
+async def stack_action(name: str, command: str) -> dict[str, Any]:
+    """Run a compose command for a stack (up, down, restart, pull, update, logs, stop)."""
+    if command not in ALLOWED_COMMANDS:
+        raise HTTPException(status_code=404, detail=f"Unknown command '{command}'")
+
+    config = get_config()
+    if name not in config.stacks:
+        raise HTTPException(status_code=404, detail=f"Stack '{name}' not found")

    task_id = _start_task(lambda tid: run_compose_streaming(config, name, command, tid))
-    return {"task_id": task_id, "service": name, "command": command}
+    return {"task_id": task_id, "stack": name, "command": command}


-@router.post("/service/{name}/up")
-async def up_service(name: str) -> dict[str, Any]:
-    """Start a service."""
-    return await _run_service_action(name, "up")
+@router.post("/stack/{name}/service/{service}/{command}")
+async def service_action(name: str, service: str, command: str) -> dict[str, Any]:
+    """Run a compose command for a specific service within a stack."""
+    if command not in ALLOWED_SERVICE_COMMANDS:
+        raise HTTPException(status_code=404, detail=f"Unknown command '{command}'")

+    config = get_config()
+    if name not in config.stacks:
+        raise HTTPException(status_code=404, detail=f"Stack '{name}' not found")

-@router.post("/service/{name}/down")
-async def down_service(name: str) -> dict[str, Any]:
-    """Stop a service."""
-    return await _run_service_action(name, "down")
-
-
-@router.post("/service/{name}/restart")
-async def restart_service(name: str) -> dict[str, Any]:
-    """Restart a service (down + up)."""
-    return await _run_service_action(name, "restart")
-
-
-@router.post("/service/{name}/pull")
-async def pull_service(name: str) -> dict[str, Any]:
-    """Pull latest images for a service."""
-    return await _run_service_action(name, "pull")
-
-
-@router.post("/service/{name}/update")
-async def update_service(name: str) -> dict[str, Any]:
-    """Update a service (pull + build + down + up)."""
-    return await _run_service_action(name, "update")
-
-
-@router.post("/service/{name}/logs")
-async def logs_service(name: str) -> dict[str, Any]:
-    """Show logs for a service."""
-    return await _run_service_action(name, "logs")
+    # --service targets one service. NOTE(review): `service` is raw URL input - verify escaping
+    task_id = _start_task(
+        lambda tid: run_compose_streaming(config, name, f"{command} --service {service}", tid)
+    )
+    return {"task_id": task_id, "stack": name, "service": service, "command": command}


@router.post("/apply")
 async def apply_all() -> dict[str, Any]:
-    """Run cf apply to reconcile all services."""
+    """Run cf apply to reconcile all stacks."""
    config = get_config()
    task_id = _start_task(lambda tid: run_cli_streaming(config, ["apply"], tid))
    return {"task_id": task_id, "command": "apply"}
@@ -89,7 +80,23 @@ async def apply_all() -> dict[str, Any]:

@router.post("/refresh")
 async def refresh_state() -> dict[str, Any]:
-    """Refresh state from running services."""
+    """Refresh state from running stacks."""
    config = get_config()
    task_id = _start_task(lambda tid: run_cli_streaming(config, ["refresh"], tid))
    return {"task_id": task_id, "command": "refresh"}
+
+
+@router.post("/pull-all")
+async def pull_all() -> dict[str, Any]:
+    """Pull latest images for all stacks."""
+    config = get_config()
+    task_id = _start_task(lambda tid: run_cli_streaming(config, ["pull", "--all"], tid))
+    return {"task_id": task_id, "command": "pull --all"}
+
+
+@router.post("/update-all")
+async def update_all() -> dict[str, Any]:
+    """Update all stacks (pull + build + down + up)."""
+    config = get_config()
+    task_id = _start_task(lambda tid: run_cli_streaming(config, ["update", "--all"], tid))
+    return {"task_id": task_id, "command": "update --all"}
--- a/src/compose_farm/web/routes/api.py
+++ b/src/compose_farm/web/routes/api.py
@@ -2,21 +2,30 @@

 from __future__ import annotations

+import asyncio
 import contextlib
 import json
+import logging
+import shlex
+from datetime import UTC, datetime
+from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Any

+if TYPE_CHECKING:
+    from collections.abc import Callable
+
+import asyncssh
 import yaml
-from fastapi import APIRouter, Body, HTTPException
+from fastapi import APIRouter, Body, HTTPException, Query
 from fastapi.responses import HTMLResponse

-if TYPE_CHECKING:
-    from pathlib import Path
-
-from compose_farm.executor import run_compose_on_host
+from compose_farm.executor import is_local, run_compose_on_host, ssh_connect_kwargs
+from compose_farm.paths import find_config_path
 from compose_farm.state import load_state
 from compose_farm.web.deps import get_config, get_templates

+logger = logging.getLogger(__name__)
+
 router = APIRouter(tags=["api"])


@@ -28,12 +37,57 @@ def _validate_yaml(content: str) -> None:
        raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}") from e


-def _get_service_compose_path(name: str) -> Path:
-    """Get compose path for service, raising HTTPException if not found."""
+def _backup_file(file_path: Path) -> Path | None:
+    """Create a timestamped backup of a file if it exists.
+
+    Backups are stored in a .backups directory alongside the file.
+    Returns the backup path if created, None if the file does not exist.
+    """
+    if not file_path.exists():
+        return None
+
+    # Create backup directory
+    backup_dir = file_path.parent / ".backups"
+    backup_dir.mkdir(exist_ok=True)
+
+    # Generate timestamped backup filename (UTC, one-second resolution)
+    timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S")
+    backup_name = f"{file_path.name}.{timestamp}"
+    backup_path = backup_dir / backup_name
+
+    # Copy current content to backup
+    backup_path.write_text(file_path.read_text())
+
+    # Clean up old backups (names sort by timestamp; keep the newest 200)
+    backups = sorted(backup_dir.glob(f"{file_path.name}.*"), reverse=True)
+    for old_backup in backups[200:]:
+        old_backup.unlink()
+
+    return backup_path
+
+
+def _save_with_backup(file_path: Path, content: str) -> bool:
+    """Save content to file, creating a backup first if content changed.
+
+    Returns True if file was saved, False if content was unchanged.
+    """
+    # Check if content actually changed
+    if file_path.exists():
+        current_content = file_path.read_text()
+        if current_content == content:
+            return False  # No change, skip save
+        _backup_file(file_path)
+
+    file_path.write_text(content)
+    return True
+
+
+def _get_stack_compose_path(name: str) -> Path:
+    """Get compose path for stack, raising HTTPException if not found."""
    config = get_config()

-    if name not in config.services:
-        raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
+    if name not in config.stacks:
+        raise HTTPException(status_code=404, detail=f"Stack '{name}' not found")

    compose_path = config.get_compose_path(name)
    if not compose_path:
@@ -42,12 +96,12 @@ def _get_service_compose_path(name: str) -> Path:
    return compose_path


-def _get_compose_services(config: Any, service: str, hosts: list[str]) -> list[dict[str, Any]]:
+def _get_compose_services(config: Any, stack: str, hosts: list[str]) -> list[dict[str, Any]]:
    """Get container info from compose file (fast, local read).

-    Returns one entry per container per host for multi-host services.
+    Returns one entry per container per host for multi-host stacks.
    """
-    compose_path = config.get_compose_path(service)
+    compose_path = config.get_compose_path(stack)
    if not compose_path or not compose_path.exists():
        return []

@@ -79,7 +133,7 @@ def _get_compose_services(config: Any, service: str, hosts: list[str]) -> list[d


 async def _get_container_states(
-    config: Any, service: str, containers: list[dict[str, Any]]
+    config: Any, stack: str, containers: list[dict[str, Any]]
 ) -> list[dict[str, Any]]:
    """Query Docker for actual container states on a single host."""
    if not containers:
@@ -88,54 +142,73 @@ async def _get_container_states(
    # All containers should be on the same host
    host_name = containers[0]["Host"]

-    result = await run_compose_on_host(config, service, host_name, "ps --format json", stream=False)
+    # Use -a to include stopped/exited containers
+    result = await run_compose_on_host(
+        config, stack, host_name, "ps -a --format json", stream=False
+    )
    if not result.success:
+        logger.warning(
+            "Failed to get container states for %s on %s: %s",
+            stack,
+            host_name,
+            result.stderr or result.stdout,
+        )
        return containers

-    # Build state map
-    state_map: dict[str, str] = {}
+    # Build state map: name -> (state, exit_code)
+    state_map: dict[str, tuple[str, int]] = {}
    for line in result.stdout.strip().split("\n"):
        if line.strip():
            with contextlib.suppress(json.JSONDecodeError):
                data = json.loads(line)
-                state_map[data.get("Name", "")] = data.get("State", "unknown")
+                name = data.get("Name", "")
+                state = data.get("State", "unknown")
+                exit_code = data.get("ExitCode", 0)
+                state_map[name] = (state, exit_code)

    # Update container states
    for c in containers:
        if c["Name"] in state_map:
-            c["State"] = state_map[c["Name"]]
+            state, exit_code = state_map[c["Name"]]
+            c["State"] = state
+            c["ExitCode"] = exit_code
+        else:
+            # Container not in ps output means it was never started
+            c["State"] = "created"
+            c["ExitCode"] = None

    return containers


 def _render_containers(
-    service: str, host: str, containers: list[dict[str, Any]], *, show_header: bool = False
+    stack: str, host: str, containers: list[dict[str, Any]], *, show_header: bool = False
 ) -> str:
    """Render containers HTML using Jinja template."""
    templates = get_templates()
    template = templates.env.get_template("partials/containers.html")
    module = template.make_module()
-    result: str = module.host_containers(service, host, containers, show_header=show_header)
-    return result
+    # TemplateModule exports macros as attributes; getattr keeps type checkers happy
+    host_containers: Callable[..., str] = getattr(module, "host_containers")  # noqa: B009
+    return host_containers(stack, host, containers, show_header=show_header)


-@router.get("/service/{name}/containers", response_class=HTMLResponse)
+@router.get("/stack/{name}/containers", response_class=HTMLResponse)
 async def get_containers(name: str, host: str | None = None) -> HTMLResponse:
-    """Get containers for a service as HTML buttons.
+    """Get containers for a stack as HTML buttons.

    If host is specified, queries Docker for that host's status.
    Otherwise returns all hosts with loading spinners that auto-fetch.
    """
    config = get_config()

-    if name not in config.services:
-        raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
+    if name not in config.stacks:
+        raise HTTPException(status_code=404, detail=f"Stack '{name}' not found")

-    # Get hosts where service is running from state
+    # Get hosts where stack is running from state
    state = load_state(config)
    current_hosts = state.get(name)
    if not current_hosts:
-        return HTMLResponse('<span class="text-base-content/60">Service not running</span>')
+        return HTMLResponse('<span class="text-base-content/60">Stack not running</span>')

    all_hosts = current_hosts if isinstance(current_hosts, list) else [current_hosts]

@@ -162,7 +235,7 @@ async def get_containers(name: str, host: str | None = None) -> HTMLResponse:
        # Container for this host that auto-fetches its own status
        html_parts.append(f"""
            <div id="{host_id}"
-                 hx-get="/api/service/{name}/containers?host={h}"
+                 hx-get="/api/stack/{name}/containers?host={h}"
                 hx-trigger="load"
                 hx-target="this"
                 hx-select="unset"
@@ -174,25 +247,27 @@ async def get_containers(name: str, host: str | None = None) -> HTMLResponse:
    return HTMLResponse("".join(html_parts))


-@router.put("/service/{name}/compose")
+@router.put("/stack/{name}/compose")
 async def save_compose(
    name: str, content: Annotated[str, Body(media_type="text/plain")]
 ) -> dict[str, Any]:
    """Save compose file content."""
-    compose_path = _get_service_compose_path(name)
+    compose_path = _get_stack_compose_path(name)
    _validate_yaml(content)
-    compose_path.write_text(content)
-    return {"success": True, "message": "Compose file saved"}
+    saved = _save_with_backup(compose_path, content)
+    msg = "Compose file saved" if saved else "No changes to save"
+    return {"success": True, "message": msg}


-@router.put("/service/{name}/env")
+@router.put("/stack/{name}/env")
 async def save_env(
    name: str, content: Annotated[str, Body(media_type="text/plain")]
 ) -> dict[str, Any]:
    """Save .env file content."""
-    env_path = _get_service_compose_path(name).parent / ".env"
-    env_path.write_text(content)
-    return {"success": True, "message": ".env file saved"}
+    env_path = _get_stack_compose_path(name).parent / ".env"
+    saved = _save_with_backup(env_path, content)
+    msg = ".env file saved" if saved else "No changes to save"
+    return {"success": True, "message": msg}


@router.put("/config")
@@ -200,12 +275,113 @@ async def save_config(
    content: Annotated[str, Body(media_type="text/plain")],
 ) -> dict[str, Any]:
    """Save compose-farm.yaml config file."""
-    config = get_config()
-
-    if not config.config_path:
-        raise HTTPException(status_code=404, detail="Config path not set")
+    config_path = find_config_path()
+    if not config_path:
+        raise HTTPException(status_code=404, detail="Config file not found")

    _validate_yaml(content)
-    config.config_path.write_text(content)
+    saved = _save_with_backup(config_path, content)
+    msg = "Config saved" if saved else "No changes to save"
+    return {"success": True, "message": msg}

-    return {"success": True, "message": "Config saved"}
+
+async def _read_file_local(path: str) -> str:
+    """Read a file from the local filesystem."""
+    expanded = Path(path).expanduser()
+    return await asyncio.to_thread(expanded.read_text, encoding="utf-8")
+
+
+async def _write_file_local(path: str, content: str) -> bool:
+    """Write content to a file on the local filesystem with backup.
+
+    Returns True if file was saved, False if content was unchanged.
+    """
+    expanded = Path(path).expanduser()
+    return await asyncio.to_thread(_save_with_backup, expanded, content)
+
+
+async def _read_file_remote(host: Any, path: str) -> str:
+    """Read a file from a remote host via SSH."""
+    # Expand ~ on remote by using shell
+    cmd = f"cat {shlex.quote(path)}"
+    if path.startswith("~/"):
+        cmd = f"cat ~/{shlex.quote(path[2:])}"
+
+    async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:
+        result = await conn.run(cmd, check=True)
+        stdout = result.stdout or ""
+        return stdout.decode() if isinstance(stdout, bytes) else stdout
+
+
+async def _write_file_remote(host: Any, path: str, content: str) -> None:
+    """Write content to a file on a remote host via SSH.
+
+    Raises asyncssh.ProcessError if the remote `cat` exits with a non-zero status.
+    """
+    # Expand ~ on remote: keep ~ unquoted for shell expansion, quote the rest
+    target = f"~/{shlex.quote(path[2:])}" if path.startswith("~/") else shlex.quote(path)
+    cmd = f"cat > {target}"
+
+    async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:
+        # check=True already raises on failure, so no manual returncode check is needed
+        await conn.run(cmd, input=content, check=True)
+
+
+def _get_console_host(host: str, path: str) -> Any:
+    """Validate and return host config for console file operations."""
+    config = get_config()
+    host_config = config.hosts.get(host)
+
+    if not host_config:
+        raise HTTPException(status_code=404, detail=f"Host '{host}' not found")
+    if not path:
+        raise HTTPException(status_code=400, detail="Path is required")
+
+    return host_config
+
+
+@router.get("/console/file")
+async def read_console_file(
+    host: Annotated[str, Query(description="Host name")],
+    path: Annotated[str, Query(description="File path")],
+) -> dict[str, Any]:
+    """Read a file from a host for the console editor."""
+    host_config = _get_console_host(host, path)
+
+    try:
+        if is_local(host_config):
+            content = await _read_file_local(path)
+        else:
+            content = await _read_file_remote(host_config, path)
+        return {"success": True, "content": content}
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail=f"File not found: {path}") from None
+    except PermissionError:
+        raise HTTPException(status_code=403, detail=f"Permission denied: {path}") from None
+    except Exception as e:
+        logger.exception("Failed to read file %s from host %s", path, host)
+        raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+@router.put("/console/file")
+async def write_console_file(
+    host: Annotated[str, Query(description="Host name")],
+    path: Annotated[str, Query(description="File path")],
+    content: Annotated[str, Body(media_type="text/plain")],
+) -> dict[str, Any]:
+    """Write a file to a host from the console editor."""
+    host_config = _get_console_host(host, path)
+
+    try:
+        if is_local(host_config):
+            saved = await _write_file_local(path, content)
+            msg = f"Saved: {path}" if saved else "No changes to save"
+        else:
+            await _write_file_remote(host_config, path, content)
+            msg = f"Saved: {path}"  # Remote doesn't track changes
+        return {"success": True, "message": msg}
+    except PermissionError:
+        raise HTTPException(status_code=403, detail=f"Permission denied: {path}") from None
+    except Exception as e:
+        logger.exception("Failed to write file %s to host %s", path, host)
+        raise HTTPException(status_code=500, detail=str(e)) from e
--- a/src/compose_farm/web/routes/pages.py
+++ b/src/compose_farm/web/routes/pages.py
@@ -5,45 +5,101 @@ from __future__ import annotations
 import yaml
 from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
+from pydantic import ValidationError

+from compose_farm.paths import find_config_path
 from compose_farm.state import (
-    get_orphaned_services,
-    get_service_host,
-    get_services_needing_migration,
-    get_services_not_in_state,
+    get_orphaned_stacks,
+    get_stack_host,
+    get_stacks_needing_migration,
+    get_stacks_not_in_state,
+    group_running_stacks_by_host,
    load_state,
 )
-from compose_farm.web.deps import get_config, get_templates
+from compose_farm.web.deps import (
+    extract_config_error,
+    get_config,
+    get_local_host,
+    get_templates,
+)

 router = APIRouter()


+@router.get("/console", response_class=HTMLResponse)
+async def console(request: Request) -> HTMLResponse:
+    """Console page with terminal and editor."""
+    config = get_config()
+    templates = get_templates()
+
+    # Sort hosts with local first
+    local_host = get_local_host(config)
+    hosts = sorted(config.hosts.keys())
+    if local_host:
+        hosts = [local_host] + [h for h in hosts if h != local_host]
+
+    # Get config path for default editor file
+    config_path = str(config.config_path) if config.config_path else ""
+
+    return templates.TemplateResponse(
+        "console.html",
+        {
+            "request": request,
+            "hosts": hosts,
+            "local_host": local_host,
+            "config_path": config_path,
+        },
+    )
+
+
@router.get("/", response_class=HTMLResponse)
 async def index(request: Request) -> HTMLResponse:
    """Dashboard page - combined view of all cluster info."""
-    config = get_config()
    templates = get_templates()

+    # Try to load config, handle errors gracefully
+    config_error = None
+    try:
+        config = get_config()
+    except (ValidationError, FileNotFoundError) as e:
+        config_error = extract_config_error(e)
+
+        # Read raw config content for the editor
+        config_path = find_config_path()
+        config_content = config_path.read_text() if config_path else ""
+
+        return templates.TemplateResponse(
+            "index.html",
+            {
+                "request": request,
+                "config_error": config_error,
+                "hosts": {},
+                "stacks": {},
+                "config_content": config_content,
+                "state_content": "",
+                "running_count": 0,
+                "stopped_count": 0,
+                "orphaned": [],
+                "migrations": [],
+                "not_started": [],
+                "stacks_by_host": {},
+            },
+        )
+
    # Get state
    deployed = load_state(config)

    # Stats
    running_count = len(deployed)
-    stopped_count = len(config.services) - running_count
+    stopped_count = len(config.stacks) - running_count

    # Pending operations
-    orphaned = get_orphaned_services(config)
-    migrations = get_services_needing_migration(config)
-    not_started = get_services_not_in_state(config)
+    orphaned = get_orphaned_stacks(config)
+    migrations = get_stacks_needing_migration(config)
+    not_started = get_stacks_not_in_state(config)

-    # Group services by host
-    services_by_host: dict[str, list[str]] = {}
-    for svc, host in deployed.items():
-        if isinstance(host, list):
-            for h in host:
-                services_by_host.setdefault(h, []).append(svc)
-        else:
-            services_by_host.setdefault(host, []).append(svc)
+    # Group stacks by host (filter out hosts with no running stacks)
+    stacks_by_host = group_running_stacks_by_host(deployed, config.hosts)

    # Config file content
    config_content = ""
@@ -57,9 +113,10 @@ async def index(request: Request) -> HTMLResponse:
        "index.html",
        {
            "request": request,
+            "config_error": None,
            # Config data
            "hosts": config.hosts,
-            "services": config.services,
+            "stacks": config.stacks,
            "config_content": config_content,
            # State data
            "state_content": state_content,
@@ -70,15 +127,15 @@ async def index(request: Request) -> HTMLResponse:
            "orphaned": orphaned,
            "migrations": migrations,
            "not_started": not_started,
-            # Services by host
-            "services_by_host": services_by_host,
+            # Stacks by host
+            "stacks_by_host": stacks_by_host,
        },
    )


-@router.get("/service/{name}", response_class=HTMLResponse)
-async def service_detail(request: Request, name: str) -> HTMLResponse:
-    """Service detail page."""
+@router.get("/stack/{name}", response_class=HTMLResponse)
+async def stack_detail(request: Request, name: str) -> HTMLResponse:
+    """Stack detail page."""
    config = get_config()
    templates = get_templates()

@@ -100,10 +157,10 @@ async def service_detail(request: Request, name: str) -> HTMLResponse:
    hosts = config.get_hosts(name)

    # Get state
-    current_host = get_service_host(config, name)
+    current_host = get_stack_host(config, name)

    return templates.TemplateResponse(
-        "service.html",
+        "stack.html",
        {
            "request": request,
            "name": name,
@@ -119,30 +176,45 @@ async def service_detail(request: Request, name: str) -> HTMLResponse:

@router.get("/partials/sidebar", response_class=HTMLResponse)
 async def sidebar_partial(request: Request) -> HTMLResponse:
-    """Sidebar service list partial."""
+    """Sidebar stack list partial."""
    config = get_config()
    templates = get_templates()

    state = load_state(config)

-    # Build service -> host mapping (empty string for multi-host services)
-    service_hosts = {
+    # Build stack -> host mapping (empty string for multi-host stacks)
+    stack_hosts = {
        svc: "" if host_val == "all" or isinstance(host_val, list) else host_val
-        for svc, host_val in config.services.items()
+        for svc, host_val in config.stacks.items()
    }

    return templates.TemplateResponse(
        "partials/sidebar.html",
        {
            "request": request,
-            "services": sorted(config.services.keys()),
-            "service_hosts": service_hosts,
+            "stacks": sorted(config.stacks.keys()),
+            "stack_hosts": stack_hosts,
            "hosts": sorted(config.hosts.keys()),
+            "local_host": get_local_host(config),
            "state": state,
        },
    )


+@router.get("/partials/config-error", response_class=HTMLResponse)
+async def config_error_partial(request: Request) -> HTMLResponse:
+    """Render the config error banner, or an empty body when the config loads."""
+    templates = get_templates()
+    try:
+        get_config()
+    except (ValidationError, FileNotFoundError) as exc:
+        # Only these two failures are turned into a banner; anything else propagates.
+        context = {"request": request, "config_error": extract_config_error(exc)}
+        return templates.TemplateResponse("partials/config_error.html", context)
+    # Config loaded without error: return an empty body.
+    return HTMLResponse("")
+
+
@router.get("/partials/stats", response_class=HTMLResponse)
 async def stats_partial(request: Request) -> HTMLResponse:
    """Stats cards partial."""
@@ -151,14 +223,14 @@ async def stats_partial(request: Request) -> HTMLResponse:

    deployed = load_state(config)
    running_count = len(deployed)
-    stopped_count = len(config.services) - running_count
+    stopped_count = len(config.stacks) - running_count

    return templates.TemplateResponse(
        "partials/stats.html",
        {
            "request": request,
            "hosts": config.hosts,
-            "services": config.services,
+            "stacks": config.stacks,
            "running_count": running_count,
            "stopped_count": stopped_count,
        },
@@ -171,9 +243,9 @@ async def pending_partial(request: Request, expanded: bool = True) -> HTMLRespon
    config = get_config()
    templates = get_templates()

-    orphaned = get_orphaned_services(config)
-    migrations = get_services_needing_migration(config)
-    not_started = get_services_not_in_state(config)
+    orphaned = get_orphaned_stacks(config)
+    migrations = get_stacks_needing_migration(config)
+    not_started = get_stacks_not_in_state(config)

    return templates.TemplateResponse(
        "partials/pending.html",
@@ -187,29 +259,21 @@ async def pending_partial(request: Request, expanded: bool = True) -> HTMLRespon
    )


-@router.get("/partials/services-by-host", response_class=HTMLResponse)
-async def services_by_host_partial(request: Request, expanded: bool = True) -> HTMLResponse:
-    """Services by host partial."""
+@router.get("/partials/stacks-by-host", response_class=HTMLResponse)
+async def stacks_by_host_partial(request: Request, expanded: bool = True) -> HTMLResponse:
+    """Stacks by host partial."""
    config = get_config()
    templates = get_templates()

    deployed = load_state(config)
-
-    # Group services by host
-    services_by_host: dict[str, list[str]] = {}
-    for svc, host in deployed.items():
-        if isinstance(host, list):
-            for h in host:
-                services_by_host.setdefault(h, []).append(svc)
-        else:
-            services_by_host.setdefault(host, []).append(svc)
+    stacks_by_host = group_running_stacks_by_host(deployed, config.hosts)

    return templates.TemplateResponse(
-        "partials/services_by_host.html",
+        "partials/stacks_by_host.html",
        {
            "request": request,
            "hosts": config.hosts,
-            "services_by_host": services_by_host,
+            "stacks_by_host": stacks_by_host,
            "expanded": expanded,
        },
    )
--- a/src/compose_farm/web/static/app.css
+++ b/src/compose_farm/web/static/app.css
@@ -1,3 +1,17 @@
+/* Tooltips - ensure they appear above sidebar and other elements */
+.tooltip::before,
+.tooltip::after {
+    z-index: 1000;
+}
+
+/* Sidebar inputs - remove focus outline (DaisyUI 5 uses outline + outline-offset) */
+#sidebar .input:focus,
+#sidebar .input:focus-within,
+#sidebar .select:focus {
+    outline: none;
+    outline-offset: 0;
+}
+
 /* Editors (Monaco) - wrapper makes it resizable */
 .editor-wrapper {
    resize: vertical;
@@ -53,3 +67,65 @@
        background-position: 16em center;
    }
 }
+
+/* Command palette FAB - rainbow glow effect */
+@property --cmd-pos { syntax: "<number>"; inherits: true; initial-value: 100; }
+@property --cmd-blur { syntax: "<number>"; inherits: true; initial-value: 10; }
+@property --cmd-scale { syntax: "<number>"; inherits: true; initial-value: 1; }
+@property --cmd-opacity { syntax: "<number>"; inherits: true; initial-value: 0.3; }
+
+#cmd-fab {
+    --g: linear-gradient(to right, #fff, #fff, #0ff, #00f, #8000ff, #e066a3, #f00, #ff0, #bfff80, #fff, #fff);
+    all: unset;
+    position: fixed;
+    bottom: 1.5rem;
+    right: 1.5rem;
+    z-index: 50;
+    cursor: pointer;
+    transform: scale(var(--cmd-scale));
+    transition: --cmd-pos 3s, --cmd-blur 0.3s, --cmd-opacity 0.3s, --cmd-scale 0.2s cubic-bezier(.76,-.25,.51,1.13);
+}
+
+.cmd-fab-inner {
+    display: block;
+    padding: 0.6em 1em;
+    background: #1d232a;
+    border-radius: 8px;
+    font-size: 14px;
+    position: relative;
+}
+
+.cmd-fab-inner > span {
+    background: var(--g) no-repeat calc(var(--cmd-pos) * 1%) 0 / 900%;
+    -webkit-background-clip: text;
+    background-clip: text;
+    -webkit-text-fill-color: transparent;
+    letter-spacing: 0.15ch;
+    font-weight: 600;
+}
+
+.cmd-fab-inner::before, .cmd-fab-inner::after {
+    content: "";
+    position: absolute;
+    border-radius: 8px;
+}
+
+.cmd-fab-inner::before {
+    inset: -1.5px;
+    background: var(--g) no-repeat calc(var(--cmd-pos) * 1%) 0 / 900%;
+    border-radius: 9px;
+    z-index: -1;
+    opacity: var(--cmd-opacity);
+}
+
+.cmd-fab-inner::after {
+    inset: 0;
+    background: #000;
+    transform: translateY(10px);
+    z-index: -2;
+    filter: blur(calc(var(--cmd-blur) * 1px));
+}
+
+#cmd-fab:hover { --cmd-scale: 1.05; --cmd-pos: 0; --cmd-blur: 30; --cmd-opacity: 1; }
+#cmd-fab:hover .cmd-fab-inner::after { background: var(--g); opacity: 0.3; }
+#cmd-fab:active { --cmd-scale: 0.98; --cmd-blur: 15; }
--- a/src/compose_farm/web/static/app.js
+++ b/src/compose_farm/web/static/app.js
@@ -2,6 +2,10 @@
 * Compose Farm Web UI JavaScript
 */

+// ============================================================================
+// CONSTANTS
+// ============================================================================
+
 // ANSI escape codes for terminal output
 const ANSI = {
    RED: '\x1b[31m',
@@ -11,12 +15,6 @@ const ANSI = {
    CRLF: '\r\n'
 };

-// Store active terminals and editors
-const terminals = {};
-const editors = {};
-let monacoLoaded = false;
-let monacoLoading = false;
-
 // Terminal color theme (dark mode matching PicoCSS)
 const TERMINAL_THEME = {
    background: '#1a1a2e',
@@ -41,12 +39,97 @@ const TERMINAL_THEME = {
    brightWhite: '#fafafa'
 };

+// Language detection from file path
+const LANGUAGE_MAP = {
+    'yaml': 'yaml', 'yml': 'yaml',
+    'json': 'json',
+    'js': 'javascript', 'mjs': 'javascript',
+    'ts': 'typescript', 'tsx': 'typescript',
+    'py': 'python',
+    'sh': 'shell', 'bash': 'shell',
+    'md': 'markdown',
+    'html': 'html', 'htm': 'html',
+    'css': 'css',
+    'sql': 'sql',
+    'toml': 'toml',
+    'ini': 'ini', 'conf': 'ini',
+    'dockerfile': 'dockerfile',
+    'env': 'plaintext'
+};
+
+// ============================================================================
+// STATE
+// ============================================================================
+
+// Store active terminals and editors
+const terminals = {};
+const editors = {};
+let monacoLoaded = false;
+let monacoLoading = false;
+
+// LocalStorage key prefix for active tasks (scoped by page)
+const TASK_KEY_PREFIX = 'cf_task:';
+const getTaskKey = () => TASK_KEY_PREFIX + window.location.pathname;
+
+// Exec terminal state
+let execTerminalWrapper = null;  // {term, dispose}
+let execWs = null;
+
+// ============================================================================
+// UTILITIES
+// ============================================================================
+
+/**
+ * Get Monaco language from the extension of the path's last segment
+ * @param {string} path - File path
+ * @returns {string} Monaco language identifier ('plaintext' when the extension is unknown)
+ */
+function getLanguageFromPath(path) {
+    const ext = path.split('/').pop().split('.').pop().toLowerCase();
+    return Object.prototype.hasOwnProperty.call(LANGUAGE_MAP, ext) ? LANGUAGE_MAP[ext] : 'plaintext';
+}
+window.getLanguageFromPath = getLanguageFromPath;
+
+/**
+ * Open a WebSocket to the current host (wss: on https pages, ws: otherwise); attaches no handlers
+ * @param {string} path - WebSocket path, appended to window.location.host
+ * @returns {WebSocket}
+ */
+function createWebSocket(path) {
+    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
+    return new WebSocket(`${protocol}//${window.location.host}${path}`);
+}
+window.createWebSocket = createWebSocket;
+
+/**
+ * Wait for xterm.js to load, then execute callback.
+ * Polls for the Terminal and FitAddon globals every 100ms; logs an error
+ * to the console and gives up once the retries are used up.
+ * @param {function} callback - Function to call when xterm is ready
+ * @param {number} maxAttempts - Max retries (default 20 = 2 seconds)
+ */
+function whenXtermReady(callback, maxAttempts = 20) {
+    const poll = (retriesLeft) => {
+        const loaded = typeof Terminal !== 'undefined' && typeof FitAddon !== 'undefined';
+        if (loaded) return callback();
+        if (!(retriesLeft > 0)) return console.error('xterm.js failed to load');
+        // Not loaded yet and retries remain: check again in 100ms
+        setTimeout(() => poll(retriesLeft - 1), 100);
+    };
+    poll(maxAttempts);
+}
+window.whenXtermReady = whenXtermReady;
+
+// ============================================================================
+// TERMINAL
+// ============================================================================
+
 /**
 * Create a terminal with fit addon and resize observer
 * @param {HTMLElement} container - Container element
 * @param {object} extraOptions - Additional terminal options
 * @param {function} onResize - Optional callback called with (cols, rows) after resize
- * @returns {{term: Terminal, fitAddon: FitAddon}}
+ * @returns {{term: Terminal, fitAddon: FitAddon, dispose: function}}
 */
 function createTerminal(container, extraOptions = {}, onResize = null) {
    container.innerHTML = '';
@@ -63,29 +146,26 @@ function createTerminal(container, extraOptions = {}, onResize = null) {
    const fitAddon = new FitAddon.FitAddon();
    term.loadAddon(fitAddon);
    term.open(container);
-    fitAddon.fit();

    const handleResize = () => {
        fitAddon.fit();
-        if (onResize) {
-            onResize(term.cols, term.rows);
-        }
+        onResize?.(term.cols, term.rows);
    };

-    window.addEventListener('resize', handleResize);
-    new ResizeObserver(handleResize).observe(container);
+    // Use ResizeObserver only (handles both container and window resize)
+    const resizeObserver = new ResizeObserver(handleResize);
+    resizeObserver.observe(container);

-    return { term, fitAddon };
-}
+    handleResize(); // Initial fit

-/**
- * Create WebSocket connection with standard handlers
- * @param {string} path - WebSocket path
- * @returns {WebSocket}
- */
-function createWebSocket(path) {
-    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
-    return new WebSocket(`${protocol}//${window.location.host}${path}`);
+    return {
+        term,
+        fitAddon,
+        dispose() {
+            resizeObserver.disconnect();
+            term.dispose();
+        }
+    };
 }

 /**
@@ -98,14 +178,22 @@ function initTerminal(elementId, taskId) {
        return;
    }

-    const { term, fitAddon } = createTerminal(container);
+    const wrapper = createTerminal(container);
+    const { term } = wrapper;
    const ws = createWebSocket(`/ws/terminal/${taskId}`);

+    const taskKey = getTaskKey();
    ws.onopen = () => {
        term.write(`${ANSI.DIM}[Connected]${ANSI.RESET}${ANSI.CRLF}`);
        setTerminalLoading(true);
+        localStorage.setItem(taskKey, taskId);
+    };
+    ws.onmessage = (event) => {
+        term.write(event.data);
+        if (event.data.includes('[Done]') || event.data.includes('[Failed]')) {
+            localStorage.removeItem(taskKey);
+        }
    };
-    ws.onmessage = (event) => term.write(event.data);
    ws.onclose = () => setTerminalLoading(false);
    ws.onerror = (error) => {
        term.write(`${ANSI.RED}[WebSocket Error]${ANSI.RESET}${ANSI.CRLF}`);
@@ -113,7 +201,7 @@ function initTerminal(elementId, taskId) {
        setTerminalLoading(false);
    };

-    terminals[taskId] = { term, ws, fitAddon };
+    terminals[taskId] = { ...wrapper, ws };
    return { term, ws };
 }

@@ -122,10 +210,7 @@ window.initTerminal = initTerminal;
 /**
 * Initialize an interactive exec terminal
 */
-let execTerminal = null;
-let execWs = null;
-
-function initExecTerminal(service, container, host) {
+function initExecTerminal(stack, container, host) {
    const containerEl = document.getElementById('exec-terminal-container');
    const terminalEl = document.getElementById('exec-terminal');

@@ -136,12 +221,12 @@ function initExecTerminal(service, container, host) {

    containerEl.classList.remove('hidden');

-    // Clean up existing
+    // Clean up existing (use wrapper's dispose to clean up ResizeObserver)
    if (execWs) { execWs.close(); execWs = null; }
-    if (execTerminal) { execTerminal.dispose(); execTerminal = null; }
+    if (execTerminalWrapper) { execTerminalWrapper.dispose(); execTerminalWrapper = null; }

    // Create WebSocket first so resize callback can use it
-    execWs = createWebSocket(`/ws/exec/${service}/${container}/${host}`);
+    execWs = createWebSocket(`/ws/exec/${stack}/${container}/${host}`);

    // Resize callback sends size to WebSocket
    const sendSize = (cols, rows) => {
@@ -150,8 +235,8 @@ function initExecTerminal(service, container, host) {
        }
    };

-    const { term } = createTerminal(terminalEl, { cursorBlink: true }, sendSize);
-    execTerminal = term;
+    execTerminalWrapper = createTerminal(terminalEl, { cursorBlink: true }, sendSize);
+    const term = execTerminalWrapper.term;

    execWs.onopen = () => { sendSize(term.cols, term.rows); term.focus(); };
    execWs.onmessage = (event) => term.write(event.data);
@@ -171,16 +256,32 @@ function initExecTerminal(service, container, host) {
 window.initExecTerminal = initExecTerminal;

 /**
- * Refresh dashboard partials while preserving collapse states
+ * Expand terminal collapse and scroll to it
 */
-function refreshDashboard() {
-    const isExpanded = (id) => document.getElementById(id)?.checked ?? true;
-    htmx.ajax('GET', '/partials/sidebar', {target: '#sidebar nav', swap: 'innerHTML'});
-    htmx.ajax('GET', '/partials/stats', {target: '#stats-cards', swap: 'outerHTML'});
-    htmx.ajax('GET', `/partials/pending?expanded=${isExpanded('pending-collapse')}`, {target: '#pending-operations', swap: 'outerHTML'});
-    htmx.ajax('GET', `/partials/services-by-host?expanded=${isExpanded('services-by-host-collapse')}`, {target: '#services-by-host', swap: 'outerHTML'});
+function expandTerminal() {
+    const toggle = document.getElementById('terminal-toggle');
+    if (toggle) toggle.checked = true;
+
+    const collapse = document.getElementById('terminal-collapse');
+    if (collapse) {
+        collapse.scrollIntoView({ behavior: 'smooth', block: 'start' });
+    }
 }

+/**
+ * Show/hide terminal loading spinner
+ */
+function setTerminalLoading(loading) {
+    const spinner = document.getElementById('terminal-spinner');
+    if (spinner) {
+        spinner.classList.toggle('hidden', !loading);
+    }
+}
+
+// ============================================================================
+// EDITOR (Monaco)
+// ============================================================================
+
 /**
 * Load Monaco editor dynamically (only once)
 */
@@ -222,13 +323,15 @@ function loadMonaco(callback) {
 * @param {HTMLElement} container - Container element
 * @param {string} content - Initial content
 * @param {string} language - Editor language (yaml, plaintext, etc.)
- * @param {boolean} readonly - Whether editor is read-only
+ * @param {object} opts - Options: { readonly, onSave }
 * @returns {object} Monaco editor instance
 */
-function createEditor(container, content, language, readonly = false) {
+function createEditor(container, content, language, opts = {}) {
+    const { readonly = false, onSave = null } = opts;
+
    const options = {
        value: content,
-        language: language,
+        language,
        theme: 'vs-dark',
        minimap: { enabled: false },
        automaticLayout: true,
@@ -247,23 +350,26 @@ function createEditor(container, content, language, readonly = false) {

    // Add Command+S / Ctrl+S handler for editable editors
    if (!readonly) {
-        editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, function() {
-            saveAllEditors();
+        editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, () => {
+            if (onSave) {
+                onSave(editor);
+            } else {
+                saveAllEditors();
+            }
        });
    }

    return editor;
 }
+window.createEditor = createEditor;

 /**
 * Initialize all Monaco editors on the page
 */
 function initMonacoEditors() {
    // Dispose existing editors
-    Object.values(editors).forEach(ed => {
-        if (ed && ed.dispose) ed.dispose();
-    });
-    Object.keys(editors).forEach(key => delete editors[key]);
+    Object.values(editors).forEach(ed => ed?.dispose?.());
+    for (const key in editors) delete editors[key];

    const editorConfigs = [
        { id: 'compose-editor', language: 'yaml', readonly: false },
@@ -283,7 +389,7 @@ function initMonacoEditors() {
            if (!el) return;

            const content = el.dataset.content || '';
-            editors[id] = createEditor(el, content, language, readonly);
+            editors[id] = createEditor(el, content, language, { readonly });
            if (!readonly) {
                editors[id].saveUrl = el.dataset.saveUrl;
            }
@@ -309,7 +415,7 @@ async function saveAllEditors() {
                body: content
            });
            const data = await response.json();
-            if (!data.success) {
+            if (!response.ok || !data.success) {
                results.push({ id, success: false, error: data.detail || 'Unknown error' });
            } else {
                results.push({ id, success: true });
@@ -320,13 +426,9 @@ async function saveAllEditors() {
    }

    // Show result
-    const errors = results.filter(r => !r.success);
-    if (errors.length > 0) {
-        alert('Errors saving:\n' + errors.map(e => `${e.id}: ${e.error}`).join('\n'));
-    } else if (saveBtn && results.length > 0) {
+    if (saveBtn && results.length > 0) {
        saveBtn.textContent = 'Saved!';
        setTimeout(() => saveBtn.textContent = saveBtn.id === 'save-config-btn' ? 'Save Config' : 'Save All', 2000);
-
        refreshDashboard();
    }
 }
@@ -341,6 +443,294 @@ function initSaveButton() {
    saveBtn.onclick = saveAllEditors;
 }

+// ============================================================================
+// UI HELPERS
+// ============================================================================
+
+/**
+ * Refresh dashboard partials by dispatching a custom event.
+ * Elements with hx-trigger="cf:refresh from:body" will automatically refresh.
+ */
+function refreshDashboard() {
+    document.body.dispatchEvent(new CustomEvent('cf:refresh'));
+}
+
+/** Filter sidebar stacks by name (case-insensitive substring) and host; updates the count. */
+function sidebarFilter() {
+    const q = (document.getElementById('sidebar-filter')?.value || '').toLowerCase();
+    const h = document.getElementById('sidebar-host-select')?.value || '';
+    const shown = [...document.querySelectorAll('#sidebar-stacks li')].filter(li => {
+        // Tolerate items without data-stack; items without data-h match every host
+        const name = (li.dataset.stack || '').toLowerCase();
+        const show = (!q || name.includes(q)) && (!h || !li.dataset.h || li.dataset.h === h);
+        li.hidden = !show;
+        return show;
+    });
+    const count = document.getElementById('sidebar-count');
+    if (count) count.textContent = '(' + shown.length + ')';
+}
+window.sidebarFilter = sidebarFilter;
+
+// Play intro animation on command palette button
+function playFabIntro() {
+    const fab = document.getElementById('cmd-fab');
+    if (!fab) return;
+    setTimeout(() => {
+        fab.style.setProperty('--cmd-pos', '0');
+        fab.style.setProperty('--cmd-opacity', '1');
+        fab.style.setProperty('--cmd-blur', '30');
+        setTimeout(() => {
+            fab.style.removeProperty('--cmd-pos');
+            fab.style.removeProperty('--cmd-opacity');
+            fab.style.removeProperty('--cmd-blur');
+        }, 3000);
+    }, 500);
+}
+
+// ============================================================================
+// COMMAND PALETTE
+// ============================================================================
+
+(function() {
+    const dialog = document.getElementById('cmd-palette');
+    const input = document.getElementById('cmd-input');
+    const list = document.getElementById('cmd-list');
+    const fab = document.getElementById('cmd-fab');
+    const themeBtn = document.getElementById('theme-btn');
+    if (!dialog || !input || !list) return;
+
+    // Load icons from template (rendered server-side from icons.html)
+    const iconTemplate = document.getElementById('cmd-icons');
+    const icons = {};
+    if (iconTemplate) {
+        iconTemplate.content.querySelectorAll('[data-icon]').forEach(el => {
+            icons[el.dataset.icon] = el.innerHTML;
+        });
+    }
+
+    // All available DaisyUI themes
+    const THEMES = ['light', 'dark', 'cupcake', 'bumblebee', 'emerald', 'corporate', 'synthwave', 'retro', 'cyberpunk', 'valentine', 'halloween', 'garden', 'forest', 'aqua', 'lofi', 'pastel', 'fantasy', 'wireframe', 'black', 'luxury', 'dracula', 'cmyk', 'autumn', 'business', 'acid', 'lemonade', 'night', 'coffee', 'winter', 'dim', 'nord', 'sunset', 'caramellatte', 'abyss', 'silk'];
+    const THEME_KEY = 'cf_theme';
+
+    const colors = { stack: '#22c55e', action: '#eab308', nav: '#3b82f6', app: '#a855f7', theme: '#ec4899' };
+    let commands = [];
+    let filtered = [];
+    let selected = 0;
+    let originalTheme = null; // Store theme when palette opens for preview/restore
+
+    const post = (url) => () => htmx.ajax('POST', url, {swap: 'none'});
+    const nav = (url) => () => {
+        htmx.ajax('GET', url, {target: '#main-content', select: '#main-content', swap: 'outerHTML'}).then(() => {
+            history.pushState({}, '', url);
+        });
+    };
+    // Navigate to dashboard (if needed) and trigger action
+    const dashboardAction = (endpoint) => async () => {
+        if (window.location.pathname !== '/') {
+            await htmx.ajax('GET', '/', {target: '#main-content', select: '#main-content', swap: 'outerHTML'});
+            history.pushState({}, '', '/');
+        }
+        htmx.ajax('POST', `/api/${endpoint}`, {swap: 'none'});
+    };
+    // Apply theme and save to localStorage
+    const setTheme = (theme) => () => {
+        document.documentElement.setAttribute('data-theme', theme);
+        localStorage.setItem(THEME_KEY, theme);
+    };
+    // Preview theme without saving (for hover)
+    const previewTheme = (theme) => {
+        document.documentElement.setAttribute('data-theme', theme);
+    };
+    // Restore original theme (when closing without selection)
+    const restoreTheme = () => {
+        if (originalTheme) {
+            document.documentElement.setAttribute('data-theme', originalTheme);
+        }
+    };
+    // Generate color swatch HTML for a theme
+    const themeSwatch = (theme) => `<span class="flex gap-0.5" data-theme="${theme}"><span class="w-2 h-4 rounded-l bg-primary"></span><span class="w-2 h-4 bg-secondary"></span><span class="w-2 h-4 bg-accent"></span><span class="w-2 h-4 rounded-r bg-neutral"></span></span>`;
+
+    const cmd = (type, name, desc, action, icon = null, themeId = null) => ({ type, name, desc, action, icon, themeId });
+
+    // Reopen palette with theme filter
+    const openThemePicker = () => {
+        // Small delay to let dialog close before reopening
+        setTimeout(() => open('theme:'), 50);
+    };
+
+    function buildCommands() { // Rebuild `commands` from the current URL, sidebar and theme
+        const openExternal = (url) => () => window.open(url, '_blank');
+
+        const actions = [
+            cmd('action', 'Apply', 'Make reality match config', dashboardAction('apply'), icons.check),
+            cmd('action', 'Refresh', 'Update state from reality', dashboardAction('refresh'), icons.refresh_cw),
+            cmd('app', 'Theme', 'Change color theme', openThemePicker, icons.palette),
+            cmd('app', 'Dashboard', 'Go to dashboard', nav('/'), icons.home),
+            cmd('app', 'Console', 'Go to console', nav('/console'), icons.terminal),
+            cmd('app', 'Docs', 'Open documentation', openExternal('https://compose-farm.nijho.lt/'), icons.book_open),
+        ];
+
+        // Add stack-specific actions if on a stack page (name is re-encoded for the API URL)
+        const match = window.location.pathname.match(/^\/stack\/(.+)$/);
+        if (match) {
+            const stack = decodeURIComponent(match[1]);
+            const stackCmd = (name, desc, endpoint, icon) => cmd('stack', name, `${desc} ${stack}`, post(`/api/stack/${encodeURIComponent(stack)}/${endpoint}`), icon);
+            actions.unshift(
+                stackCmd('Up', 'Start', 'up', icons.play),
+                stackCmd('Down', 'Stop', 'down', icons.square),
+                stackCmd('Restart', 'Restart', 'restart', icons.rotate_cw),
+                stackCmd('Pull', 'Pull', 'pull', icons.cloud_download),
+                stackCmd('Update', 'Pull + restart', 'update', icons.refresh_cw),
+                stackCmd('Logs', 'View logs for', 'logs', icons.file_text),
+            );
+        }
+
+        // Add nav commands for all stacks from sidebar (name is taken verbatim from the link href)
+        const stacks = [...document.querySelectorAll('#sidebar-stacks li[data-stack] a[href]')].map(a => {
+            const name = a.getAttribute('href').replace('/stack/', '');
+            return cmd('nav', name, 'Go to stack', nav(`/stack/${name}`), icons.box);
+        });
+
+        // Add theme commands with color swatches
+        const currentTheme = document.documentElement.getAttribute('data-theme') || 'dark';
+        const themeCommands = THEMES.map(theme =>
+            cmd('theme', `theme: ${theme}`, theme === currentTheme ? '(current)' : 'Switch theme', setTheme(theme), themeSwatch(theme), theme)
+        );
+
+        commands = [...actions, ...stacks, ...themeCommands];
+    }
+
+    function filter() {
+        // Normalize: collapse spaces and ensure space after colon for matching
+        // This allows "theme:dark", "theme: dark", "theme:  dark" to all match "theme: dark"
+        const q = input.value.toLowerCase().replace(/\s+/g, ' ').replace(/:(\S)/g, ': $1');
+        filtered = commands.filter(c => c.name.toLowerCase().includes(q));
+        selected = Math.max(0, Math.min(selected, filtered.length - 1));
+    }
+
+    function render() {
+        list.innerHTML = filtered.map((c, i) => `
+            <a class="flex justify-between items-center px-3 py-2 rounded-r cursor-pointer hover:bg-base-200 border-l-4 ${i === selected ? 'bg-base-300' : ''}" style="border-left-color: ${colors[c.type] || '#666'}" data-idx="${i}"${c.themeId ? ` data-theme-id="${c.themeId}"` : ''}>
+                <span class="flex items-center gap-2">${c.icon || ''}<span>${c.name}</span></span>
+                <span class="opacity-40 text-xs">${c.desc}</span>
+            </a>
+        `).join('') || '<div class="opacity-50 p-2">No matches</div>';
+        // Scroll selected item into view
+        const sel = list.querySelector(`[data-idx="${selected}"]`);
+        if (sel) sel.scrollIntoView({ block: 'nearest' });
+        // Preview theme if selected item is a theme command
+        const selectedCmd = filtered[selected];
+        if (selectedCmd?.themeId) {
+            previewTheme(selectedCmd.themeId);
+        } else if (originalTheme) {
+            // Restore original when navigating away from theme commands
+            previewTheme(originalTheme);
+        }
+    }
+
+    function open(initialFilter = '') {
+        // Store original theme for preview/restore
+        originalTheme = document.documentElement.getAttribute('data-theme') || 'dark';
+        buildCommands();
+        selected = 0;
+        input.value = initialFilter;
+        filter();
+        // If opening theme picker, select current theme
+        if (initialFilter.startsWith('theme:')) {
+            const currentIdx = filtered.findIndex(c => c.themeId === originalTheme);
+            if (currentIdx >= 0) selected = currentIdx;
+        }
+        render();
+        dialog.showModal();
+        input.focus();
+    }
+
+    function close() {
+        dialog.close();
+        restoreTheme();
+    }
+
+    function exec() {
+        const cmd = filtered[selected];
+        if (cmd) {
+            if (cmd.themeId) {
+                // Theme command commits the previewed choice.
+                originalTheme = null;
+            }
+            dialog.close();
+            cmd.action();
+        }
+    }
+
+    // Keyboard: Cmd+K to open
+    document.addEventListener('keydown', e => {
+        if ((e.metaKey || e.ctrlKey) && e.key === 'k') {
+            e.preventDefault();
+            open();
+        }
+    });
+
+    // Input filtering
+    input.addEventListener('input', () => { filter(); render(); });
+
+    // Keyboard nav inside palette
+    dialog.addEventListener('keydown', e => {
+        if (!dialog.open) return;
+        if (e.key === 'ArrowDown') { e.preventDefault(); selected = Math.min(selected + 1, filtered.length - 1); render(); }
+        else if (e.key === 'ArrowUp') { e.preventDefault(); selected = Math.max(selected - 1, 0); render(); }
+        else if (e.key === 'Enter') { e.preventDefault(); exec(); }
+    });
+
+    // Click to execute
+    list.addEventListener('click', e => {
+        const a = e.target.closest('a[data-idx]');
+        if (a) {
+            selected = parseInt(a.dataset.idx, 10);
+            exec();
+        }
+    });
+
+    // Hover previews theme without changing selection
+    list.addEventListener('mouseover', e => {
+        const a = e.target.closest('a[data-theme-id]');
+        if (a) previewTheme(a.dataset.themeId);
+    });
+
+    // Mouse leaving list restores to selected item's theme (or original)
+    list.addEventListener('mouseleave', () => {
+        const theme = filtered[selected]?.themeId || originalTheme;
+        if (theme) previewTheme(theme); // originalTheme is null before first open and after close
+    });
+
+    // Restore theme when dialog closes without selection (Escape, backdrop click)
+    dialog.addEventListener('close', () => {
+        if (originalTheme) {
+            restoreTheme();
+            originalTheme = null;
+        }
+    });
+
+    // FAB click to open
+    if (fab) fab.addEventListener('click', () => open());
+
+    // Theme button opens palette with "theme:" filter
+    if (themeBtn) themeBtn.addEventListener('click', () => open('theme:'));
+})();
+
+// ============================================================================
+// THEME PERSISTENCE
+// ============================================================================
+
+// Restore saved theme on load (also handled in inline script to prevent flash)
+(function() {
+    const saved = localStorage.getItem('cf_theme');
+    if (saved) document.documentElement.setAttribute('data-theme', saved);
+})();
+
+// ============================================================================
+// INITIALIZATION
+// ============================================================================
+
 /**
 * Global keyboard shortcut handler
 */
@@ -351,7 +741,7 @@ function initKeyboardShortcuts() {
            // Only handle if we have editors and no Monaco editor is focused
            if (Object.keys(editors).length > 0) {
                // Check if any Monaco editor is focused
-                const focusedEditor = Object.values(editors).find(ed => ed && ed.hasTextFocus && ed.hasTextFocus());
+                const focusedEditor = Object.values(editors).find(ed => ed?.hasTextFocus?.());
                if (!focusedEditor) {
                    e.preventDefault();
                    saveAllEditors();
@@ -369,42 +759,47 @@ function initPage() {
    initSaveButton();
 }

+/**
+ * Attempt to reconnect to an active task from localStorage
+ * @param {string} [path] - Optional path to use for task key lookup.
+ *                          If not provided, uses current window.location.pathname.
+ *                          This is important for HTMX navigation where pushState
+ *                          hasn't happened yet when htmx:afterSwap fires.
+ */
+function tryReconnectToTask(path) {
+    const taskKey = TASK_KEY_PREFIX + (path || window.location.pathname);
+    const taskId = localStorage.getItem(taskKey);
+    if (!taskId) return;
+
+    whenXtermReady(() => {
+        expandTerminal();
+        initTerminal('terminal-output', taskId);
+    });
+}
+
 // Initialize on page load
 document.addEventListener('DOMContentLoaded', function() {
    initPage();
    initKeyboardShortcuts();
+    playFabIntro();
+
+    // Try to reconnect to any active task
+    tryReconnectToTask();
 });

 // Re-initialize after HTMX swaps main content
 document.body.addEventListener('htmx:afterSwap', function(evt) {
    if (evt.detail.target.id === 'main-content') {
        initPage();
+        // Try to reconnect to task for the TARGET page, not current URL.
+        // When using command palette navigation (htmx.ajax + manual pushState),
+        // window.location.pathname still reflects the OLD page at this point.
+        // Use pathInfo.requestPath to get the correct target path.
+        const targetPath = evt.detail.pathInfo?.requestPath?.split('?')[0] || window.location.pathname;
+        tryReconnectToTask(targetPath);
    }
 });

-/**
- * Expand terminal collapse and scroll to it
- */
-function expandTerminal() {
-    const toggle = document.getElementById('terminal-toggle');
-    if (toggle) toggle.checked = true;
-
-    const collapse = document.getElementById('terminal-collapse');
-    if (collapse) {
-        collapse.scrollIntoView({ behavior: 'smooth', block: 'start' });
-    }
-}
-
-/**
- * Show/hide terminal loading spinner
- */
-function setTerminalLoading(loading) {
-    const spinner = document.getElementById('terminal-spinner');
-    if (spinner) {
-        spinner.classList.toggle('hidden', !loading);
-    }
-}
-
 // Handle action responses (terminal streaming)
 document.body.addEventListener('htmx:afterRequest', function(evt) {
    if (!evt.detail.successful || !evt.detail.xhr) return;
@@ -416,20 +811,8 @@ document.body.addEventListener('htmx:afterRequest', function(evt) {
    try {
        const response = JSON.parse(text);
        if (response.task_id) {
-            // Expand terminal and scroll to it
            expandTerminal();
-
-            // Wait for xterm to be loaded if needed
-            const tryInit = (attempts) => {
-                if (typeof Terminal !== 'undefined' && typeof FitAddon !== 'undefined') {
-                    initTerminal('terminal-output', response.task_id);
-                } else if (attempts > 0) {
-                    setTimeout(() => tryInit(attempts - 1), 100);
-                } else {
-                    console.error('xterm.js failed to load');
-                }
-            };
-            tryInit(20); // Try for up to 2 seconds
+            whenXtermReady(() => initTerminal('terminal-output', response.task_id));
        }
    } catch (e) {
        // Not valid JSON, ignore
--- a/src/compose_farm/web/streaming.py
+++ b/src/compose_farm/web/streaming.py
@@ -4,12 +4,18 @@ from __future__ import annotations

 import asyncio
 import os
-from pathlib import Path
+import time
 from typing import TYPE_CHECKING, Any

+from compose_farm.executor import build_ssh_command
+from compose_farm.ssh_keys import get_ssh_auth_sock
+
 if TYPE_CHECKING:
    from compose_farm.config import Config

+# Environment variable to identify the web stack (for self-update detection)
+CF_WEB_STACK = os.environ.get("CF_WEB_STACK", "")
+
 # ANSI escape codes for terminal output
 RED = "\x1b[31m"
 GREEN = "\x1b[32m"
@@ -17,28 +23,28 @@ DIM = "\x1b[2m"
 RESET = "\x1b[0m"
 CRLF = "\r\n"

-
-def _get_ssh_auth_sock() -> str | None:
-    """Get SSH_AUTH_SOCK, auto-detecting forwarded agent if needed."""
-    sock = os.environ.get("SSH_AUTH_SOCK")
-    if sock and Path(sock).is_socket():
-        return sock
-
-    # Try to find a forwarded SSH agent socket
-    agent_dir = Path.home() / ".ssh" / "agent"
-    if agent_dir.is_dir():
-        sockets = sorted(
-            agent_dir.glob("s.*.sshd.*"), key=lambda p: p.stat().st_mtime, reverse=True
-        )
-        for s in sockets:
-            if s.is_socket():
-                return str(s)
-    return None
-
-
 # In-memory task registry
 tasks: dict[str, dict[str, Any]] = {}

+# How long to keep completed tasks (10 minutes)
+TASK_TTL_SECONDS = 600
+
+
+def cleanup_stale_tasks() -> int:
+    """Remove tasks that completed more than TASK_TTL_SECONDS ago.
+
+    Returns the number of tasks removed.
+    """
+    cutoff = time.time() - TASK_TTL_SECONDS
+    stale = [
+        tid
+        for tid, task in tasks.items()
+        if task.get("completed_at") and task["completed_at"] < cutoff
+    ]
+    for tid in stale:
+        tasks.pop(tid, None)
+    return len(stale)
+

 async def stream_to_task(task_id: str, message: str) -> None:
    """Send a message to a task's output buffer."""
@@ -46,60 +52,111 @@ async def stream_to_task(task_id: str, message: str) -> None:
        tasks[task_id]["output"].append(message)


+async def _stream_subprocess(task_id: str, args: list[str], env: dict[str, str]) -> int:
+    """Run a subprocess and stream its output to the task buffer; return its exit code."""
+    process = await asyncio.create_subprocess_exec(
+        *args,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.STDOUT,
+        env=env,
+    )
+    if process.stdout:
+        async for line in process.stdout:
+            text = line.decode("utf-8", errors="replace")
+            # Convert \n to \r\n for xterm.js
+            if text.endswith("\n") and not text.endswith("\r\n"):
+                text = text[:-1] + "\r\n"
+            await stream_to_task(task_id, text)
+    return await process.wait()
+
+
 async def run_cli_streaming(
    config: Config,
    args: list[str],
    task_id: str,
 ) -> None:
-    """Run a cf CLI command as subprocess and stream output to task buffer.
-
-    This reuses all CLI logic including Rich formatting, progress bars, etc.
-    The subprocess gets a pseudo-TTY via FORCE_COLOR so Rich outputs ANSI codes.
-    """
+    """Run a cf CLI command as subprocess and stream output to task buffer."""
    try:
-        # Build command - config option goes after the subcommand
        cmd = ["cf", *args, f"--config={config.config_path}"]
+        await stream_to_task(task_id, f"{DIM}$ {' '.join(['cf', *args])}{RESET}{CRLF}")

-        # Show command being executed
-        cmd_display = " ".join(["cf", *args])
-        await stream_to_task(task_id, f"{DIM}$ {cmd_display}{RESET}{CRLF}")
-
-        # Force color output even though there's no real TTY
-        # Set COLUMNS for Rich/Typer to format output correctly
-        env = {"FORCE_COLOR": "1", "TERM": "xterm-256color", "COLUMNS": "120"}
-
-        # Ensure SSH agent is available (auto-detect if needed)
-        ssh_sock = _get_ssh_auth_sock()
-        if ssh_sock:
+        # Build environment with color support and SSH agent
+        env = {**os.environ, "FORCE_COLOR": "1", "TERM": "xterm-256color", "COLUMNS": "120"}
+        if ssh_sock := get_ssh_auth_sock():
            env["SSH_AUTH_SOCK"] = ssh_sock

-        process = await asyncio.create_subprocess_exec(
-            *cmd,
-            stdout=asyncio.subprocess.PIPE,
-            stderr=asyncio.subprocess.STDOUT,
-            env={**os.environ, **env},
-        )
-
-        # Stream output line by line
-        if process.stdout:
-            async for line in process.stdout:
-                text = line.decode("utf-8", errors="replace")
-                # Convert \n to \r\n for xterm.js
-                if text.endswith("\n") and not text.endswith("\r\n"):
-                    text = text[:-1] + "\r\n"
-                await stream_to_task(task_id, text)
-
-        exit_code = await process.wait()
+        exit_code = await _stream_subprocess(task_id, cmd, env)
        tasks[task_id]["status"] = "completed" if exit_code == 0 else "failed"
+        tasks[task_id]["completed_at"] = time.time()

    except Exception as e:
        await stream_to_task(task_id, f"{RED}Error: {e}{RESET}{CRLF}")
        tasks[task_id]["status"] = "failed"
+        tasks[task_id]["completed_at"] = time.time()
+
+
+def _is_self_update(stack: str, command: str) -> bool:
+    """Check if this is a self-update (updating the web stack itself).
+
+    Self-updates need special handling because running 'down' on the container
+    we're running in would kill the process before 'up' can execute.
+    """
+    if not CF_WEB_STACK or stack != CF_WEB_STACK:
+        return False
+    # Commands that involve 'down' need SSH: update, restart, down
+    return command in ("update", "restart", "down")
+
+
+async def _run_cli_via_ssh(
+    config: Config,
+    args: list[str],
+    task_id: str,
+) -> None:
+    """Run a cf CLI command via SSH for self-updates (survives container restart)."""
+    try:
+        host = config.get_host(CF_WEB_STACK)
+        cf_cmd = f"cf {' '.join(args)} --config={config.config_path}"
+        # Include task_id to prevent collision with concurrent updates
+        log_file = f"/tmp/cf-self-update-{task_id}.log"  # noqa: S108
+
+        # setsid detaches cf from the SSH session; tail -f streams its log until SSH dies
+        remote_cmd = (
+            f"rm -f {log_file} && "
+            f"PATH=$HOME/.local/bin:/usr/local/bin:$PATH "
+            f"setsid sh -c '{cf_cmd} > {log_file} 2>&1' & "
+            f"sleep 0.3 && tail -f {log_file} 2>/dev/null"
+        )
+
+        await stream_to_task(task_id, f"{DIM}$ {cf_cmd}{RESET}{CRLF}")
+        await stream_to_task(task_id, f"{GREEN}Running via SSH (detached with setsid){RESET}{CRLF}")
+
+        ssh_args = build_ssh_command(host, remote_cmd, tty=False)
+        env = {**os.environ}
+        if ssh_sock := get_ssh_auth_sock():
+            env["SSH_AUTH_SOCK"] = ssh_sock
+
+        exit_code = await _stream_subprocess(task_id, ssh_args, env)
+
+        # Exit code 255 = SSH closed (container died during down) - expected for self-updates
+        if exit_code == 255:  # noqa: PLR2004
+            await stream_to_task(
+                task_id,
+                f"{CRLF}{GREEN}Container restarting... refresh the page in a few seconds.{RESET}{CRLF}",
+            )
+            tasks[task_id]["status"] = "completed"
+        else:
+            tasks[task_id]["status"] = "completed" if exit_code == 0 else "failed"
+        tasks[task_id]["completed_at"] = time.time()
+
+    except Exception as e:
+        await stream_to_task(task_id, f"{RED}Error: {e}{RESET}{CRLF}")
+        tasks[task_id]["status"] = "failed"
+        tasks[task_id]["completed_at"] = time.time()


 async def run_compose_streaming(
    config: Config,
-    service: str,
+    stack: str,
    command: str,
    task_id: str,
 ) -> None:
@@ -110,5 +167,10 @@ async def run_compose_streaming(
    extra_args = args[1:]  # -d, etc.

    # Build CLI args
-    cli_args = [cli_cmd, service, *extra_args]
-    await run_cli_streaming(config, cli_args, task_id)
+    cli_args = [cli_cmd, stack, *extra_args]
+
+    # Use SSH for self-updates to survive container restart
+    if _is_self_update(stack, cli_cmd):
+        await _run_cli_via_ssh(config, cli_args, task_id)
+    else:
+        await run_cli_streaming(config, cli_args, task_id)
--- a/Show More
+++ b/Show More