fix: --host filter now limits multi-host stack operations to single host (#175)

* fix: --host filter now limits multi-host stack operations to single host Previously, when using `-H host` with a multi-host stack like `glances: all`, the command would find the stack (correct) but then operate on ALL hosts for that stack (incorrect). For example, `cf down -H nas` with `glances` would stop glances on all 5 hosts instead of just nas. Now, when `--host` is specified: - `cf down -H nas` only stops stacks on nas, including only the nas instance of multi-host stacks - `cf up -H nas` only starts stacks on nas (skips migration logic since host is explicitly specified) Added tests for the new filter_host behavior in both executor and CLI. * fix: Apply filter_host to logs and ps commands as well Same bug as up/down: when using `-H host` with multi-host stacks, logs and ps would show results from all hosts instead of just the filtered host. * fix: Don't remove multi-host stacks from state when host-filtered When using `-H host` with a multi-host stack, we only stop one instance. The stack is still running on other hosts, so we shouldn't remove it from state entirely. This prevents issues where: - `cf apply` would try to re-start the stack - `cf ps` would show incorrect running status - Orphan detection would be confused Added tests to verify state is preserved for host-filtered multi-host operations and removed for full stack operations. * refactor: Introduce StackSelection dataclass for cleaner context passing Instead of passing filter_host separately through multiple layers, bundle the selection context into a StackSelection dataclass: - stacks: list of selected stack names - config: the loaded Config - host_filter: optional host filter from -H flag This provides: 1. Cleaner APIs - context travels together instead of being scattered 2. is_instance_level() method - encapsulates the check for whether this is an instance-level operation (host-filtered multi-host stack) 3. Future extensibility - can add more context (dry_run, verbose, etc.) 
Updated all callers of get_stacks() to use the new return type. * Revert "refactor: Introduce StackSelection dataclass for cleaner context passing" This reverts commit e6e9eed93e. * feat: Proper per-host state tracking for multi-host stacks - Add `remove_stack_host()` to remove a single host from a multi-host stack's state - Add `add_stack_host()` to add a single host to a stack's state - Update `down` command to use `remove_stack_host` for host-filtered multi-host stacks - Update `up` command to use `add_stack_host` for host-filtered operations This ensures the state file accurately reflects which hosts each stack is running on, rather than just tracking if it's running at all. * fix: Use set comparisons for host list tests Host lists may be reordered during YAML save/load, so test for set equality rather than list equality. * refactor: Merge remove_stack_host into remove_stack as optional parameter Instead of a separate function, `remove_stack` now takes an optional `host` parameter. When specified, it removes only that host from multi-host stacks. This reduces API surface and follows the existing pattern. * fix: Restore deterministic host list sorting and add filter_host test - Restore sorting of list values in _sorted_dict for consistent YAML output - Add test for logs --host passing filter_host to run_on_stacks
Sort host lists in state file for consistent output (#174)
2026-02-03 14:13:26 +00:00 · 2026-02-01 13:43:17 -08:00 · 2026-01-30 15:29:13 -08:00 · 2026-01-20 01:01:34 +00:00 · 2026-01-18 20:23:21 +00:00 · 2026-01-18 20:21:37 +00:00
168 changed files with 19775 additions and 2769 deletions
--- a/.envrc.example
+++ b/.envrc.example
@@ -0,0 +1,6 @@
+# Run containers as current user (preserves file ownership on NFS mounts)
+# Copy this file to .envrc and run: direnv allow
+export CF_UID=$(id -u)
+export CF_GID=$(id -g)
+export CF_HOME=$HOME
+export CF_USER=$USER
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
--- a/.github/check_readme_commands.py
+++ b/.github/check_readme_commands.py
@@ -24,7 +24,7 @@ def get_all_commands(typer_app: typer.Typer, prefix: str = "cf") -> set[str]:
            continue
        name = command.name
        if not name and command.callback:
-            name = command.callback.__name__
+            name = getattr(command.callback, "__name__", None)
        if name:
            commands.add(f"{prefix} {name}")

--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-latest]
        python-version: ["3.11", "3.12", "3.13"]

    steps:
@@ -27,8 +27,8 @@ jobs:
      - name: Install dependencies
        run: uv sync --all-extras --dev

-      - name: Run tests
-        run: uv run pytest
+      - name: Run tests (excluding browser tests)
+        run: uv run pytest -m "not browser"

      - name: Upload coverage reports to Codecov
        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
@@ -36,6 +36,26 @@ jobs:
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

+  browser-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+
+      - name: Set up Python
+        run: uv python install 3.13
+
+      - name: Install dependencies
+        run: uv sync --all-extras --dev
+
+      - name: Install Playwright browsers
+        run: uv run playwright install chromium --with-deps
+
+      - name: Run browser tests
+        run: uv run pytest -m browser -n auto -v
+
  lint:
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -68,16 +68,35 @@ jobs:
          echo "✗ Timeout waiting for PyPI"
          exit 1

+      - name: Check if latest release
+        id: latest
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          # Get latest release tag from GitHub (strip 'v' prefix)
+          LATEST=$(gh release view --json tagName -q '.tagName' | sed 's/^v//')
+          echo "Building version: $VERSION"
+          echo "Latest release: $LATEST"
+          if [ "$VERSION" = "$LATEST" ]; then
+            echo "is_latest=true" >> $GITHUB_OUTPUT
+            echo "✓ This is the latest release, will tag as :latest"
+          else
+            echo "is_latest=false" >> $GITHUB_OUTPUT
+            echo "⚠ This is NOT the latest release, skipping :latest tag"
+          fi
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          # Only tag as 'latest' if this is the latest release (prevents re-runs of old releases from overwriting)
          tags: |
            type=semver,pattern={{version}},value=v${{ steps.version.outputs.version }}
            type=semver,pattern={{major}}.{{minor}},value=v${{ steps.version.outputs.version }}
            type=semver,pattern={{major}},value=v${{ steps.version.outputs.version }}
-            type=raw,value=latest
+            type=raw,value=latest,enable=${{ steps.latest.outputs.is_latest }}

      - name: Build and push
        uses: docker/build-push-action@v6
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,66 @@
+name: Docs
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - "docs/**"
+      - "zensical.toml"
+      - ".github/workflows/docs.yml"
+  pull_request:
+    paths:
+      - "docs/**"
+      - "zensical.toml"
+      - ".github/workflows/docs.yml"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages-${{ github.ref }}"
+  cancel-in-progress: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          lfs: true
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python
+        run: uv python install 3.12
+
+      - name: Install Zensical
+        run: uv tool install zensical
+
+      - name: Build docs
+        run: zensical build
+
+      - name: Setup Pages
+        if: github.event_name != 'pull_request'
+        uses: actions/configure-pages@v5
+
+      - name: Upload artifact
+        if: github.event_name != 'pull_request'
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: "./site"
+
+  deploy:
+    if: github.event_name != 'pull_request'
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
--- a/.github/workflows/update-readme.yml
+++ b/.github/workflows/update-readme.yml
@@ -26,7 +26,9 @@ jobs:
        env:
          TERM: dumb
          NO_COLOR: 1
-          TERMINAL_WIDTH: 90
+          COLUMNS: 90  # POSIX terminal width for Rich
+          TERMINAL_WIDTH: 90  # Typer MAX_WIDTH for help panels
+          _TYPER_FORCE_DISABLE_TERMINAL: 1  # Prevent Typer forcing terminal mode in CI
        run: |
          uvx --with . markdown-code-runner README.md
          sed -i 's/[[:space:]]*$//' README.md
--- a/.gitignore
+++ b/.gitignore
@@ -37,6 +37,7 @@ ENV/
 .coverage
 .pytest_cache/
 htmlcov/
+.code/

 # Local config (don't commit real configs)
 compose-farm.yaml
@@ -44,3 +45,5 @@ compose-farm.yaml
 coverage.xml
 .env
 homepage/
+site/
+.playwright-mcp/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,16 +21,22 @@ repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.9
    hooks:
-      - id: ruff
+      - id: ruff-check
        args: [--fix]
      - id: ruff-format

-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.14.0
+  - repo: local
    hooks:
      - id: mypy
-        additional_dependencies:
-          - pydantic>=2.0.0
-          - typer>=0.9.0
-          - asyncssh>=2.14.0
-          - types-PyYAML
+        name: mypy (type checker)
+        entry: uv run mypy src tests
+        language: system
+        types: [python]
+        pass_filenames: false
+
+      - id: ty
+        name: ty (type checker)
+        entry: uv run ty check
+        language: system
+        types: [python]
+        pass_filenames: false
--- a/.prompts/docs-review.md
+++ b/.prompts/docs-review.md
@@ -0,0 +1,119 @@
+Review documentation for accuracy, completeness, and consistency. Focus on things that require judgment—automated checks handle the rest.
+
+## What's Already Automated
+
+Don't waste time on these—CI and pre-commit hooks handle them:
+
+- **README help output**: `markdown-code-runner` regenerates `cf --help` blocks in CI
+- **README command table**: Pre-commit hook verifies commands are listed
+- **Linting/formatting**: Handled by pre-commit
+
+## What This Review Is For
+
+Focus on things that require judgment:
+
+1. **Accuracy**: Does the documentation match what the code actually does?
+2. **Completeness**: Are there undocumented features, options, or behaviors?
+3. **Clarity**: Would a new user understand this? Are examples realistic?
+4. **Consistency**: Do different docs contradict each other?
+5. **Freshness**: Has the code changed in ways the docs don't reflect?
+
+## Review Process
+
+### 1. Check Recent Changes
+
+```bash
+# What changed recently that might need doc updates?
+git log --oneline -20 | grep -iE "feat|fix|add|remove|change|option"
+
+# What code files changed?
+git diff --name-only HEAD~20 | grep "\.py$"
+```
+
+Look for new features, changed defaults, renamed options, or removed functionality.
+
+### 2. Verify docs/commands.md Options Tables
+
+The README auto-updates help output, but `docs/commands.md` has **manually maintained options tables**. These can drift.
+
+For each command's options table, compare against `cf <command> --help`:
+- Are all options listed?
+- Are short flags correct?
+- Are defaults accurate?
+- Are descriptions accurate?
+
+**Pay special attention to subcommands** (`cf config *`, `cf ssh *`)—these have their own options that are easy to miss.
+
+### 3. Verify docs/configuration.md
+
+Compare against Pydantic models in the source:
+
+```bash
+# Find the config models
+grep -r "class.*BaseModel" src/ --include="*.py" -A 15
+```
+
+Check:
+- All config keys documented
+- Types and defaults match code
+- Config file search order is accurate
+- Example YAML would actually work
+
+### 4. Verify docs/architecture.md and CLAUDE.md
+
+```bash
+# What source files actually exist?
+git ls-files "src/**/*.py"
+```
+
+Check **both** `docs/architecture.md` and `CLAUDE.md` (Architecture section):
+- Listed files exist
+- No files are missing from the list
+- Descriptions match what the code does
+
+Both files have architecture listings that can drift independently.
+
+### 5. Check Examples
+
+For examples in any doc:
+- Would the YAML/commands actually work?
+- Are service names, paths, and options realistic?
+- Do examples use current syntax (not deprecated options)?
+
+### 6. Cross-Reference Consistency
+
+The same info appears in multiple places. Check for conflicts:
+- README.md vs docs/index.md
+- docs/commands.md vs CLAUDE.md command tables
+- Config examples across different docs
+
+### 7. Self-Check This Prompt
+
+This prompt can become outdated too. If you notice:
+- New automated checks that should be listed above
+- New doc files that need review guidelines
+- Patterns that caused issues
+
+Include prompt updates in your fixes.
+
+## Output Format
+
+Categorize findings:
+
+1. **Critical**: Wrong info that would break user workflows
+2. **Inaccuracy**: Technical errors (wrong defaults, paths, types)
+3. **Missing**: Undocumented features or options
+4. **Outdated**: Was true, no longer is
+5. **Inconsistency**: Docs contradict each other
+6. **Minor**: Typos, unclear wording
+
+For each issue, provide a ready-to-apply fix:
+
+```
+### Issue: [Brief description]
+
+- **File**: docs/commands.md:652
+- **Problem**: `cf ssh setup` has `--config` option but it's not documented
+- **Fix**: Add `--config, -c PATH` to the options table
+- **Verify**: `cf ssh setup --help`
+```
--- a/.prompts/duplication-audit.md
+++ b/.prompts/duplication-audit.md
@@ -0,0 +1,79 @@
+# Duplication audit and generalization prompt
+
+You are a coding agent working inside a repository. Your job is to find duplicated
+functionality (not just identical code) and propose a minimal, safe generalization.
+Keep it simple and avoid adding features.
+
+## First steps
+
+- Read project-specific instructions (AGENTS.md, CONTRIBUTING.md, or similar) and follow them.
+- If instructions mention tooling or style (e.g., preferred search tools), use those.
+- Ask a brief clarifying question if the request is ambiguous (for example: report only vs refactor).
+
+## Objective
+
+Identify and consolidate duplicated functionality across the codebase. Duplication includes:
+- Multiple functions that parse or validate the same data in slightly different ways
+- Repeated file reads or config parsing
+- Similar command building or subprocess execution paths
+- Near-identical error handling or logging patterns
+- Repeated data transforms that can become a shared helper
+
+The goal is to propose a general, reusable abstraction that reduces duplication while
+preserving behavior. Keep changes minimal and easy to review.
+
+## Search strategy
+
+1) Map the hot paths
+- Scan entry points (CLI, web handlers, tasks, jobs) to see what they do repeatedly.
+- Look for cross-module patterns: same steps, different files.
+
+2) Find duplicate operations
+- Use fast search tools (prefer `rg`) to find repeated keywords and patterns.
+- Check for repeated YAML/JSON parsing, env interpolation, file IO, command building,
+  data validation, or response formatting.
+
+3) Validate duplication is real
+- Confirm the functional intent matches (not just similar code).
+- Note any subtle differences that must be preserved.
+
+4) Propose a minimal generalization
+- Suggest a shared helper, utility, or wrapper.
+- Avoid over-engineering. If only two call sites exist, keep the helper small.
+- Prefer pure functions and centralized IO if that already exists.
+
+## Deliverables
+
+Provide a concise report with:
+
+1) Findings
+- List duplicated behaviors with file references and a short description of the
+  shared functionality.
+- Explain why these are functionally the same (or nearly the same).
+
+2) Proposed generalizations
+- For each duplication, propose a shared helper and where it should live.
+- Outline any behavior differences that need to be parameterized.
+
+3) Impact and risk
+- Note any behavior risks, test needs, or migration steps.
+
+If the user asked you to implement changes:
+- Make only the minimal edits needed to dedupe behavior.
+- Keep the public API stable unless explicitly requested.
+- Add small comments only when the logic is non-obvious.
+- Summarize what changed and why.
+
+## Output format
+
+- Start with a short summary of the top 1-3 duplications.
+- Then provide a list of findings, ordered by impact.
+- Include a small proposed refactor plan (step-by-step, no more than 5 steps).
+- End with any questions or assumptions.
+
+## Guardrails
+
+- Do not add new features or change behavior beyond deduplication.
+- Avoid deep refactors without explicit request.
+- Preserve existing style conventions and import rules.
+- If a duplication is better left alone (e.g., clarity, single usage), say so.
--- a/.prompts/pr-review.md
+++ b/.prompts/pr-review.md
@@ -0,0 +1,16 @@
+Review the pull request for:
+
+- **Code cleanliness**: Is the implementation clean and well-structured?
+- **DRY principle**: Does it avoid duplication?
+- **Code reuse**: Are there parts that should be reused from other places?
+- **Organization**: Is everything in the right place?
+- **Consistency**: Is it in the same style as other parts of the codebase?
+- **Simplicity**: Is it not over-engineered? Remember KISS and YAGNI. No dead code paths and NO defensive programming.
+- **No pointless wrappers**: Identify functions/methods that just call another function and return its result. Callers should call the underlying function directly instead of going through unnecessary indirection.
+- **User experience**: Does it provide a good user experience?
+- **PR**: Is the PR description and title clear and informative?
+- **Tests**: Are there tests, and do they cover the changes adequately? Are they testing something meaningful or are they just trivial?
+- **Live tests**: Test the changes in a REAL live environment to ensure they work as expected, use the config in `/opt/stacks/compose-farm.yaml`.
+- **Rules**: Does the code follow the project's coding standards and guidelines as laid out in @CLAUDE.md?
+
+Look at `git diff origin/main..HEAD` for the changes made in this pull request.
--- a/.prompts/update-demos.md
+++ b/.prompts/update-demos.md
@@ -0,0 +1,51 @@
+Update demo recordings to match the current compose-farm.yaml configuration.
+
+## Key Gotchas
+
+1. **Never `git checkout` without asking** - check for uncommitted changes first
+2. **Prefer `nas` stacks** - demos run locally on nas, SSH adds latency
+3. **Terminal captures keyboard** - use `blur()` to release focus before command palette
+4. **Clicking sidebar navigates away** - clicking h1 scrolls to top
+5. **Buttons have icons, not text** - use `[data-tip="..."]` selectors
+6. **`record.py` auto-restores config** - no manual cleanup needed after CLI demos
+
+## Stacks Used in Demos
+
+| Stack | CLI Demos | Web Demos | Notes |
+|-------|-----------|-----------|-------|
+| `audiobookshelf` | quickstart, migration, apply | - | Migrates nas→anton |
+| `grocy` | update | navigation, stack, workflow, console | - |
+| `immich` | logs, compose | shell | Multiple containers |
+| `dozzle` | - | workflow | - |
+
+## CLI Demos
+
+**Files:** `docs/demos/cli/*.tape`
+
+Check:
+- `quickstart.tape`: `bat -r` line ranges match current config structure
+- `migration.tape`: nvim keystrokes work, stack exists on nas
+- `compose.tape`: exec commands produce meaningful output
+
+Run: `python docs/demos/cli/record.py [demo]`
+
+## Web Demos
+
+**Files:** `docs/demos/web/demo_*.py`
+
+Check:
+- Stack names in demos still exist in config
+- Selectors match current templates (grep for IDs in `templates/`)
+- Shell demo uses command palette for ALL navigation
+
+Run: `python docs/demos/web/record.py [demo]`
+
+## Before Recording
+
+```bash
+# Check for uncommitted config changes
+git -C /opt/stacks diff compose-farm.yaml
+
+# Verify stacks are running
+cf ps audiobookshelf grocy immich dozzle
+```
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -9,40 +9,89 @@
 ## Architecture

 ```
-compose_farm/
+src/compose_farm/
 ├── cli/               # CLI subpackage
 │   ├── __init__.py    # Imports modules to trigger command registration
 │   ├── app.py         # Shared Typer app instance, version callback
 │   ├── common.py      # Shared helpers, options, progress bar utilities
-│   ├── config.py      # Config subcommand (init, show, path, validate, edit)
-│   ├── lifecycle.py   # up, down, pull, restart, update, apply commands
+│   ├── config.py      # Config subcommand (init, show, path, validate, edit, symlink)
+│   ├── lifecycle.py   # up, down, stop, pull, restart, update, apply, compose commands
 │   ├── management.py  # refresh, check, init-network, traefik-file commands
-│   └── monitoring.py  # logs, ps, stats commands
-├── config.py          # Pydantic models, YAML loading
+│   ├── monitoring.py  # logs, ps, stats, list commands
+│   ├── ssh.py         # SSH key management (setup, status, keygen)
+│   └── web.py         # Web UI server command
 ├── compose.py         # Compose file parsing (.env, ports, volumes, networks)
+├── config.py          # Pydantic models, YAML loading
 ├── console.py         # Shared Rich console instances
 ├── executor.py        # SSH/local command execution, streaming output
-├── operations.py      # Business logic (up, migrate, discover, preflight checks)
-├── state.py           # Deployment state tracking (which service on which host)
+├── glances.py         # Glances API integration for host resource stats
 ├── logs.py            # Image digest snapshots (dockerfarm-log.toml)
-└── traefik.py         # Traefik file-provider config generation from labels
+├── operations.py      # Business logic (up, migrate, discover, preflight checks)
+├── paths.py           # Path utilities, config file discovery
+├── registry.py        # Container registry client for update checking
+├── ssh_keys.py        # SSH key path constants and utilities
+├── state.py           # Deployment state tracking (which stack on which host)
+├── traefik.py         # Traefik file-provider config generation from labels
+└── web/               # Web UI (FastAPI + HTMX)
 ```

 ## Web UI Icons

 Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templates/partials/icons.html` by copying SVG paths from their site. The `action_btn`, `stat_card`, and `collapse` macros in `components.html` accept an optional `icon` parameter.

+## HTMX Patterns
+
+- **Multi-element refresh**: Use custom events, not `hx-swap-oob`. Elements have `hx-trigger="cf:refresh from:body"` and JS calls `document.body.dispatchEvent(new CustomEvent('cf:refresh'))`. Simpler to debug/test.
+- **SPA navigation**: Sidebar uses `hx-boost="true"` to AJAX-ify links.
+- **Attribute inheritance**: Set `hx-target`/`hx-swap` on parent elements.
+
 ## Key Design Decisions

 1. **Hybrid SSH approach**: asyncssh for parallel streaming with prefixes; native `ssh -t` for raw mode (progress bars)
-2. **Parallel by default**: Multiple services run concurrently via `asyncio.gather`
-3. **Streaming output**: Real-time stdout/stderr with `[service]` prefix using Rich
+2. **Parallel by default**: Multiple stacks run concurrently via `asyncio.gather`
+3. **Streaming output**: Real-time stdout/stderr with `[stack]` prefix using Rich
 4. **SSH key auth only**: Uses ssh-agent, no password handling (YAGNI)
 5. **NFS assumption**: Compose files at same path on all hosts
 6. **Local IP auto-detection**: Skips SSH when target host matches local machine's IP
-7. **State tracking**: Tracks where services are deployed for auto-migration
+7. **State tracking**: Tracks where stacks are deployed for auto-migration
 8. **Pre-flight checks**: Verifies NFS mounts and Docker networks exist before starting/migrating

+## Code Style
+
+- **Imports at top level**: Never add imports inside functions unless they are explicitly marked with `# noqa: PLC0415` and a comment explaining it speeds up CLI startup. Heavy modules like `pydantic`, `yaml`, and `rich.table` are lazily imported to keep `cf --help` fast.
+
+## Development Commands
+
+Use `just` for common tasks. Run `just` to list available commands:
+
+| Command | Description |
+|---------|-------------|
+| `just install` | Install dev dependencies |
+| `just test` | Run all tests |
+| `just test-cli` | Run CLI tests (parallel) |
+| `just test-web` | Run web UI tests (parallel) |
+| `just lint` | Lint, format, and type check |
+| `just web` | Start web UI (port 9001) |
+| `just doc` | Build and serve docs (port 9002) |
+| `just clean` | Clean build artifacts |
+
+## Testing
+
+Run tests with `just test` or `uv run pytest`. Browser tests require Chromium (system-installed or via `playwright install chromium`):
+
+```bash
+# Unit tests only (parallel)
+uv run pytest -m "not browser" -n auto
+
+# Browser tests only (parallel)
+uv run pytest -m browser -n auto
+
+# All tests
+uv run pytest
+```
+
+Browser tests are marked with `@pytest.mark.browser`. They use Playwright to test HTMX behavior, JavaScript functionality (sidebar filter, command palette, terminals), and content stability during navigation.
+
 ## Communication Notes

 - Clarify ambiguous wording (e.g., homophones like "right"/"write", "their"/"there").
@@ -53,23 +102,66 @@ Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templat
 - **NEVER merge anything into main.** Always commit directly or use fast-forward/rebase.
 - Never force push.

+## SSH Agent in Remote Sessions
+
+When a push to GitHub via SSH fails with "Permission denied (publickey)", fix the SSH agent socket:
+
+```bash
+# Find and set the correct SSH agent socket
+SSH_AUTH_SOCK=$(ls -t ~/.ssh/agent/s.*.sshd.* 2>/dev/null | head -1) git push origin branch-name
+```
+
+This is needed because the SSH agent socket path changes between sessions.
+
+## Pull Requests
+
+- Never include unchecked checklists (e.g., `- [ ] ...`) in PR descriptions. Either omit the checklist or use checked items.
+- **NEVER run `gh pr merge`**. PRs are merged via the GitHub UI, not the CLI.
+
+## Releases
+
+Use `gh release create` to create releases. The tag is created automatically.
+
+```bash
+# IMPORTANT: Ensure you're on latest origin/main before releasing!
+git fetch origin
+git checkout origin/main
+
+# Check current version
+git tag --sort=-v:refname | head -1
+
+# Create release (minor version bump: v0.21.1 -> v0.22.0)
+gh release create v0.22.0 --title "v0.22.0" --notes "release notes here"
+```
+
+Versioning:
+- **Patch** (v0.21.0 → v0.21.1): Bug fixes
+- **Minor** (v0.21.1 → v0.22.0): New features, non-breaking changes
+
+Write release notes manually describing what changed. Group by features and bug fixes.
+
 ## Commands Quick Reference

 CLI available as `cf` or `compose-farm`.

 | Command | Description |
 |---------|-------------|
-| `up`    | Start services (`docker compose up -d`), auto-migrates if host changed |
-| `down`  | Stop services (`docker compose down`). Use `--orphaned` to stop services removed from config |
+| `up`    | Start stacks (`docker compose up -d`), auto-migrates if host changed |
+| `down`  | Stop stacks (`docker compose down`). Use `--orphaned` to stop stacks removed from config |
+| `stop`  | Stop services without removing containers (`docker compose stop`) |
 | `pull`  | Pull latest images |
-| `restart` | `down` + `up -d` |
-| `update` | `pull` + `down` + `up -d` |
-| `apply` | Make reality match config: migrate services + stop orphans. Use `--dry-run` to preview |
-| `logs`  | Show service logs |
-| `ps`    | Show status of all services |
-| `stats` | Show overview (hosts, services, pending migrations; `--live` for container counts) |
-| `refresh` | Update state from reality: discover running services, capture image digests |
+| `restart` | Restart running containers (`docker compose restart`) |
+| `update` | Pull, build, recreate only if changed (`up -d --pull always --build`) |
+| `apply` | Make reality match config: migrate stacks + stop orphans. Use `--dry-run` to preview |
+| `compose` | Run any docker compose command on a stack (passthrough) |
+| `logs`  | Show stack logs |
+| `ps`    | Show status of all stacks |
+| `stats` | Show overview (hosts, stacks, pending migrations; `--live` for container counts) |
+| `list`  | List stacks and hosts (`--simple` for scripting, `--host` to filter) |
+| `refresh` | Update state from reality: discover running stacks, capture image digests |
 | `check` | Validate config, traefik labels, mounts, networks; show host compatibility |
 | `init-network` | Create Docker network on hosts with consistent subnet/gateway |
 | `traefik-file` | Generate Traefik file-provider config from compose labels |
-| `config` | Manage config files (init, show, path, validate, edit) |
+| `config` | Manage config files (init, init-env, show, path, validate, edit, symlink) |
+| `ssh`   | Manage SSH keys (setup, status, keygen) |
+| `web`   | Start web UI server |
--- a/10
+++ b/10
@@ -16,5 +16,13 @@ RUN apk add --no-cache openssh-client
 COPY --from=builder /root/.local/share/uv/tools/compose-farm /root/.local/share/uv/tools/compose-farm
 COPY --from=builder /usr/local/bin/cf /usr/local/bin/compose-farm /usr/local/bin/

-ENTRYPOINT ["cf"]
+# Allow non-root users to access the installed tool
+# (required when running with user: "${CF_UID:-0}:${CF_GID:-0}")
+RUN chmod 755 /root
+
+# Allow non-root users to add passwd entries (required for SSH)
+RUN chmod 666 /etc/passwd
+
+# Entrypoint creates /etc/passwd entry for non-root UIDs (required for SSH)
+ENTRYPOINT ["sh", "-c", "[ $(id -u) != 0 ] && echo ${USER:-u}:x:$(id -u):$(id -g)::${HOME:-/}:/bin/sh >> /etc/passwd; exec cf \"$@\"", "--"]
 CMD ["--help"]
--- a/README.md
+++ b/README.md
--- a/compose-farm.example.yaml
+++ b/compose-farm.example.yaml
@@ -3,9 +3,9 @@

 compose_dir: /opt/compose

-# Optional: Auto-regenerate Traefik file-provider config after up/down/restart/update
+# Optional: Auto-regenerate Traefik file-provider config after up/down/update
 traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik  # Skip services on same host (docker provider handles them)
+traefik_stack: traefik  # Skip stacks on same host (docker provider handles them)

 hosts:
  # Full form with all options
@@ -20,11 +20,11 @@ hosts:
  # Local execution (no SSH)
  local: localhost

-services:
-  # Map service names to hosts
-  # Compose file expected at: {compose_dir}/{service}/compose.yaml
+stacks:
+  # Map stack names to hosts
+  # Compose file expected at: {compose_dir}/{stack}/compose.yaml
  traefik: server-1    # Traefik runs here
-  plex: server-2       # Services on other hosts get file-provider entries
+  plex: server-2       # Stacks on other hosts get file-provider entries
  jellyfin: server-2
-  sonarr: server-1
-  radarr: local
+  grafana: server-1
+  nextcloud: local
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,24 +1,56 @@
 services:
  cf:
    image: ghcr.io/basnijholt/compose-farm:latest
+    # Run as current user to preserve file ownership on mounted volumes
+    # Set CF_UID=$(id -u) CF_GID=$(id -g) in your environment or .env file
+    # Defaults to root (0:0) for backwards compatibility
+    user: "${CF_UID:-0}:${CF_GID:-0}"
    volumes:
-      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      # Compose directory (contains compose files AND compose-farm.yaml config)
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
+      # SSH keys for passwordless auth (generated by `cf ssh setup`)
+      # Choose ONE option below (use the same option for both cf and web services):
+      # Option 1: Host path (default) - keys at ~/.ssh/compose-farm/id_ed25519
+      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:${CF_HOME:-/root}/.ssh/compose-farm
+      # Option 2: Named volume - managed by Docker, shared between services
+      # - cf-ssh:${CF_HOME:-/root}/.ssh
+      # Option 3: SSH agent forwarding (uncomment if using ssh-agent)
+      # - ${SSH_AUTH_SOCK}:/ssh-agent:ro
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      # Config file path (state stored alongside it)
      - CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
+      # HOME must match the user running the container for SSH to find keys
+      - HOME=${CF_HOME:-/root}
+      # USER is required for SSH when running as non-root (UID not in /etc/passwd)
+      - USER=${CF_USER:-root}

  web:
    image: ghcr.io/basnijholt/compose-farm:latest
+    restart: unless-stopped
    command: web --host 0.0.0.0 --port 9000
+    # Run as current user to preserve file ownership on mounted volumes
+    user: "${CF_UID:-0}:${CF_GID:-0}"
    volumes:
-      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
+      # SSH keys - use the SAME option as cf service above
+      # Option 1: Host path (default)
+      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:${CF_HOME:-/root}/.ssh/compose-farm
+      # Option 2: Named volume
+      # - cf-ssh:${CF_HOME:-/root}/.ssh
+      # Option 3: SSH agent forwarding (uncomment if using ssh-agent)
+      # - ${SSH_AUTH_SOCK}:/ssh-agent:ro
+      # XDG config dir for backups and image digest logs (persists across restarts)
+      - ${CF_XDG_CONFIG:-~/.config/compose-farm}:${CF_HOME:-/root}/.config/compose-farm
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      - CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
+      # Used to detect self-updates and run via SSH to survive container restart
+      - CF_WEB_STACK=compose-farm
+      # HOME must match the user running the container for SSH to find keys
+      - HOME=${CF_HOME:-/root}
+      # USER is required for SSH when running as non-root (UID not in /etc/passwd)
+      - USER=${CF_USER:-root}
    labels:
      - traefik.enable=true
      - traefik.http.routers.compose-farm.rule=Host(`compose-farm.${DOMAIN}`)
@@ -32,3 +64,7 @@ services:
 networks:
  mynetwork:
    external: true
+
+volumes:
+  cf-ssh:
+    # Only used if Option 2 is selected above
--- a/docs/CNAME
+++ b/docs/CNAME
@@ -0,0 +1 @@
+compose-farm.nijho.lt
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -0,0 +1,361 @@
+---
+icon: lucide/layers
+---
+
+# Architecture
+
+This document explains how Compose Farm works under the hood.
+
+## Design Philosophy
+
+Compose Farm follows three core principles:
+
+1. **KISS** - Keep it simple. It's a thin wrapper around `docker compose` over SSH.
+2. **YAGNI** - No orchestration, no service discovery, no health checks until needed.
+3. **Zero changes** - Your existing compose files work unchanged.
+
+## High-Level Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                        Compose Farm CLI                         │
+│                                                                 │
+│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────────────┐ │
+│  │  Config  │  │  State   │  │Operations│  │   Executor       │ │
+│  │  Parser  │  │ Tracker  │  │  Logic   │  │  (SSH/Local)     │ │
+│  └────┬─────┘  └────┬─────┘  └────┬─────┘  └────────┬─────────┘ │
+└───────┼─────────────┼─────────────┼─────────────────┼───────────┘
+        │             │             │                 │
+        ▼             ▼             ▼                 ▼
+┌───────────────────────────────────────────────────────────────┐
+│                         SSH / Local                            │
+└───────────────────────────────────────────────────────────────┘
+        │                                             │
+        ▼                                             ▼
+┌───────────────┐                           ┌───────────────┐
+│   Host: nuc   │                           │   Host: hp    │
+│               │                           │               │
+│ docker compose│                           │ docker compose│
+│    up -d      │                           │    up -d      │
+└───────────────┘                           └───────────────┘
+```
+
+## Core Components
+
+### Configuration (`src/compose_farm/config.py`)
+
+Pydantic models for YAML configuration:
+
+- **Config** - Root configuration with compose_dir, hosts, stacks
+- **Host** - Host address, SSH user, and port
+
+Key features:
+- Validation with Pydantic
+- Multi-host stack expansion (`all` → list of hosts)
+- YAML loading with sensible defaults
+
+### State Tracking (`src/compose_farm/state.py`)
+
+Tracks deployment state in `compose-farm-state.yaml` (stored alongside the config file):
+
+```yaml
+deployed:
+  plex: nuc
+  grafana: nuc
+```
+
+Used for:
+- Detecting migrations (stack moved to different host)
+- Identifying orphans (stacks removed from config)
+- `cf ps` status display
+
+### Operations (`src/compose_farm/operations.py`)
+
+Business logic for stack operations:
+
+- **up** - Start stack, handle migration if needed
+- **down** - Stop stack
+- **preflight checks** - Verify mounts, networks exist before operations
+- **discover** - Find running stacks on hosts
+- **migrate** - Down on old host, up on new host
+
+### Executor (`src/compose_farm/executor.py`)
+
+SSH and local command execution:
+
+- **Hybrid SSH approach**: asyncssh for parallel streaming, native `ssh -t` for raw mode
+- **Parallel by default**: Multiple stacks via `asyncio.gather`
+- **Streaming output**: Real-time stdout/stderr with `[stack]` prefix
+- **Local detection**: Skips SSH when target matches local machine IP
+
+### CLI (`src/compose_farm/cli/`)
+
+Typer-based CLI with subcommand modules:
+
+```
+cli/
+├── app.py          # Shared Typer app, version callback
+├── common.py       # Shared helpers, options, progress utilities
+├── config.py       # config subcommand (init, init-env, show, path, validate, edit, symlink)
+├── lifecycle.py    # up, down, stop, pull, restart, update, apply, compose
+├── management.py   # refresh, check, init-network, traefik-file
+├── monitoring.py   # logs, ps, stats
+├── ssh.py          # SSH key management (setup, status, keygen)
+└── web.py          # Web UI server command
+```
+
+## Command Flow
+
+### cf up plex
+
+```
+1. Load configuration
+   └─► Parse compose-farm.yaml
+   └─► Validate stack exists
+
+2. Check state
+   └─► Load compose-farm-state.yaml
+   └─► Is plex already running?
+   └─► Is it on a different host? (migration needed)
+
+3. Pre-flight checks
+   └─► SSH to target host
+   └─► Check compose file exists
+   └─► Check required mounts exist
+   └─► Check required networks exist
+
+4. Execute migration (if needed)
+   └─► SSH to old host
+   └─► Run: docker compose down
+
+5. Start stack
+   └─► SSH to target host
+   └─► cd /opt/compose/plex
+   └─► Run: docker compose up -d
+
+6. Update state
+   └─► Write new state to compose-farm-state.yaml
+
+7. Generate Traefik config (if configured)
+   └─► Regenerate traefik file-provider
+```
+
+### cf apply
+
+```
+1. Load configuration and state
+
+2. Compute diff
+   ├─► Orphans: in state, not in config
+   ├─► Migrations: in both, different host
+   └─► Missing: in config, not in state
+
+3. Stop orphans
+   └─► For each orphan: cf down
+
+4. Migrate stacks
+   └─► For each migration: down old, up new
+
+5. Start missing
+   └─► For each missing: cf up
+
+6. Update state
+```
+
+## SSH Execution
+
+### Parallel Streaming (asyncssh)
+
+For most operations, Compose Farm uses asyncssh:
+
+```python
+async def run_command(host, command):
+    async with asyncssh.connect(host) as conn:
+        result = await conn.run(command)
+        return result.stdout, result.stderr
+```
+
+Multiple stacks run concurrently via `asyncio.gather`.
+
+### Raw Mode (native ssh)
+
+For commands needing PTY (progress bars, interactive):
+
+```bash
+ssh -t user@host "docker compose pull"
+```
+
+### Local Detection
+
+When target host IP matches local machine:
+
+```python
+if is_local(host_address):
+    # Run locally, no SSH
+    subprocess.run(command)
+else:
+    # SSH to remote
+    ssh.run(command)
+```
+
+## State Management
+
+### State File
+
+Location: `compose-farm-state.yaml` (stored alongside the config file)
+
+```yaml
+deployed:
+  plex: nuc
+  grafana: nuc
+```
+
+Image digests are stored separately in `dockerfarm-log.toml` (also in the config directory).
+
+### State Transitions
+
+```
+Config Change          State Change           Action
+─────────────────────────────────────────────────────
+Add stack              Missing                cf up
+Remove stack           Orphaned               cf down
+Change host            Migration              down old, up new
+No change              No change              none (or refresh)
+```
+
+### cf refresh
+
+Syncs state with reality by querying Docker on each host:
+
+```bash
+docker ps --format '{{.Names}}'
+```
+
+Updates `compose-farm-state.yaml` to match what's actually running.
+
+## Compose File Discovery
+
+For each stack, Compose Farm looks for compose files in:
+
+```
+{compose_dir}/{stack}/
+├── compose.yaml         # preferred
+├── compose.yml
+├── docker-compose.yml
+└── docker-compose.yaml
+```
+
+First match wins.
+
+## Traefik Integration
+
+### Label Extraction
+
+Compose Farm parses Traefik labels from compose files:
+
+```yaml
+services:
+  plex:
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+```
+
+### File Provider Generation
+
+Converts labels to Traefik file-provider YAML:
+
+```yaml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      service: plex
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.10:32400
+```
+
+### Variable Resolution
+
+Supports `${VAR}` and `${VAR:-default}` from:
+1. Stack's `.env` file
+2. Current environment
+
+## Error Handling
+
+### Pre-flight Failures
+
+Before any operation, Compose Farm checks:
+- SSH connectivity
+- Compose file existence
+- Required mounts
+- Required networks
+
+If any check fails, the operation aborts with a clear error.
+
+### Partial Failures
+
+When operating on multiple stacks:
+- Each stack is independent
+- Failures are logged, but other stacks continue
+- Exit code reflects overall success/failure
+
+## Performance Considerations
+
+### Parallel Execution
+
+Stacks are started/stopped in parallel:
+
+```python
+await asyncio.gather(*[
+    up_stack(stack) for stack in stacks
+])
+```
+
+### SSH Multiplexing
+
+For repeated connections to the same host, SSH reuses connections.
+
+### Caching
+
+- Config is parsed once per command
+- State is loaded once, written once
+- Host discovery results are cached for the duration of a command
+
+## Web UI Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                         Web UI                               │
+│                                                             │
+│  ┌─────────────┐  ┌─────────────┐  ┌─────────────────────┐  │
+│  │   FastAPI   │  │    Jinja    │  │       HTMX          │  │
+│  │   Backend   │  │  Templates  │  │   Dynamic Updates   │  │
+│  └─────────────┘  └─────────────┘  └─────────────────────┘  │
+│                                                             │
+│  Pattern: Custom events, not hx-swap-oob                    │
+│  Elements trigger on: cf:refresh from:body                  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templates/partials/icons.html`.
+
+### Host Resource Monitoring (`src/compose_farm/glances.py`)
+
+Integration with [Glances](https://nicolargo.github.io/glances/) for real-time host stats:
+
+- Fetches CPU, memory, and load from Glances REST API on each host
+- Used by web UI dashboard to display host resource usage
+- Requires `glances_stack` config option pointing to a Glances stack running on all hosts
+
+### Container Registry Client (`src/compose_farm/registry.py`)
+
+OCI Distribution API client for checking image updates:
+
+- Parses image references (registry, namespace, name, tag, digest)
+- Fetches available tags from Docker Hub, GHCR, and other registries
+- Compares semantic versions to find newer releases
--- a/docs/assets/apply.gif
+++ b/docs/assets/apply.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01dabdd8f62773823ba2b8dc74f9931f1a1b88215117e6a080004096025491b0
+size 901456
--- a/docs/assets/apply.webm
+++ b/docs/assets/apply.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:134c903a6b3acfb933617b33755b0cdb9bac2a59e5e35b64236e248a141d396d
+size 206883
--- a/docs/assets/compose.gif
+++ b/docs/assets/compose.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8b3cdb3486ec79b3ddb2f7571c13d54ac9aed182edfe708eff76a966a90cfc7
+size 1132310
--- a/docs/assets/compose.webm
+++ b/docs/assets/compose.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3c4d4a62f062f717df4e6752efced3caea29004dc90fe97fd7633e7f0ded9db
+size 341057
--- a/docs/assets/install.gif
+++ b/docs/assets/install.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c1bb48cc2f364681515a4d8bd0c586d133f5a32789b7bb64524ad7d9ed0a8e9
+size 543135
--- a/docs/assets/install.webm
+++ b/docs/assets/install.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f82d96137f039f21964c15c1550aa1b1f0bb2d52c04d012d253dbfbd6fad096
+size 268086
--- a/docs/assets/logs.gif
+++ b/docs/assets/logs.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4045b00d90928f42c7764b3c24751576cfb68a34c6e84d12b4e282d2e67378
+size 146467
--- a/docs/assets/logs.webm
+++ b/docs/assets/logs.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1b94416ed3740853f863e19bf45f26241a203fb0d7d187160a537f79aa544fa
+size 60353
--- a/docs/assets/migration.gif
+++ b/docs/assets/migration.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:848d9c48fb7511da7996149277c038589fad1ee406ff2f30c28f777fc441d919
+size 1183641
--- a/docs/assets/migration.webm
+++ b/docs/assets/migration.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e747ee71bb38b19946005d5a4def4d423dadeaaade452dec875c4cb2d24a5b77
+size 407373
--- a/docs/assets/quickstart.gif
+++ b/docs/assets/quickstart.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d32c9a3eec06e57df085ad347e6bf61e323f8bd8322d0c540f0b9d4834196dfd
+size 3589776
--- a/docs/assets/quickstart.webm
+++ b/docs/assets/quickstart.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c54eda599389dac74c24c83527f95cd1399e653d7faf2972c2693d90e590597
+size 1085344
--- a/docs/assets/update.gif
+++ b/docs/assets/update.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62f9b5ec71496197a3f1c3e3bca8967d603838804279ea7dbf00a70d3391ff6c
+size 127123
--- a/docs/assets/update.webm
+++ b/docs/assets/update.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac2b93d3630af87b44a135723c5d10e8287529bed17c28301b2802cd9593e9e8
+size 98748
--- a/docs/assets/web-console.gif
+++ b/docs/assets/web-console.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b50a7e9836c496c0989363d1440fa0a6ccdaa38ee16aae92b389b3cf3c3732f
+size 2385110
--- a/docs/assets/web-console.webm
+++ b/docs/assets/web-console.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccbb3d5366c7734377e12f98cca0b361028f5722124f1bb7efa231f6aeffc116
+size 2208044
--- a/docs/assets/web-live_stats.gif
+++ b/docs/assets/web-live_stats.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4135888689a10c5ae2904825d98f2a6d215c174a4bd823e25761f619590f04ff
+size 3990104
--- a/docs/assets/web-live_stats.webm
+++ b/docs/assets/web-live_stats.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87739cd6f6576a81100392d8d1e59d3e776fecc8f0721a31332df89e7fc8593d
+size 5814274
--- a/docs/assets/web-navigation.gif
+++ b/docs/assets/web-navigation.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:269993b52721ce70674d3aab2a4cd8c58aa621d4ba0739afedae661c90965b26
+size 3678371
--- a/docs/assets/web-navigation.webm
+++ b/docs/assets/web-navigation.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0098b55bb6a52fa39f807a01fa352ce112bcb446e2a2acb963fb02d21b28c934
+size 3088813
--- a/docs/assets/web-shell.gif
+++ b/docs/assets/web-shell.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bf9d8c247d278799d1daea784fc662a22f12b1bd7883f808ef30f35025ebca6
+size 4166443
--- a/docs/assets/web-shell.webm
+++ b/docs/assets/web-shell.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02d5124217a94849bf2971d6d13d28da18c557195a81b9cca121fb7c07f0501b
+size 3523244
--- a/docs/assets/web-stack.gif
+++ b/docs/assets/web-stack.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:412a0e68f8e52801cafbb9a703ca9577e7c14cc7c0e439160b9185961997f23c
+size 4435697
--- a/docs/assets/web-stack.webm
+++ b/docs/assets/web-stack.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e600a1d3216b44497a889f91eac94d62ef7207b4ed0471465dcb72408caa28e
+size 3764693
--- a/docs/assets/web-themes.gif
+++ b/docs/assets/web-themes.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c07a283f4f70c4ab205b0f0acb5d6f55e3ced4c12caa7a8d5914ffe3548233a
+size 5768166
--- a/docs/assets/web-themes.webm
+++ b/docs/assets/web-themes.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:562228841de976d70ee80999b930eadf3866a13ff2867d900279993744c44671
+size 6667918
--- a/docs/assets/web-workflow.gif
+++ b/docs/assets/web-workflow.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:845746ac1cb101c3077d420c4f3fda3ca372492582dc123ac8a031a68ae9b6b1
+size 12943150
--- a/docs/assets/web-workflow.webm
+++ b/docs/assets/web-workflow.webm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:189259558b5760c02583885168d7b0b47cf476cba81c7c028ec770f9d6033129
+size 12415357
--- a/docs/best-practices.md
+++ b/docs/best-practices.md
@@ -0,0 +1,372 @@
+---
+icon: lucide/lightbulb
+---
+
+# Best Practices
+
+Tips, limitations, and recommendations for using Compose Farm effectively.
+
+## Limitations
+
+### No Cross-Host Networking
+
+Compose Farm moves containers between hosts but **does not provide cross-host networking**. Docker's internal DNS and networks don't span hosts.
+
+**What breaks when you move a stack:**
+
+| Feature | Works? | Why |
+|---------|--------|-----|
+| `http://redis:6379` | No | Docker DNS doesn't cross hosts |
+| Docker network names | No | Networks are per-host |
+| `DATABASE_URL=postgres://db:5432` | No | Container name won't resolve |
+| Host IP addresses | Yes | Use `192.168.1.10:5432` |
+
+### What Compose Farm Doesn't Do
+
+- No overlay networking (use Swarm/Kubernetes)
+- No service discovery across hosts
+- No automatic dependency tracking between compose files
+- No health checks or restart policies beyond Docker's
+- No secrets management beyond Docker's
+
+## Stack Organization
+
+### Keep Dependencies Together
+
+If services talk to each other, keep them in the same compose file on the same host:
+
+```yaml
+# /opt/compose/myapp/docker-compose.yml
+services:
+  app:
+    image: myapp
+    depends_on:
+      - db
+      - redis
+
+  db:
+    image: postgres
+
+  redis:
+    image: redis
+```
+
+```yaml
+# compose-farm.yaml
+stacks:
+  myapp: nuc  # All three containers stay together
+```
+
+### Separate Standalone Stacks
+
+Stacks whose services don't talk to other containers can be anywhere:
+
+```yaml
+stacks:
+  # These can run on any host
+  plex: nuc
+  jellyfin: hp
+  homeassistant: nas
+
+  # These should stay together
+  myapp: nuc  # includes app + db + redis
+```
+
+### Cross-Host Communication
+
+If services MUST communicate across hosts, publish ports:
+
+```yaml
+# Instead of
+DATABASE_URL=postgres://db:5432
+
+# Use
+DATABASE_URL=postgres://192.168.1.10:5432
+```
+
+```yaml
+# And publish the port
+services:
+  db:
+    ports:
+      - "5432:5432"
+```
+
+## Multi-Host Stacks
+
+### When to Use `all`
+
+Use `all` for stacks that need local access to each host:
+
+```yaml
+stacks:
+  # Need Docker socket
+  dozzle: all          # Log viewer
+  portainer-agent: all  # Portainer agents
+  autokuma: all        # Auto-creates monitors
+
+  # Need host metrics
+  node-exporter: all   # Prometheus metrics
+  promtail: all        # Log shipping
+```
+
+### Host-Specific Lists
+
+For stacks on specific hosts only:
+
+```yaml
+stacks:
+  # Only on compute nodes
+  gitlab-runner: [nuc, hp]
+
+  # Only on storage nodes
+  minio: [nas-1, nas-2]
+```
+
+## Migration Safety
+
+### Pre-flight Checks
+
+Before migrating, Compose Farm verifies:
+- Compose file is accessible on new host
+- Required mounts exist on new host
+- Required networks exist on new host
+
+### Data Considerations
+
+**Compose Farm doesn't move data.** Ensure:
+
+1. **Shared storage**: Data volumes on NFS/shared storage
+2. **External databases**: Data in external DB, not container
+3. **Backup first**: Always backup before migration
+
+### Safe Migration Pattern
+
+```bash
+# 1. Preview changes
+cf apply --dry-run
+
+# 2. Verify target host can run the stack
+cf check myservice
+
+# 3. Apply changes
+cf apply
+```
+
+## State Management
+
+### When to Refresh
+
+Run `cf refresh` after:
+- Manual `docker compose` commands
+- Container restarts
+- Host reboots
+- Any changes outside Compose Farm
+
+```bash
+cf refresh --dry-run  # Preview
+cf refresh            # Sync
+```
+
+### State Conflicts
+
+If state doesn't match reality:
+
+```bash
+# See what's actually running
+cf refresh --dry-run
+
+# Sync state
+cf refresh
+
+# Then apply config
+cf apply
+```
+
+## Shared Storage
+
+### NFS Best Practices
+
+```bash
+# Mount options for Docker compatibility
+nas:/compose /opt/compose nfs rw,hard,intr,rsize=8192,wsize=8192 0 0
+```
+
+### Directory Ownership
+
+Ensure consistent UID/GID across hosts:
+
+```yaml
+services:
+  myapp:
+    environment:
+      - PUID=1000
+      - PGID=1000
+```
+
+### Config vs Data
+
+Keep config and data separate:
+
+```
+/opt/compose/          # Shared: compose files + config
+├── plex/
+│   ├── docker-compose.yml
+│   └── config/        # Small config files OK
+
+/mnt/data/             # Shared: large media files
+├── movies/
+├── tv/
+└── music/
+
+/opt/appdata/          # Local: per-host app data
+├── plex/
+└── grafana/
+```
+
+## Performance
+
+### Parallel Operations
+
+Compose Farm runs operations in parallel. For large deployments:
+
+```bash
+# Good: parallel by default
+cf up --all
+
+# Avoid: sequential updates when possible
+for svc in plex grafana nextcloud; do
+  cf update $svc
+done
+```
+
+### SSH Connection Reuse
+
+SSH connections are reused within a command. For many operations:
+
+```bash
+# One command, one connection per host
+cf update --all
+
+# Multiple commands, multiple connections (slower)
+cf update plex && cf update grafana && cf update nextcloud
+```
+
+## Traefik Setup
+
+### Stack Placement
+
+Put Traefik on a reliable host:
+
+```yaml
+stacks:
+  traefik: nuc  # Primary host with good uptime
+```
+
+### Same-Host Stacks
+
+Stacks on the same host as Traefik use Docker provider:
+
+```yaml
+traefik_stack: traefik
+
+stacks:
+  traefik: nuc
+  portainer: nuc   # Docker provider handles this
+  plex: hp         # File provider handles this
+```
+
+### Middleware in Separate File
+
+Define middlewares outside Compose Farm's generated file:
+
+```yaml
+# /opt/traefik/dynamic.d/middlewares.yml
+http:
+  middlewares:
+    redirect-https:
+      redirectScheme:
+        scheme: https
+```
+
+## Backup Strategy
+
+### What to Backup
+
+| Item | Location | Method |
+|------|----------|--------|
+| Compose Farm config | `~/.config/compose-farm/` | Git or copy |
+| Compose files | `/opt/compose/` | Git |
+| State file | `~/.config/compose-farm/compose-farm-state.yaml` | Optional (can refresh) |
+| App data | `/opt/appdata/` | Backup solution |
+
+### Disaster Recovery
+
+```bash
+# Restore config
+cp backup/compose-farm.yaml ~/.config/compose-farm/
+
+# Refresh state from running containers
+cf refresh
+
+# Or start fresh
+cf apply
+```
+
+## Troubleshooting
+
+### Common Issues
+
+**Stack won't start:**
+```bash
+cf check myservice      # Verify mounts/networks
+cf logs myservice       # Check container logs
+```
+
+**Migration fails:**
+```bash
+cf check myservice      # Verify new host is ready
+cf init-network newhost # Create network if missing
+```
+
+**State out of sync:**
+```bash
+cf refresh --dry-run    # See differences
+cf refresh              # Sync state
+```
+
+**SSH issues:**
+```bash
+cf ssh status           # Check key status
+cf ssh setup            # Re-setup keys
+```
+
+## Security Considerations
+
+### SSH Keys
+
+- Use a dedicated SSH key for Compose Farm
+- Limit key to specific hosts if possible
+- Don't store keys in Docker images
+
+### Network Exposure
+
+- Published ports are accessible from the network
+- Use firewalls for sensitive services
+- Consider VPN for cross-host communication
+
+### Secrets
+
+- Don't commit `.env` files with secrets
+- Use Docker secrets or external secret management
+- Avoid secrets in compose file labels
+
+## Comparison: When to Use Alternatives
+
+| Scenario | Solution |
+|----------|----------|
+| 2-10 hosts, static stacks | **Compose Farm** |
+| Cross-host container networking | Docker Swarm |
+| Auto-scaling, self-healing | Kubernetes |
+| Infrastructure as code | Ansible + Compose Farm |
+| High availability requirements | Kubernetes or Swarm |
--- a/docs/commands.md
+++ b/docs/commands.md
@@ -0,0 +1,842 @@
+---
+icon: lucide/terminal
+---
+
+# Commands Reference
+
+The Compose Farm CLI is available as both `compose-farm` and the shorter alias `cf`.
+
+## Command Overview
+
+Commands are either **Docker Compose wrappers** (`up`, `down`, `stop`, `restart`, `pull`, `logs`, `ps`, `compose`) with multi-host superpowers, or **Compose Farm originals** (`apply`, `update`, `refresh`, `check`) for orchestration Docker Compose can't do.
+
+| Category | Command | Description |
+|----------|---------|-------------|
+| **Lifecycle** | `apply` | Make reality match config |
+| | `up` | Start stacks |
+| | `down` | Stop stacks |
+| | `stop` | Stop services without removing containers |
+| | `restart` | Restart running containers |
+| | `update` | Shorthand for `up --pull --build` |
+| | `pull` | Pull latest images |
+| | `compose` | Run any docker compose command |
+| **Monitoring** | `ps` | Show stack status |
+| | `logs` | Show stack logs |
+| | `stats` | Show overview statistics |
+| | `list` | List stacks and hosts |
+| **Configuration** | `check` | Validate config and mounts |
+| | `refresh` | Sync state from reality |
+| | `init-network` | Create Docker network |
+| | `traefik-file` | Generate Traefik config |
+| | `config` | Manage config files |
+| | `ssh` | Manage SSH keys |
+| **Server** | `web` | Start web UI |
+
+## Global Options
+
+```bash
+cf --version, -v    # Show version
+cf --help, -h       # Show help
+```
+
+## Command Aliases
+
+Short aliases for frequently used commands:
+
+| Alias | Command | Alias | Command |
+|-------|---------|-------|---------|
+| `cf a` | `apply` | `cf s` | `stats` |
+| `cf l` | `logs` | `cf ls` | `list` |
+| `cf r` | `restart` | `cf rf` | `refresh` |
+| `cf u` | `update` | `cf ck` | `check` |
+| `cf p` | `pull` | `cf tf` | `traefik-file` |
+| `cf c` | `compose` | | |
+
+---
+
+## Lifecycle Commands
+
+### cf apply
+
+Make reality match your configuration. The primary reconciliation command.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/apply.webm" type="video/webm">
+</video>
+
+```bash
+cf apply [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--dry-run, -n` | Preview changes without executing |
+| `--no-orphans` | Skip stopping orphaned stacks |
+| `--no-strays` | Skip stopping stray stacks (running on wrong host) |
+| `--full, -f` | Also run up on all stacks (applies compose/env changes, triggers migrations) |
+| `--config, -c PATH` | Path to config file |
+
+**What it does:**
+
+1. Stops orphaned stacks (in state but removed from config)
+2. Stops stray stacks (running on unauthorized hosts)
+3. Migrates stacks on wrong host
+4. Starts missing stacks (in config but not running)
+
+**Examples:**
+
+```bash
+# Preview what would change
+cf apply --dry-run
+
+# Apply all changes
+cf apply
+
+# Only start/migrate, don't stop orphans
+cf apply --no-orphans
+
+# Don't stop stray stacks
+cf apply --no-strays
+
+# Also run up on all stacks (applies compose/env changes, triggers migrations)
+cf apply --full
+```
+
+---
+
+### cf up
+
+Start stacks. Auto-migrates if host assignment changed.
+
+```bash
+cf up [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Start all stacks |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--pull` | Pull images before starting (`--pull always`) |
+| `--build` | Build images before starting |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Start specific stacks
+cf up plex grafana
+
+# Start all stacks
+cf up --all
+
+# Start all stacks on a specific host
+cf up --all --host nuc
+
+# Start a specific service within a stack
+cf up immich --service database
+```
+
+**Auto-migration:**
+
+If you change a stack's host in config and run `cf up`:
+
+1. Verifies mounts/networks exist on new host
+2. Runs `down` on old host
+3. Runs `up -d` on new host
+4. Updates state
+
+---
+
+### cf down
+
+Stop stacks.
+
+```bash
+cf down [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Stop all stacks |
+| `--orphaned` | Stop orphaned stacks only |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Stop specific stacks
+cf down plex
+
+# Stop all stacks
+cf down --all
+
+# Stop stacks removed from config
+cf down --orphaned
+
+# Stop all stacks on a host
+cf down --all --host nuc
+```
+
+---
+
+### cf stop
+
+Stop services without removing containers.
+
+```bash
+cf stop [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Stop all stacks |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Stop specific stacks
+cf stop plex
+
+# Stop all stacks
+cf stop --all
+
+# Stop a specific service within a stack
+cf stop immich --service database
+```
+
+---
+
+### cf restart
+
+Restart running containers (`docker compose restart`). With `--service`, restarts just that service.
+
+```bash
+cf restart [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Restart all stacks |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+cf restart plex
+cf restart --all
+
+# Restart a specific service
+cf restart immich --service database
+```
+
+---
+
+### cf update
+
+Update stacks (pull + build + up). Shorthand for `up --pull --build`. With `--service`, updates just that service.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/update.webm" type="video/webm">
+</video>
+
+```bash
+cf update [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Update all stacks |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Update specific stack
+cf update plex
+
+# Update all stacks
+cf update --all
+
+# Update a specific service
+cf update immich --service database
+```
+
+---
+
+### cf pull
+
+Pull latest images.
+
+```bash
+cf pull [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Pull for all stacks |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+cf pull plex
+cf pull --all
+
+# Pull a specific service
+cf pull immich --service database
+```
+
+---
+
+### cf compose
+
+Run any docker compose command on a stack. This is a passthrough to docker compose for commands not wrapped by cf.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/compose.webm" type="video/webm">
+</video>
+
+```bash
+cf compose [OPTIONS] STACK COMMAND [ARGS]...
+```
+
+**Arguments:**
+
+| Argument | Description |
+|----------|-------------|
+| `STACK` | Stack to operate on (use `.` for current dir) |
+| `COMMAND` | Docker compose command to run |
+| `ARGS` | Additional arguments passed to docker compose |
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--host, -H TEXT` | Host to run the command on (required for multi-host stacks) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Show docker compose help
+cf compose mystack --help
+
+# View running processes
+cf compose mystack top
+
+# List images
+cf compose mystack images
+
+# Interactive shell
+cf compose mystack exec web bash
+
+# View parsed config
+cf compose mystack config
+
+# Use current directory as stack
+cf compose . ps
+```
+
+---
+
+## Monitoring Commands
+
+### cf ps
+
+Show status of stacks.
+
+```bash
+cf ps [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Show all stacks (default) |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Show all stacks
+cf ps
+
+# Show specific stacks
+cf ps plex grafana
+
+# Filter by host
+cf ps --host nuc
+
+# Show status of a specific service
+cf ps immich --service database
+```
+
+---
+
+### cf logs
+
+Show stack logs.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/logs.webm" type="video/webm">
+</video>
+
+```bash
+cf logs [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Show logs for all stacks |
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--service, -s TEXT` | Target a specific service within the stack |
+| `--follow, -f` | Follow logs (live stream) |
+| `--tail, -n INTEGER` | Number of lines (default: 20 for --all, 100 otherwise) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Show last 100 lines
+cf logs plex
+
+# Follow logs
+cf logs -f plex
+
+# Show last 50 lines of multiple stacks
+cf logs -n 50 plex grafana
+
+# Show last 20 lines of all stacks
+cf logs --all
+
+# Show logs for a specific service
+cf logs immich --service database
+```
+
+---
+
+### cf stats
+
+Show overview statistics.
+
+```bash
+cf stats [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--live, -l` | Query Docker for live container counts |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Config/state overview
+cf stats
+
+# Include live container counts
+cf stats --live
+```
+
+---
+
+### cf list
+
+List all stacks and their assigned hosts.
+
+```bash
+cf list [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--host, -H TEXT` | Filter to stacks on this host |
+| `--simple, -s` | Plain output for scripting (one stack per line) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# List all stacks
+cf list
+
+# Filter by host
+cf list --host nas
+
+# Plain output for scripting
+cf list --simple
+
+# Combine: list stack names on a specific host
+cf list --host nuc --simple
+```
+
+---
+
+## Configuration Commands
+
+### cf check
+
+Validate configuration, mounts, and networks.
+
+```bash
+cf check [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--local` | Skip SSH-based checks (faster) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Full validation with SSH
+cf check
+
+# Fast local-only validation
+cf check --local
+
+# Check specific stack and show host compatibility
+cf check jellyfin
+```
+
+---
+
+### cf refresh
+
+Update local state from running stacks.
+
+```bash
+cf refresh [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Refresh all stacks |
+| `--dry-run, -n` | Show what would change |
+| `--log-path, -l PATH` | Path to Dockerfarm TOML log |
+| `--config, -c PATH` | Path to config file |
+
+Without arguments, refreshes all stacks (same as `--all`). With stack names, refreshes only those stacks.
+
+**Examples:**
+
+```bash
+# Sync state with reality (all stacks)
+cf refresh
+
+# Preview changes
+cf refresh --dry-run
+
+# Refresh specific stacks only
+cf refresh plex sonarr
+```
+
+---
+
+### cf init-network
+
+Create Docker network on hosts with consistent settings.
+
+```bash
+cf init-network [OPTIONS] [HOSTS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--network, -n TEXT` | Network name (default: mynetwork) |
+| `--subnet, -s TEXT` | Network subnet (default: 172.20.0.0/16) |
+| `--gateway, -g TEXT` | Network gateway (default: 172.20.0.1) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Create on all hosts
+cf init-network
+
+# Create on specific hosts
+cf init-network nuc hp
+
+# Custom network settings
+cf init-network -n production -s 10.0.0.0/16 -g 10.0.0.1
+```
+
+---
+
+### cf traefik-file
+
+Generate Traefik file-provider config from compose labels.
+
+```bash
+cf traefik-file [OPTIONS] [STACKS]...
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--all, -a` | Generate for all stacks |
+| `--output, -o PATH` | Output file (stdout if omitted) |
+| `--config, -c PATH` | Path to config file |
+
+**Examples:**
+
+```bash
+# Preview to stdout
+cf traefik-file --all
+
+# Write to file
+cf traefik-file --all -o /opt/traefik/dynamic.d/cf.yml
+
+# Specific stacks
+cf traefik-file plex jellyfin -o /opt/traefik/cf.yml
+```
+
+---
+
+### cf config
+
+Manage configuration files.
+
+```bash
+cf config COMMAND
+```
+
+**Subcommands:**
+
+| Command | Description |
+|---------|-------------|
+| `init` | Create new config with examples |
+| `init-env` | Generate .env file for Docker deployment |
+| `show` | Display config with highlighting |
+| `path` | Print config file path |
+| `validate` | Validate syntax and schema |
+| `edit` | Open in $EDITOR |
+| `symlink` | Create symlink from default location |
+
+**Options by subcommand:**
+
+| Subcommand | Options |
+|------------|---------|
+| `init` | `--path/-p PATH`, `--force/-f` |
+| `init-env` | `--path/-p PATH`, `--output/-o PATH`, `--force/-f` |
+| `show` | `--path/-p PATH`, `--raw/-r` |
+| `edit` | `--path/-p PATH` |
+| `path` | `--path/-p PATH` |
+| `validate` | `--path/-p PATH` |
+| `symlink` | `--force/-f` |
+
+**Examples:**
+
+```bash
+# Create config at default location
+cf config init
+
+# Create config at custom path
+cf config init --path /opt/compose-farm/config.yaml
+
+# Show config with syntax highlighting
+cf config show
+
+# Show raw config (for copy-paste)
+cf config show --raw
+
+# Validate config
+cf config validate
+
+# Edit config in $EDITOR
+cf config edit
+
+# Print config path
+cf config path
+
+# Create symlink to local config
+cf config symlink
+
+# Create symlink to specific file
+cf config symlink /opt/compose-farm/config.yaml
+
+# Generate .env file in current directory
+cf config init-env
+
+# Generate .env at specific path
+cf config init-env -o /opt/stacks/.env
+```
+
+---
+
+### cf ssh
+
+Manage SSH keys for passwordless authentication.
+
+```bash
+cf ssh COMMAND
+```
+
+**Subcommands:**
+
+| Command | Description |
+|---------|-------------|
+| `setup` | Generate key and copy to all hosts |
+| `status` | Show SSH key status and host connectivity |
+| `keygen` | Generate key without distributing |
+
+**Options for `cf ssh setup`:**
+
+| Option | Description |
+|--------|-------------|
+| `--config, -c PATH` | Path to config file |
+| `--force, -f` | Regenerate key even if it exists |
+
+**Options for `cf ssh status`:**
+
+| Option | Description |
+|--------|-------------|
+| `--config, -c PATH` | Path to config file |
+
+**Options for `cf ssh keygen`:**
+
+| Option | Description |
+|--------|-------------|
+| `--force, -f` | Regenerate key even if it exists |
+
+**Examples:**
+
+```bash
+# Set up SSH keys (generates and distributes)
+cf ssh setup
+
+# Check status and connectivity
+cf ssh status
+
+# Generate key only (don't distribute)
+cf ssh keygen
+```
+
+---
+
+## Server Commands
+
+### cf web
+
+Start the web UI server.
+
+```bash
+cf web [OPTIONS]
+```
+
+**Options:**
+
+| Option | Description |
+|--------|-------------|
+| `--host, -H TEXT` | Host to bind to (default: 0.0.0.0) |
+| `--port, -p INTEGER` | Port to listen on (default: 8000) |
+| `--reload, -r` | Enable auto-reload for development |
+
+**Note:** Requires web dependencies: `pip install compose-farm[web]`
+
+**Examples:**
+
+```bash
+# Start on default port
+cf web
+
+# Start on custom port
+cf web --port 3000
+
+# Development mode with auto-reload
+cf web --reload
+```
+
+---
+
+## Common Patterns
+
+### Daily Operations
+
+```bash
+# Morning: check status
+cf ps
+cf stats --live
+
+# Update a specific stack
+cf update plex
+
+# View logs
+cf logs -f plex
+```
+
+### Maintenance
+
+```bash
+# Update all stacks
+cf update --all
+
+# Refresh state after manual changes
+cf refresh
+```
+
+### Migration
+
+```bash
+# Preview what would change
+cf apply --dry-run
+
+# Move a stack: edit config, then
+cf up plex  # auto-migrates
+
+# Or reconcile everything
+cf apply
+```
+
+### Troubleshooting
+
+```bash
+# Validate config
+cf check --local
+cf check
+
+# Check specific stack
+cf check jellyfin
+
+# Sync state
+cf refresh --dry-run
+cf refresh
+```
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -0,0 +1,447 @@
+---
+icon: lucide/settings
+---
+
+# Configuration Reference
+
+Compose Farm uses a YAML configuration file to define hosts and stack assignments.
+
+## Config File Location
+
+Compose Farm looks for configuration in this order:
+
+1. `-c` / `--config` flag (if provided)
+2. `CF_CONFIG` environment variable
+3. `./compose-farm.yaml` (current directory)
+4. `$XDG_CONFIG_HOME/compose-farm/compose-farm.yaml` (`$XDG_CONFIG_HOME` defaults to `~/.config`)
+
+Use `-c` / `--config` to specify a custom path:
+
+```bash
+cf ps -c /path/to/config.yaml
+```
+
+Or set the environment variable:
+
+```bash
+export CF_CONFIG=/path/to/config.yaml
+```
+
+## Examples
+
+### Single host (local-only)
+
+```yaml
+# Required: directory containing compose files
+compose_dir: /opt/stacks
+
+# Define local host
+hosts:
+  local: localhost
+
+# Map stacks to the local host
+stacks:
+  plex: local
+  grafana: local
+  nextcloud: local
+```
+
+### Multi-host (full example)
+
+```yaml
+# Required: directory containing compose files (same path on all hosts)
+compose_dir: /opt/compose
+
+# Optional: auto-regenerate Traefik config
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+# Define Docker hosts
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+    user: admin
+
+# Map stacks to hosts
+stacks:
+  # Single-host stacks
+  plex: nuc
+  grafana: nuc
+  nextcloud: hp
+
+  # Multi-host stacks
+  dozzle: all                    # Run on ALL hosts
+  node-exporter: [nuc, hp]       # Run on specific hosts
+```
+
+## Settings Reference
+
+### compose_dir (required)
+
+Directory containing your compose stack folders. Must be the same path on all hosts.
+
+```yaml
+compose_dir: /opt/compose
+```
+
+**Directory structure:**
+
+```
+/opt/compose/
+├── plex/
+│   ├── docker-compose.yml    # or compose.yaml
+│   └── .env                  # optional environment file
+├── grafana/
+│   └── docker-compose.yml
+└── ...
+```
+
+Supported compose file names (checked in order):
+- `compose.yaml`
+- `compose.yml`
+- `docker-compose.yml`
+- `docker-compose.yaml`
+
+### traefik_file
+
+Path to auto-generated Traefik file-provider config. When set, Compose Farm regenerates this file after `up`, `down`, and `update` commands.
+
+```yaml
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+```
+
+### traefik_stack
+
+Name of the stack that runs Traefik. Stacks on the same host as Traefik are skipped in the file-provider config (Traefik's Docker provider handles them).
+
+```yaml
+traefik_stack: traefik
+```
+
+### glances_stack
+
+Name of the stack that runs [Glances](https://nicolargo.github.io/glances/) for host resource monitoring. When set, the CLI (`cf stats --containers`) and the web UI display CPU, memory, and container stats for all hosts.
+
+```yaml
+glances_stack: glances
+```
+
+The Glances stack should run on all hosts and expose port 61208. See the README for full setup instructions.
+
+## Hosts Configuration
+
+### Basic Host
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+```
+
+### With SSH User
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+    user: docker
+```
+
+If `user` is omitted, the current user is used.
+
+### With Custom SSH Port
+
+```yaml
+hosts:
+  myserver:
+    address: 192.168.1.10
+    user: docker
+    port: 2222  # SSH port (default: 22)
+```
+
+### Localhost
+
+For stacks running on the same machine where you invoke Compose Farm:
+
+```yaml
+hosts:
+  local: localhost
+```
+
+No SSH is used for localhost stacks.
+
+### Multiple Hosts
+
+```yaml
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+    user: admin
+  truenas:
+    address: 192.168.1.100
+  local: localhost
+```
+
+## Stacks Configuration
+
+### Single-Host Stack
+
+```yaml
+stacks:
+  plex: nuc
+  grafana: nuc
+  nextcloud: hp
+```
+
+### Multi-Host Stack
+
+For stacks that need to run on more than one host, or on every host (e.g., log shippers, monitoring agents):
+
+```yaml
+stacks:
+  # Run on ALL configured hosts
+  dozzle: all
+  promtail: all
+
+  # Run on specific hosts
+  node-exporter: [nuc, hp, truenas]
+```
+
+**Common multi-host stacks:**
+- **Dozzle** - Docker log viewer (needs local socket)
+- **Promtail/Alloy** - Log shipping (needs local socket)
+- **node-exporter** - Host metrics (needs /proc, /sys)
+- **AutoKuma** - Uptime Kuma monitors (needs local socket)
+
+### Stack Names
+
+Stack names must match directory names in `compose_dir`:
+
+```yaml
+compose_dir: /opt/compose
+stacks:
+  plex: nuc      # expects /opt/compose/plex/docker-compose.yml
+  my-app: hp     # expects /opt/compose/my-app/docker-compose.yml
+```
+
+## State File
+
+Compose Farm tracks deployment state in `compose-farm-state.yaml`, stored alongside the config file.
+
+For example, if your config is at `~/.config/compose-farm/compose-farm.yaml`, the state file will be at `~/.config/compose-farm/compose-farm-state.yaml`.
+
+```yaml
+deployed:
+  plex: nuc
+  grafana: nuc
+```
+
+This file records which stacks are deployed and on which host.
+
+**Don't edit manually.** Use `cf refresh` to sync state with reality.
+
+## Environment Variables
+
+### In Compose Files
+
+Your compose files can use `.env` files as usual:
+
+```
+/opt/compose/plex/
+├── docker-compose.yml
+└── .env
+```
+
+Compose Farm runs `docker compose` which handles `.env` automatically.
+
+### In Traefik Labels
+
+When generating Traefik config, Compose Farm resolves `${VAR}` and `${VAR:-default}` from:
+
+1. The stack's `.env` file
+2. Current environment
+
+### Compose Farm Environment Variables
+
+These environment variables configure Compose Farm itself:
+
+| Variable | Description |
+|----------|-------------|
+| `CF_CONFIG` | Path to config file |
+| `CF_WEB_STACK` | Web UI stack name (Docker only, enables self-update detection and local host inference) |
+
+**Docker deployment variables** (used in docker-compose.yml):
+
+| Variable | Description | Generated by |
+|----------|-------------|--------------|
+| `CF_COMPOSE_DIR` | Compose files directory | `cf config init-env` |
+| `CF_UID` / `CF_GID` | User/group ID for containers | `cf config init-env` |
+| `CF_HOME` / `CF_USER` | Home directory and username | `cf config init-env` |
+| `CF_SSH_DIR` | SSH keys volume mount | Manual |
+| `CF_XDG_CONFIG` | Config backup volume mount | Manual |
+
+## Config Commands
+
+### Initialize Config
+
+```bash
+cf config init
+```
+
+Creates a new config file with documented examples.
+
+### Validate Config
+
+```bash
+cf config validate
+```
+
+Checks syntax and schema.
+
+### Show Config
+
+```bash
+cf config show
+```
+
+Displays current config with syntax highlighting.
+
+### Edit Config
+
+```bash
+cf config edit
+```
+
+Opens config in `$EDITOR`.
+
+### Show Config Path
+
+```bash
+cf config path
+```
+
+Prints the config file location (useful for scripting).
+
+### Create Symlink
+
+```bash
+cf config symlink                          # Link to ./compose-farm.yaml
+cf config symlink /path/to/my-config.yaml  # Link to specific file
+```
+
+Creates a symlink from the default location (`~/.config/compose-farm/compose-farm.yaml`) to your config file. Use `--force` to overwrite an existing symlink.
+
+## Validation
+
+### Local Validation
+
+Fast validation without SSH:
+
+```bash
+cf check --local
+```
+
+Checks:
+- Config syntax
+- Stack-to-host mappings
+- Compose file existence
+
+### Full Validation
+
+```bash
+cf check
+```
+
+Additional SSH-based checks:
+- Host connectivity
+- Mount point existence
+- Docker network existence
+- Traefik label validation
+
+### Stack-Specific Check
+
+```bash
+cf check jellyfin
+```
+
+Shows which hosts can run the stack (have required mounts/networks).
+
+## Example Configurations
+
+### Minimal
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  server: 192.168.1.10
+
+stacks:
+  myapp: server
+```
+
+### Home Lab
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  nas:
+    address: 192.168.1.100
+    user: admin
+
+stacks:
+  # Media
+  plex: nuc
+  jellyfin: nuc
+  immich: nuc
+
+  # Infrastructure
+  traefik: nuc
+  portainer: nuc
+
+  # Monitoring (on all hosts)
+  dozzle: all
+```
+
+### Production
+
+```yaml
+compose_dir: /opt/compose
+traefik_file: /opt/traefik/dynamic.d/cf.yml
+traefik_stack: traefik
+
+hosts:
+  web-1:
+    address: 10.0.1.10
+    user: deploy
+  web-2:
+    address: 10.0.1.11
+    user: deploy
+  db:
+    address: 10.0.1.20
+    user: deploy
+
+stacks:
+  # Load balanced
+  api: [web-1, web-2]
+
+  # Single instance
+  postgres: db
+  redis: db
+
+  # Infrastructure
+  traefik: web-1
+
+  # Monitoring
+  promtail: all
+```
--- a/docs/demos/README.md
+++ b/docs/demos/README.md
@@ -0,0 +1,17 @@
+# Demo Recordings
+
+Demo recording infrastructure for Compose Farm documentation.
+
+## Structure
+
+```
+docs/demos/
+├── cli/        # VHS-based CLI terminal recordings
+└── web/        # Playwright-based web UI recordings
+```
+
+## Output
+
+All recordings are written to `docs/assets/` as WebM (primary) and GIF (fallback).
+
+See subdirectory READMEs for usage.
--- a/docs/demos/cli/README.md
+++ b/docs/demos/cli/README.md
@@ -0,0 +1,33 @@
+# CLI Demo Recordings
+
+VHS-based terminal demo recordings for Compose Farm CLI.
+
+## Requirements
+
+- [VHS](https://github.com/charmbracelet/vhs): `go install github.com/charmbracelet/vhs@latest`
+
+## Usage
+
+```bash
+# Record all demos
+python docs/demos/cli/record.py
+
+# Record specific demos
+python docs/demos/cli/record.py quickstart migration
+```
+
+## Demos
+
+| Tape | Description |
+|------|-------------|
+| `install.tape` | Installing with `uv tool install` |
+| `quickstart.tape` | `cf ps`, `cf up`, `cf logs` |
+| `logs.tape` | Viewing logs |
+| `compose.tape` | `cf compose` passthrough (--help, images, exec) |
+| `update.tape` | `cf update` |
+| `migration.tape` | Stack migration |
+| `apply.tape` | `cf apply` |
+
+## Output
+
+GIF and WebM files saved to `docs/assets/`.
--- a/docs/demos/cli/apply.tape
+++ b/docs/demos/cli/apply.tape
@@ -0,0 +1,39 @@
+# Apply Demo
+# Shows cf apply previewing and reconciling state
+
+Output docs/assets/apply.gif
+Output docs/assets/apply.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Preview what would change"
+Enter
+Sleep 500ms
+
+Type "cf apply --dry-run"
+Enter
+Wait
+
+Type "# Check current status"
+Enter
+Sleep 500ms
+
+Type "cf stats"
+Enter
+Wait+Screen /Summary/
+Sleep 2s
+
+Type "# Apply the changes"
+Enter
+Sleep 500ms
+
+Type "cf apply"
+Enter
+# Wait for shell prompt (command complete)
+Wait
+Sleep 4s
--- a/docs/demos/cli/compose.tape
+++ b/docs/demos/cli/compose.tape
@@ -0,0 +1,50 @@
+# Compose Demo
+# Shows that cf compose passes through ANY docker compose command
+
+Output docs/assets/compose.gif
+Output docs/assets/compose.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 550
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# cf compose runs ANY docker compose command on the right host"
+Enter
+Sleep 500ms
+
+Type "# See ALL available compose commands"
+Enter
+Sleep 500ms
+
+Type "cf compose immich --help"
+Enter
+Sleep 4s
+
+Type "# Show images"
+Enter
+Sleep 500ms
+
+Type "cf compose immich images"
+Enter
+Wait+Screen /immich/
+Sleep 2s
+
+Type "# Open shell in a container"
+Enter
+Sleep 500ms
+
+Type "cf compose immich exec immich-machine-learning sh"
+Enter
+Wait+Screen /#/
+Sleep 1s
+
+Type "python3 --version"
+Enter
+Sleep 1s
+
+Type "exit"
+Enter
+Sleep 500ms
--- a/docs/demos/cli/install.tape
+++ b/docs/demos/cli/install.tape
@@ -0,0 +1,42 @@
+# Installation Demo
+# Shows installing compose-farm with uv
+
+Output docs/assets/install.gif
+Output docs/assets/install.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+Env FORCE_COLOR "1"
+
+Hide
+Type "export PATH=$HOME/.local/bin:$PATH && uv tool uninstall compose-farm 2>/dev/null; clear"
+Enter
+Show
+Type "# Install with uv (recommended)"
+Enter
+Sleep 500ms
+
+Type "uv tool install compose-farm"
+Enter
+Wait+Screen /Installed|already installed/
+
+Type "# Verify installation"
+Enter
+Sleep 500ms
+
+Type "cf --version"
+Enter
+Wait+Screen /compose-farm/
+Sleep 1s
+
+Type "cf --help | less"
+Enter
+Sleep 2s
+PageDown
+Sleep 2s
+Type "q"
+Sleep 2s
--- a/docs/demos/cli/logs.tape
+++ b/docs/demos/cli/logs.tape
@@ -0,0 +1,21 @@
+# Logs Demo
+# Shows viewing stack logs
+
+Output docs/assets/logs.gif
+Output docs/assets/logs.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 550
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# View recent logs"
+Enter
+Sleep 500ms
+
+Type "cf logs immich --tail 20"
+Enter
+Wait+Screen /immich/
+Sleep 2s
--- a/docs/demos/cli/migration.tape
+++ b/docs/demos/cli/migration.tape
@@ -0,0 +1,71 @@
+# Migration Demo
+# Shows automatic stack migration when host changes
+
+Output docs/assets/migration.gif
+Output docs/assets/migration.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 1000
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Current status: audiobookshelf on 'nas'"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Wait+Screen /PORTS/
+
+Type "# Edit config to move it to 'anton'"
+Enter
+Sleep 1s
+
+Type "nvim /opt/stacks/compose-farm.yaml"
+Enter
+Wait+Screen /stacks:/
+
+# Search for audiobookshelf
+Type "/audiobookshelf"
+Enter
+Sleep 1s
+
+# Move to the host value (nas) and change it
+Type "f:"
+Sleep 500ms
+Type "w"
+Sleep 500ms
+Type "ciw"
+Sleep 500ms
+Type "anton"
+Escape
+Sleep 1s
+
+# Save and quit
+Type ":wq"
+Enter
+Sleep 1s
+
+Type "# Run up - automatically migrates!"
+Enter
+Sleep 500ms
+
+Type "cf up audiobookshelf"
+Enter
+# Wait for migration phases: first the stop on old host
+Wait+Screen /Migrating|down/
+# Then wait for start on new host
+Wait+Screen /Starting|up/
+# Finally wait for completion
+Wait
+
+Type "# Verify: audiobookshelf now on 'anton'"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Wait+Screen /PORTS/
+Sleep 3s
--- a/docs/demos/cli/quickstart.tape
+++ b/docs/demos/cli/quickstart.tape
@@ -0,0 +1,91 @@
+# Quick Start Demo
+# Shows basic cf commands
+
+Output docs/assets/quickstart.gif
+Output docs/assets/quickstart.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 600
+Set Theme "Catppuccin Mocha"
+Set FontFamily "FiraCode Nerd Font"
+Set TypingSpeed 50ms
+Env BAT_PAGING "always"
+
+Type "# Config is just: stack → host"
+Enter
+Sleep 500ms
+
+Type "# First, define your hosts..."
+Enter
+Sleep 500ms
+
+Type "bat -r 1:16 compose-farm.yaml"
+Enter
+Sleep 3s
+Type "q"
+Sleep 500ms
+
+Type "# Then map each stack to a host"
+Enter
+Sleep 500ms
+
+Type "bat -r 17:35 compose-farm.yaml"
+Enter
+Sleep 3s
+Type "q"
+Sleep 500ms
+
+Type "# Check stack status"
+Enter
+Sleep 500ms
+
+Type "cf ps immich"
+Enter
+Wait+Screen /PORTS/
+
+Type "# Start a stack"
+Enter
+Sleep 500ms
+
+Type "cf up immich"
+Enter
+Wait
+
+Type "# View logs"
+Enter
+Sleep 500ms
+
+Type "cf logs immich --tail 5"
+Enter
+Wait+Screen /immich/
+Sleep 2s
+
+Type "# The magic: move between hosts (nas → anton)"
+Enter
+Sleep 500ms
+
+Type "# Change host in config (using sed)"
+Enter
+Sleep 500ms
+
+Type "sed -i 's/audiobookshelf: nas/audiobookshelf: anton/' compose-farm.yaml"
+Enter
+Sleep 500ms
+
+Type "# Apply changes - auto-migrates!"
+Enter
+Sleep 500ms
+
+Type "cf apply"
+Enter
+Sleep 15s
+
+Type "# Verify: now on anton"
+Enter
+Sleep 500ms
+
+Type "cf ps audiobookshelf"
+Enter
+Sleep 5s
--- a/docs/demos/cli/record.py
+++ b/docs/demos/cli/record.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+"""Record CLI demos using VHS."""
+
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+from compose_farm.config import load_config
+from compose_farm.state import load_state
+
+# Shared Rich console used for all status output in this script.
+console = Console()
+# Directory containing this script and the `<name>.tape` files.
+SCRIPT_DIR = Path(__file__).parent
+# Working directory in which `cf`, `vhs`, and `git` are invoked.
+STACKS_DIR = Path("/opt/stacks")
+# Compose Farm config that the demos edit; its text is restored after recording.
+CONFIG_FILE = STACKS_DIR / "compose-farm.yaml"
+# Final destination of the recordings: `assets` two directories above this script.
+OUTPUT_DIR = SCRIPT_DIR.parent.parent / "assets"
+
+# Known demo names; each one is recorded from `SCRIPT_DIR / "<name>.tape"`.
+DEMOS = ["install", "quickstart", "logs", "compose", "update", "migration", "apply"]
+
+
+def _run(cmd: list[str], **kw) -> bool:
+    return subprocess.run(cmd, check=False, **kw).returncode == 0
+
+
+def _set_config(host: str) -> None:
+    """Set audiobookshelf host in config file."""
+    _run(["sed", "-i", f"s/audiobookshelf: .*/audiobookshelf: {host}/", str(CONFIG_FILE)])
+
+
+def _get_hosts() -> tuple[str | None, str | None]:
+    """Return (config_host, state_host) for audiobookshelf."""
+    config = load_config()
+    state = load_state(config)
+    return config.stacks.get("audiobookshelf"), state.get("audiobookshelf")
+
+
+def _setup_state(demo: str) -> bool:
+    """Set up required state for demo. Returns False on failure."""
+    if demo not in ("migration", "apply"):
+        return True
+
+    config_host, state_host = _get_hosts()
+
+    if demo == "migration":
+        # Migration needs audiobookshelf on nas in BOTH config and state
+        if config_host != "nas":
+            console.print("[yellow]Setting up: config → nas[/yellow]")
+            _set_config("nas")
+        if state_host != "nas":
+            console.print("[yellow]Setting up: state → nas[/yellow]")
+            if not _run(["cf", "apply"], cwd=STACKS_DIR):
+                return False
+
+    elif demo == "apply":
+        # Apply needs config=nas, state=anton (so there's something to apply)
+        if config_host != "nas":
+            console.print("[yellow]Setting up: config → nas[/yellow]")
+            _set_config("nas")
+        if state_host == "nas":
+            console.print("[yellow]Setting up: state → anton[/yellow]")
+            _set_config("anton")
+            if not _run(["cf", "apply"], cwd=STACKS_DIR):
+                return False
+            _set_config("nas")
+
+    return True
+
+
+def _record(name: str, index: int, total: int) -> bool:
+    """Record a single demo."""
+    console.print(f"[cyan][{index}/{total}][/cyan] [green]Recording:[/green] {name}")
+    if _run(["vhs", str(SCRIPT_DIR / f"{name}.tape")], cwd=STACKS_DIR):
+        console.print("[green]  ✓ Done[/green]")
+        return True
+    console.print("[red]  ✗ Failed[/red]")
+    return False
+
+
+def _reset_after(demo: str, next_demo: str | None) -> None:
+    """Reset state after demos that modify audiobookshelf."""
+    if demo not in ("quickstart", "migration"):
+        return
+    _set_config("nas")
+    if next_demo != "apply":  # Let apply demo show the migration
+        _run(["cf", "apply"], cwd=STACKS_DIR)
+
+
+def _restore_config(original: str) -> None:
+    """Write *original* back to ``CONFIG_FILE`` and run ``cf apply``.
+
+    Args:
+        original: Full text to write to the config file.
+    """
+    console.print("[yellow]Restoring original config...[/yellow]")
+    CONFIG_FILE.write_text(original)
+    # Re-apply after the config is back in place; the command's result is ignored.
+    _run(["cf", "apply"], cwd=STACKS_DIR)
+
+
+def _main() -> int:
+    if not shutil.which("vhs"):
+        console.print("[red]VHS not found. Install: brew install vhs[/red]")
+        return 1
+
+    if not _run(["git", "-C", str(STACKS_DIR), "diff", "--quiet", "compose-farm.yaml"]):
+        console.print("[red]compose-farm.yaml has uncommitted changes[/red]")
+        return 1
+
+    demos = [d for d in sys.argv[1:] if d in DEMOS] or DEMOS
+    if sys.argv[1:] and not demos:
+        console.print(f"[red]Unknown demo. Available: {', '.join(DEMOS)}[/red]")
+        return 1
+
+    # Save original config to restore after recording
+    original_config = CONFIG_FILE.read_text()
+
+    try:
+        for i, demo in enumerate(demos, 1):
+            if not _setup_state(demo):
+                return 1
+            if not _record(demo, i, len(demos)):
+                return 1
+            _reset_after(demo, demos[i] if i < len(demos) else None)
+    finally:
+        _restore_config(original_config)
+
+    # Move outputs
+    OUTPUT_DIR.mkdir(exist_ok=True)
+    for f in (STACKS_DIR / "docs/assets").glob("*.[gw]*"):
+        shutil.move(str(f), str(OUTPUT_DIR / f.name))
+
+    console.print(f"\n[green]Done![/green] Saved to {OUTPUT_DIR}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(_main())
--- a/docs/demos/cli/update.tape
+++ b/docs/demos/cli/update.tape
@@ -0,0 +1,32 @@
+# Update Demo
+# Shows updating stacks (only recreates containers if images changed)
+
+Output docs/assets/update.gif
+Output docs/assets/update.webm
+
+Set Shell "bash"
+Set FontSize 14
+Set Width 900
+Set Height 500
+Set Theme "Catppuccin Mocha"
+Set TypingSpeed 50ms
+
+Type "# Update a single stack"
+Enter
+Sleep 500ms
+
+Type "cf update grocy"
+Enter
+# Wait for the update to finish: the chained waits match pull and grocy output, and the last Wait gets a 60s timeout
+Wait+Screen /pull/
+Wait+Screen /grocy/
+Wait@60s
+
+Type "# Check current status"
+Enter
+Sleep 500ms
+
+Type "cf ps grocy"
+Enter
+Wait+Screen /PORTS/
+Sleep 1s
--- a/docs/demos/web/README.md
+++ b/docs/demos/web/README.md
@@ -0,0 +1,45 @@
+# Web UI Demo Recordings
+
+Playwright-based demo recording for Compose Farm web UI.
+
+## Requirements
+
+- Chromium: `playwright install chromium`
+- ffmpeg: `apt install ffmpeg` or `brew install ffmpeg`
+
+## Usage
+
+```bash
+# Record all demos
+python docs/demos/web/record.py
+
+# Record specific demo
+python docs/demos/web/record.py navigation
+```
+
+## Demos
+
+| Demo | Description |
+|------|-------------|
+| `navigation` | Command palette fuzzy search and navigation |
+| `stack` | Stack restart/logs via command palette |
+| `themes` | Theme switching with arrow key preview |
+| `workflow` | Full workflow: filter, navigate, logs, themes |
+| `console` | Console terminal running cf commands |
+| `shell` | Container shell exec via command palette |
+
+## Output
+
+WebM and GIF files saved to `docs/assets/web-{demo}.{webm,gif}`.
+
+## Files
+
+- `record.py` - Orchestration script
+- `conftest.py` - Playwright fixtures, helper functions
+- `demo_*.py` - Individual demo scripts
+
+## Notes
+
+- Uses real config at `/opt/stacks/compose-farm.yaml`
+- Adjust `pause(page, ms)` calls to control timing
+- Viewport: 1280x720 (1920x1080 for the `live_stats` demo)
--- a/docs/demos/web/init.py
+++ b/docs/demos/web/init.py
@@ -0,0 +1 @@
+"""Web UI demo recording scripts."""
--- a/docs/demos/web/conftest.py
+++ b/docs/demos/web/conftest.py
@@ -0,0 +1,302 @@
+"""Shared fixtures for web UI demo recordings.
+
+Based on tests/web/test_htmx_browser.py patterns for consistency.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import socket
+import threading
+import time
+import urllib.request
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+from unittest.mock import patch
+
+import pytest
+import uvicorn
+
+from compose_farm.config import Config as CFConfig
+from compose_farm.config import load_config
+from compose_farm.executor import (
+    get_container_compose_labels as _original_get_compose_labels,
+)
+from compose_farm.glances import ContainerStats
+from compose_farm.glances import fetch_container_stats as _original_fetch_container_stats
+from compose_farm.state import load_state as _original_load_state
+from compose_farm.web.cdn import CDN_ASSETS, ensure_vendor_cache
+
+# NOTE: Do NOT import create_app here - it must be imported AFTER patches are applied
+# to ensure the patched get_config is used by all route modules
+
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from playwright.sync_api import BrowserContext, Page, Route
+
+# Substrings to exclude from demo recordings (case-insensitive)
+DEMO_EXCLUDE_PATTERNS = {"arr", "vpn", "tash"}
+
+
+def _should_exclude(name: str) -> bool:
+    """Check if a stack/container name should be excluded from demo."""
+    name_lower = name.lower()
+    return any(pattern in name_lower for pattern in DEMO_EXCLUDE_PATTERNS)
+
+
+def _get_filtered_config() -> CFConfig:
+    """Load config but filter out excluded stacks."""
+    config = load_config()
+    filtered_stacks = {
+        name: host for name, host in config.stacks.items() if not _should_exclude(name)
+    }
+    return CFConfig(
+        compose_dir=config.compose_dir,
+        hosts=config.hosts,
+        stacks=filtered_stacks,
+        traefik_file=config.traefik_file,
+        traefik_stack=config.traefik_stack,
+        glances_stack=config.glances_stack,
+        config_path=config.config_path,
+    )
+
+
+def _get_filtered_state(config: CFConfig) -> dict[str, str | list[str]]:
+    """Load state but filter out excluded stacks."""
+    state = _original_load_state(config)
+    return {name: host for name, host in state.items() if not _should_exclude(name)}
+
+
+async def _filtered_fetch_container_stats(
+    host_name: str,
+    host_address: str,
+    port: int = 61208,
+    request_timeout: float = 10.0,
+) -> tuple[list[ContainerStats] | None, str | None]:
+    """Fetch container stats but filter out excluded containers."""
+    containers, error = await _original_fetch_container_stats(
+        host_name, host_address, port, request_timeout
+    )
+    if containers:
+        # Filter by container name (stack is empty at this point)
+        containers = [c for c in containers if not _should_exclude(c.name)]
+    return containers, error
+
+
+async def _filtered_get_compose_labels(
+    config: CFConfig,
+    host_name: str,
+) -> dict[str, tuple[str, str]]:
+    """Get compose labels but filter out excluded stacks."""
+    labels = await _original_get_compose_labels(config, host_name)
+    # Filter out containers whose stack (project) name should be excluded
+    return {
+        name: (stack, service)
+        for name, (stack, service) in labels.items()
+        if not _should_exclude(stack)
+    }
+
+
+@pytest.fixture(scope="session")
+def vendor_cache(request: pytest.FixtureRequest) -> Path:
+    """Download CDN assets once and cache to disk for faster recordings."""
+    cache_dir = Path(str(request.config.rootdir)) / ".pytest_cache" / "vendor"
+    return ensure_vendor_cache(cache_dir)
+
+
+@pytest.fixture(scope="session")
+def browser_type_launch_args() -> dict[str, str]:
+    """Configure Playwright to use system Chromium if available."""
+    for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
+        path = shutil.which(name)
+        if path:
+            return {"executable_path": path}
+    return {}
+
+
+# Path to real compose-farm config
+REAL_CONFIG_PATH = Path("/opt/stacks/compose-farm.yaml")
+
+
+@pytest.fixture(scope="module")
+def server_url() -> Generator[str, None, None]:
+    """Start demo server using real config (with filtered stacks) and return URL."""
+    os.environ["CF_CONFIG"] = str(REAL_CONFIG_PATH)
+
+    # Patch at source module level so all callers get filtered versions
+    patches = [
+        # Patch load_config at source - get_config() calls this internally
+        patch("compose_farm.config.load_config", _get_filtered_config),
+        # Patch load_state at source and where imported
+        patch("compose_farm.state.load_state", _get_filtered_state),
+        patch("compose_farm.web.routes.pages.load_state", _get_filtered_state),
+        # Patch container fetch to filter out excluded containers (Live Stats page)
+        patch("compose_farm.glances.fetch_container_stats", _filtered_fetch_container_stats),
+        # Patch compose labels to filter out excluded stacks
+        patch("compose_farm.executor.get_container_compose_labels", _filtered_get_compose_labels),
+    ]
+
+    for p in patches:
+        p.start()
+
+    # Import create_app AFTER patches are started so route modules see patched get_config
+    from compose_farm.web.app import create_app  # noqa: PLC0415
+
+    with socket.socket() as s:
+        s.bind(("127.0.0.1", 0))
+        port = s.getsockname()[1]
+
+    app = create_app()
+    uvicorn_config = uvicorn.Config(app, host="127.0.0.1", port=port, log_level="error")
+    server = uvicorn.Server(uvicorn_config)
+
+    thread = threading.Thread(target=server.run, daemon=True)
+    thread.start()
+
+    url = f"http://127.0.0.1:{port}"
+    server_ready = False
+    for _ in range(50):
+        try:
+            urllib.request.urlopen(url, timeout=0.5)  # noqa: S310
+            server_ready = True
+            break
+        except Exception:
+            time.sleep(0.1)
+
+    if not server_ready:
+        msg = f"Demo server failed to start on {url}"
+        raise RuntimeError(msg)
+
+    yield url
+
+    server.should_exit = True
+    thread.join(timeout=2)
+    os.environ.pop("CF_CONFIG", None)
+
+    for p in patches:
+        p.stop()
+
+
+@pytest.fixture(scope="module")
+def recording_output_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
+    """Return a temporary "recordings" directory that holds this module's videos."""
+    return Path(tmp_path_factory.mktemp("recordings"))
+
+
+@pytest.fixture
+def recording_context(
+    browser: Any,  # pytest-playwright's browser fixture
+    vendor_cache: Path,
+    recording_output_dir: Path,
+) -> Generator[BrowserContext, None, None]:
+    """Browser context with video recording enabled."""
+    context = browser.new_context(
+        viewport={"width": 1280, "height": 720},
+        record_video_dir=str(recording_output_dir),
+        record_video_size={"width": 1280, "height": 720},
+    )
+
+    # Set up CDN interception
+    cache = {url: (vendor_cache / f, ct) for url, (f, ct) in CDN_ASSETS.items()}
+
+    def handle_cdn(route: Route) -> None:
+        url = route.request.url
+        for url_prefix, (filepath, content_type) in cache.items():
+            if url.startswith(url_prefix):
+                route.fulfill(status=200, content_type=content_type, body=filepath.read_bytes())
+                return
+        print(f"UNCACHED CDN request: {url}")
+        route.abort("failed")
+
+    context.route(re.compile(r"https://(cdn\.jsdelivr\.net|unpkg\.com)/.*"), handle_cdn)
+
+    yield context
+    context.close()
+
+
+@pytest.fixture
+def recording_page(recording_context: BrowserContext) -> Generator[Page, None, None]:
+    """Yield a new page from the recording context and close it afterwards."""
+    page = recording_context.new_page()
+    yield page
+    page.close()
+
+
+@pytest.fixture
+def wide_recording_context(
+    browser: Any,  # pytest-playwright's browser fixture
+    recording_output_dir: Path,
+) -> Generator[BrowserContext, None, None]:
+    """Browser context with wider viewport for demos needing more horizontal space.
+
+    NOTE: This fixture does NOT use CDN interception (unlike recording_context).
+    CDN interception was causing inline scripts from containers.html to be
+    removed from the DOM, likely due to Tailwind's browser plugin behavior.
+    """
+    context = browser.new_context(
+        viewport={"width": 1920, "height": 1080},
+        record_video_dir=str(recording_output_dir),
+        record_video_size={"width": 1920, "height": 1080},
+    )
+
+    yield context
+    context.close()
+
+
+@pytest.fixture
+def wide_recording_page(wide_recording_context: BrowserContext) -> Generator[Page, None, None]:
+    """Page with wider viewport for demos needing more horizontal space."""
+    page = wide_recording_context.new_page()
+    yield page
+    page.close()
+
+
+# Demo helper functions
+
+
+def pause(page: Page, ms: int = 500) -> None:
+    """Hold the page for *ms* milliseconds so the step stays visible in the recording."""
+    page.wait_for_timeout(ms)
+
+
+def slow_type(page: Page, selector: str, text: str, delay: int = 100) -> None:
+    """Type *text* into *selector*, passing *delay* to Playwright as the per-keystroke delay."""
+    page.type(selector, text, delay=delay)
+
+
+def open_command_palette(page: Page) -> None:
+    """Press Ctrl+K, wait up to 2s for `#cmd-palette` to be open, then pause briefly."""
+    page.keyboard.press("Control+k")
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 300)
+
+
+def close_command_palette(page: Page) -> None:
+    """Press Escape, wait up to 2s for `#cmd-palette` to lose `open`, then pause briefly."""
+    page.keyboard.press("Escape")
+    page.wait_for_selector("#cmd-palette:not([open])", timeout=2000)
+    pause(page, 200)
+
+
+def wait_for_sidebar(page: Page) -> None:
+    """Wait up to 5s for the `#sidebar-stacks` element to appear, then pause briefly."""
+    page.wait_for_selector("#sidebar-stacks", timeout=5000)
+    pause(page, 300)
+
+
+def navigate_to_stack(page: Page, stack: str) -> None:
+    """Click the sidebar link containing *stack* and wait for the `/stack/<stack>` URL."""
+    page.locator("#sidebar-stacks a", has_text=stack).click()
+    page.wait_for_url(f"**/stack/{stack}", timeout=5000)
+    pause(page, 500)
+
+
+def select_command(page: Page, command: str) -> None:
+    """Fill the palette input with *command* and press Enter (palette must already be open)."""
+    page.locator("#cmd-input").fill(command)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 200)
--- a/docs/demos/web/demo_console.py
+++ b/docs/demos/web/demo_console.py
@@ -0,0 +1,77 @@
+"""Demo: Console terminal.
+
+Records a ~30 second demo showing:
+- Navigating to Console page
+- Running cf commands in the terminal
+- Showing the Compose Farm config in Monaco editor
+
+Run: pytest docs/demos/web/demo_console.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_console(recording_page: Page, server_url: str) -> None:
+    """Record the Console page demo: terminal commands first, then the config editor."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to Console page via sidebar menu
+    page.locator(".menu a", has_text="Console").click()
+    page.wait_for_url("**/console", timeout=5000)
+    pause(page, 1000)
+
+    # Wait for terminal to be ready (auto-connects)
+    page.wait_for_selector("#console-terminal .xterm", timeout=10000)
+    pause(page, 1500)
+
+    # Run fastfetch first
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2500)  # Wait for output
+
+    # Run cf stats
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf stats", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 3000)  # Wait for output
+
+    # Run cf ps for a single stack
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf ps grocy", delay=80)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2500)  # Wait for output
+
+    # Smoothly scroll down to show the Editor section with Compose Farm config
+    page.evaluate("""
+        const editor = document.getElementById('console-editor');
+        if (editor) {
+            editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)  # Wait for smooth scroll animation
+
+    # Wait for Monaco editor to load with config content
+    page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
+    pause(page, 2500)  # Let viewer see the Compose Farm config file
+
+    # Hold the final frame so the recording does not end abruptly
+    pause(page, 800)
--- a/docs/demos/web/demo_live_stats.py
+++ b/docs/demos/web/demo_live_stats.py
@@ -0,0 +1,85 @@
+"""Demo: Live Stats page.
+
+Records a ~20 second demo showing:
+- Navigating to Live Stats via command palette
+- Container table with real-time stats
+- Filtering containers
+- Sorting by different columns
+- Auto-refresh countdown
+
+Run: pytest docs/demos/web/demo_live_stats.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_live_stats(wide_recording_page: Page, server_url: str) -> None:
+    """Record the Live Stats demo: filter, sort, then pause auto-refresh by hovering."""
+    page = wide_recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 1000)
+
+    # Navigate to Live Stats via command palette
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "live", delay=100)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/live-stats", timeout=5000)
+
+    # Wait for containers to load (may take ~10s on first load due to SSH)
+    page.wait_for_selector("#container-rows tr:not(:has(.loading))", timeout=30000)
+    pause(page, 2000)  # Let viewer see the full table with timer
+
+    # Demonstrate filtering
+    slow_type(page, "#filter-input", "grocy", delay=100)
+    pause(page, 1500)  # Show filtered results
+
+    # Clear filter
+    page.fill("#filter-input", "")
+    pause(page, 1000)
+
+    # Sort by memory (click header)
+    page.click("th:has-text('Mem')")
+    pause(page, 1500)
+
+    # Sort by CPU
+    page.click("th:has-text('CPU')")
+    pause(page, 1500)
+
+    # Sort by host
+    page.click("th:has-text('Host')")
+    pause(page, 1500)
+
+    # Watch auto-refresh timer count down
+    pause(page, 3500)  # Wait for refresh to happen
+
+    # Hover on action menu to show pause behavior
+    action_btn = page.locator('button[onclick^="openActionMenu"]').first
+    action_btn.scroll_into_view_if_needed()
+    action_btn.hover()
+    pause(page, 2000)  # Show paused state (timer shows ⏸) and action menu
+
+    # Move away to close menu and resume refresh
+    page.locator("h2").first.hover()  # Move to header
+    pause(page, 3500)  # Watch countdown resume and refresh happen
+
+    # Hold the final frame so the recording does not end abruptly
+    pause(page, 1000)
--- a/docs/demos/web/demo_navigation.py
+++ b/docs/demos/web/demo_navigation.py
@@ -0,0 +1,74 @@
+"""Demo: Command palette navigation.
+
+Records a ~15 second demo showing:
+- Opening command palette with Ctrl+K
+- Fuzzy search filtering
+- Arrow key navigation
+- Stack and page navigation
+
+Run: pytest docs/demos/web/demo_navigation.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_navigation(recording_page: Page, server_url: str) -> None:
+    """Record the palette navigation demo: grocy, then immich, then back to the dashboard."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 1000)  # Let viewer see dashboard
+
+    # Open command palette with keyboard shortcut
+    open_command_palette(page)
+    pause(page, 500)
+
+    # Type a stack name to filter the palette entries
+    slow_type(page, "#cmd-input", "grocy", delay=120)
+    pause(page, 800)
+
+    # Arrow down, then back up, to show selection movement
+    page.keyboard.press("ArrowDown")
+    pause(page, 400)
+    page.keyboard.press("ArrowUp")
+    pause(page, 400)
+
+    # Press Enter to navigate to stack
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1500)  # Show stack page
+
+    # Open palette again to navigate elsewhere
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Navigate to another stack (immich) using only a partial name
+    slow_type(page, "#cmd-input", "imm", delay=120)
+    pause(page, 600)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/immich", timeout=5000)
+    pause(page, 1200)  # Show immich stack page
+
+    # Open palette one more time, navigate back to dashboard
+    open_command_palette(page)
+    slow_type(page, "#cmd-input", "dashb", delay=120)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    page.wait_for_url(server_url, timeout=5000)
+    pause(page, 1000)  # Final dashboard view
--- a/docs/demos/web/demo_shell.py
+++ b/docs/demos/web/demo_shell.py
@@ -0,0 +1,106 @@
+"""Demo: Container shell exec via command palette.
+
+Records a ~35 second demo showing:
+- Navigating to immich stack (multiple containers)
+- Using command palette with fuzzy matching ("sh mach") to open shell
+- Running a command
+- Using command palette to switch to server container shell
+- Running another command
+
+Run: pytest docs/demos/web/demo_shell.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_shell(recording_page: Page, server_url: str) -> None:
+    """Record the shell demo: open shells in two immich containers via the palette."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to immich via command palette (has multiple containers)
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "immich", delay=100)
+    pause(page, 600)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/immich", timeout=5000)
+    pause(page, 1500)
+
+    # Wait for containers list to load (so shell commands are available)
+    page.wait_for_selector("#containers-list button", timeout=10000)
+    pause(page, 800)
+
+    # Use command palette with fuzzy matching: "sh mach" -> "Shell: immich-machine-learning"
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "sh mach", delay=100)
+    pause(page, 600)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Wait for exec terminal to appear
+    page.wait_for_selector("#exec-terminal .xterm", timeout=10000)
+
+    # Smoothly scroll down to make the terminal visible
+    page.evaluate("""
+        const terminal = document.getElementById('exec-terminal');
+        if (terminal) {
+            terminal.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)
+
+    # Run python3 --version inside the container
+    slow_type(page, "#exec-terminal .xterm-helper-textarea", "python3 --version", delay=60)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 1500)
+
+    # Blur the terminal to release focus (won't scroll)
+    page.evaluate("document.activeElement?.blur()")
+    pause(page, 500)
+
+    # Use command palette to switch to server container: "sh serv" -> "Shell: immich-server"
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "sh serv", delay=100)
+    pause(page, 600)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Wait for new terminal
+    page.wait_for_selector("#exec-terminal .xterm", timeout=10000)
+
+    # Scroll to terminal
+    page.evaluate("""
+        const terminal = document.getElementById('exec-terminal');
+        if (terminal) {
+            terminal.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)
+
+    # List the application directory inside the container
+    slow_type(page, "#exec-terminal .xterm-helper-textarea", "ls /usr/src/app", delay=60)
+    pause(page, 300)
+    page.keyboard.press("Enter")
+    pause(page, 2000)
--- a/docs/demos/web/demo_stack.py
+++ b/docs/demos/web/demo_stack.py
@@ -0,0 +1,101 @@
+"""Demo: Stack actions.
+
+Records a ~30 second demo showing:
+- Navigating to a stack page
+- Viewing compose file in Monaco editor
+- Triggering Restart action via command palette
+- Watching terminal output stream
+- Triggering Logs action
+
+Run: pytest docs/demos/web/demo_stack.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    open_command_palette,
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_stack(recording_page: Page, server_url: str) -> None:
+    """Record the stack demo on grocy: compose file view, then Restart and Logs actions."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to grocy via command palette
+    open_command_palette(page)
+    pause(page, 400)
+    slow_type(page, "#cmd-input", "grocy", delay=100)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1000)  # Show stack page
+
+    # Click on Compose File collapse to show the Monaco editor
+    # The collapse uses a checkbox input, click it via the parent collapse div
+    compose_collapse = page.locator(".collapse", has_text="Compose File").first
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Wait for Monaco editor to load and show content
+    page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the compose file
+
+    # Smoothly scroll the editor to the center of the viewport
+    page.evaluate("""
+        const editor = document.getElementById('compose-editor');
+        if (editor) {
+            editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)  # Wait for smooth scroll animation
+
+    # Close the compose file section
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Open command palette for stack actions
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Filter to Restart action
+    slow_type(page, "#cmd-input", "restart", delay=120)
+    pause(page, 600)
+
+    # Execute Restart
+    page.keyboard.press("Enter")
+    pause(page, 300)
+
+    # Wait for terminal to expand and show output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)  # Let viewer see terminal streaming
+
+    # Open palette again for Logs
+    open_command_palette(page)
+    pause(page, 400)
+
+    # Filter to Logs action
+    slow_type(page, "#cmd-input", "logs", delay=120)
+    pause(page, 600)
+
+    # Execute Logs
+    page.keyboard.press("Enter")
+    pause(page, 300)
+
+    # Show log output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)  # Final view of logs
--- a/docs/demos/web/demo_themes.py
+++ b/docs/demos/web/demo_themes.py
@@ -0,0 +1,81 @@
+"""Demo: Theme switching.
+
+Records a ~15 second demo showing:
+- Opening theme picker via theme button
+- Live theme preview on arrow navigation
+- Selecting different themes
+- Theme persistence
+
+Run: pytest docs/demos/web/demo_themes.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import (
+    pause,
+    slow_type,
+    wait_for_sidebar,
+)
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_themes(recording_page: Page, server_url: str) -> None:
+    """Record the theme demo: arrow-key previews, then picking themes by search."""
+    page = recording_page
+
+    # Start on dashboard
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 1000)  # Show initial theme
+
+    # Click theme button to open theme picker
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 600)
+
+    # Arrow through many themes to show live preview effect
+    for _ in range(12):
+        page.keyboard.press("ArrowDown")
+        pause(page, 350)  # Show each preview
+
+    # Go back up through a few (land on valentine, not cyberpunk)
+    for _ in range(4):
+        page.keyboard.press("ArrowUp")
+        pause(page, 350)
+
+    # Select current theme with Enter
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Close palette with Escape
+    page.keyboard.press("Escape")
+    pause(page, 800)
+
+    # Open again and use search to find specific theme
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 400)
+
+    # Type to filter to a light theme (theme button pre-populates "theme:")
+    slow_type(page, "#cmd-input", "cup", delay=100)
+    pause(page, 500)
+    page.keyboard.press("Enter")
+    pause(page, 1000)
+
+    # Close, reopen the picker and search for "dark" to end on a dark theme
+    page.keyboard.press("Escape")
+    pause(page, 500)
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 300)
+
+    slow_type(page, "#cmd-input", "dark", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 800)
--- a/docs/demos/web/demo_workflow.py
+++ b/docs/demos/web/demo_workflow.py
@@ -0,0 +1,189 @@
+"""Demo: Full workflow.
+
+Records a comprehensive demo (~60 seconds) combining all major features:
+1. Console page: terminal with fastfetch, cf pull command
+2. Editor showing Compose Farm YAML config
+3. Command palette navigation to grocy stack
+4. Stack actions: up, logs
+5. Switch to dozzle stack via command palette, run update
+6. Dashboard overview
+7. Theme cycling via command palette
+
+This demo is used on the homepage and Web UI page as the main showcase.
+
+Run: pytest docs/demos/web/demo_workflow.py -v --no-cov
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from conftest import open_command_palette, pause, slow_type, wait_for_sidebar
+
+if TYPE_CHECKING:
+    from playwright.sync_api import Page
+
+
+def _demo_console_terminal(page: Page, server_url: str) -> None:
+    """Demo part 1: open the Console page and run fastfetch and cf pull in its terminal."""
+    # Start on dashboard briefly
+    page.goto(server_url)
+    wait_for_sidebar(page)
+    pause(page, 800)
+
+    # Navigate to Console page via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "cons", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/console", timeout=5000)
+    pause(page, 800)
+
+    # Wait for terminal to be ready
+    page.wait_for_selector("#console-terminal .xterm", timeout=10000)
+    pause(page, 1000)
+
+    # Run fastfetch first
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=60)
+    pause(page, 200)
+    page.keyboard.press("Enter")
+    pause(page, 2000)  # Wait for output
+
+    # Run cf pull on a stack to show Compose Farm in action
+    slow_type(page, "#console-terminal .xterm-helper-textarea", "cf pull grocy", delay=60)
+    pause(page, 200)
+    page.keyboard.press("Enter")
+    pause(page, 3000)  # Wait for pull output
+
+
+def _demo_config_editor(page: Page) -> None:
+    """Demo part 2: scroll to the Console page editor and show the Compose Farm config."""
+    # Smoothly scroll down to show the Editor section
+    # Use JavaScript for smooth scrolling animation
+    page.evaluate("""
+        const editor = document.getElementById('console-editor');
+        if (editor) {
+            editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
+        }
+    """)
+    pause(page, 1200)  # Wait for smooth scroll animation
+
+    # Wait for Monaco editor to load with config content
+    page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the Compose Farm config file
+
+
+def _demo_stack_actions(page: Page) -> None:
+    """Demo part 3: run up and logs on grocy, then switch to dozzle and run update."""
+    # Click on sidebar to take focus away from terminal, then use command palette
+    page.locator("#sidebar-stacks").click()
+    pause(page, 300)
+
+    # Navigate to grocy via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "grocy", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/grocy", timeout=5000)
+    pause(page, 1000)
+
+    # Open Compose File editor to show the compose.yaml
+    compose_collapse = page.locator(".collapse", has_text="Compose File").first
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Wait for Monaco editor to load and show content
+    page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
+    pause(page, 2000)  # Let viewer see the compose file
+
+    # Close the compose file section
+    compose_collapse.locator("input[type=checkbox]").click(force=True)
+    pause(page, 500)
+
+    # Run Up action via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "up", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    # Wait for terminal output
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+    # Show logs
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "logs", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+    # Switch to dozzle via command palette (on nas for lower latency)
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "dozzle", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url("**/stack/dozzle", timeout=5000)
+    pause(page, 1000)
+
+    # Run update action ("upda" rather than "up", which is what the Up step above typed)
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "upda", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 200)
+
+    page.wait_for_selector("#terminal-output .xterm", timeout=5000)
+    pause(page, 2500)
+
+
+def _demo_dashboard_and_themes(page: Page, server_url: str) -> None:
+    """Demo part 4: return to the dashboard and cycle themes with live preview."""
+    # Navigate to dashboard via command palette
+    open_command_palette(page)
+    pause(page, 300)
+    slow_type(page, "#cmd-input", "dash", delay=100)
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    page.wait_for_url(server_url, timeout=5000)
+    pause(page, 800)
+
+    # Scroll to top of page to ensure dashboard is fully visible
+    page.evaluate("window.scrollTo(0, 0)")
+    pause(page, 600)
+
+    # Open theme picker and arrow down to Dracula (shows live preview)
+    page.locator("#theme-btn").click()
+    page.wait_for_selector("#cmd-palette[open]", timeout=2000)
+    pause(page, 400)
+
+    # 19 presses is assumed to land on Dracula in the current theme order - re-check if it changes
+    for _ in range(19):
+        page.keyboard.press("ArrowDown")
+        pause(page, 180)
+
+    # Select Dracula theme and end on it
+    pause(page, 400)
+    page.keyboard.press("Enter")
+    pause(page, 1500)
+
+
+@pytest.mark.browser  # type: ignore[misc]
+def test_demo_workflow(recording_page: Page, server_url: str) -> None:
+    """Record the full workflow demo by running its four parts in order on one page."""
+    page = recording_page
+
+    _demo_console_terminal(page, server_url)
+    _demo_config_editor(page)
+    _demo_stack_actions(page)
+    _demo_dashboard_and_themes(page, server_url)
--- a/docs/demos/web/record.py
+++ b/docs/demos/web/record.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+"""Record all web UI demos.
+
+This script orchestrates recording of web UI demos using Playwright,
+then converts the WebM recordings to GIF format.
+
+Usage:
+    python docs/demos/web/record.py           # Record all demos
+    python docs/demos/web/record.py navigation  # Record specific demo
+
+Requirements:
+    - Playwright with Chromium: playwright install chromium
+    - ffmpeg for GIF conversion: apt install ffmpeg / brew install ffmpeg
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from rich.console import Console
+
+console = Console()
+
+SCRIPT_DIR = Path(__file__).parent  # directory holding this script and the demo_*.py files
+REPO_DIR = SCRIPT_DIR.parent.parent.parent  # three levels up from docs/demos/web
+OUTPUT_DIR = REPO_DIR / "docs" / "assets"  # final .webm and .gif files are written here
+
+DEMOS = [  # each name <n> is recorded from SCRIPT_DIR / demo_<n>.py
+    "navigation",
+    "stack",
+    "themes",
+    "workflow",
+    "console",
+    "shell",
+    "live_stats",
+]
+
+# High-quality ffmpeg settings for VP8 encoding
+# See: https://github.com/microsoft/playwright/issues/10855
+# See: https://github.com/microsoft/playwright/issues/31424
+#
+# MAX_QUALITY_ARGS: Lossless-like, largest files
+# BALANCED_QUALITY_ARGS: ~43% file size, nearly indistinguishable quality
+MAX_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 0 -crf 0 -deadline best -speed 0 -b:v 0 -threads 0"
+BALANCED_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 10 -crf 4 -deadline best -speed 0 -b:v 0 -threads 0"
+
+# Preset that patch_playwright_video_quality() writes into videoRecorder.js
+VIDEO_QUALITY_ARGS = MAX_QUALITY_ARGS
+
+
+def patch_playwright_video_quality() -> None:
+    """Patch Playwright's videoRecorder.js to use high-quality encoding settings."""
+    from playwright._impl._driver import compute_driver_executable  # noqa: PLC0415
+
+    # compute_driver_executable returns (node_path, cli_path)
+    result = compute_driver_executable()
+    node_path = result[0] if isinstance(result, tuple) else result
+    driver_path = Path(node_path).parent
+
+    video_recorder = driver_path / "package" / "lib" / "server" / "chromium" / "videoRecorder.js"
+
+    if not video_recorder.exists():
+        msg = f"videoRecorder.js not found at {video_recorder}"
+        raise FileNotFoundError(msg)
+
+    content = video_recorder.read_text()
+
+    # Check if already patched
+    if "deadline best" in content:
+        return  # Already patched
+
+    # Pattern to match the ffmpeg args line
+    pattern = (
+        r"-c:v vp8 -qmin \d+ -qmax \d+ -crf \d+ -deadline \w+ -speed \d+ -b:v \w+ -threads \d+"
+    )
+
+    if not re.search(pattern, content):
+        msg = "Could not find ffmpeg args pattern in videoRecorder.js"
+        raise ValueError(msg)
+
+    # Replace with high-quality settings
+    new_content = re.sub(pattern, VIDEO_QUALITY_ARGS, content)
+    video_recorder.write_text(new_content)
+    console.print("[green]Patched Playwright for high-quality video recording[/green]")
+
+
+def record_demo(name: str, index: int, total: int) -> Path | None:
+    """Run a single demo and return the video path."""
+    console.print(f"[cyan][{index}/{total}][/cyan] [green]Recording:[/green] web-{name}")
+
+    demo_file = SCRIPT_DIR / f"demo_{name}.py"
+    if not demo_file.exists():
+        console.print(f"[red]  Demo file not found: {demo_file}[/red]")
+        return None
+
+    # Create temp output dir for this recording
+    temp_dir = SCRIPT_DIR / ".recordings"
+    temp_dir.mkdir(exist_ok=True)
+
+    # Run pytest with video recording
+    # Set PYTHONPATH so conftest.py imports work
+    env = {**os.environ, "PYTHONPATH": str(SCRIPT_DIR)}
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "pytest",
+            str(demo_file),
+            "-v",
+            "--no-cov",
+            "-x",  # Stop on first failure
+            f"--basetemp={temp_dir}",
+        ],
+        check=False,
+        cwd=REPO_DIR,
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+
+    if result.returncode != 0:
+        console.print(f"[red]  Failed to record {name}[/red]")
+        console.print(result.stdout)
+        console.print(result.stderr)
+        return None
+
+    # Find the recorded video
+    videos = list(temp_dir.rglob("*.webm"))
+    if not videos:
+        console.print(f"[red]  No video found for {name}[/red]")
+        return None
+
+    # Use the most recent video
+    video = max(videos, key=lambda p: p.stat().st_mtime)
+    console.print(f"[green]  Recorded: {video.name}[/green]")
+    return video
+
+
+def convert_to_gif(webm_path: Path, output_name: str) -> Path:
+    """Convert WebM to GIF using ffmpeg with palette optimization."""
+    gif_path = OUTPUT_DIR / f"{output_name}.gif"
+    palette_path = webm_path.parent / "palette.png"
+
+    # Two-pass approach for better quality
+    # Pass 1: Generate palette
+    subprocess.run(
+        [  # noqa: S607
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(webm_path),
+            "-vf",
+            "fps=10,scale=1280:-1:flags=lanczos,palettegen=stats_mode=diff",
+            str(palette_path),
+        ],
+        check=True,
+        capture_output=True,
+    )
+
+    # Pass 2: Generate GIF with palette
+    subprocess.run(
+        [  # noqa: S607
+            "ffmpeg",
+            "-y",
+            "-i",
+            str(webm_path),
+            "-i",
+            str(palette_path),
+            "-lavfi",
+            "fps=10,scale=1280:-1:flags=lanczos[x];[x][1:v]paletteuse=dither=bayer:bayer_scale=5:diff_mode=rectangle",
+            str(gif_path),
+        ],
+        check=True,
+        capture_output=True,
+    )
+
+    palette_path.unlink(missing_ok=True)
+    return gif_path
+
+
+def move_recording(video_path: Path, name: str) -> tuple[Path, Path]:
+    """Move WebM and convert to GIF, returning both paths."""
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    output_name = f"web-{name}"
+    webm_dest = OUTPUT_DIR / f"{output_name}.webm"
+
+    shutil.copy2(video_path, webm_dest)
+    console.print(f"[blue]  WebM: {webm_dest.relative_to(REPO_DIR)}[/blue]")
+
+    gif_path = convert_to_gif(video_path, output_name)
+    console.print(f"[blue]  GIF:  {gif_path.relative_to(REPO_DIR)}[/blue]")
+
+    return webm_dest, gif_path
+
+
+def cleanup() -> None:
+    """Clean up temporary recording files."""
+    temp_dir = SCRIPT_DIR / ".recordings"
+    if temp_dir.exists():
+        shutil.rmtree(temp_dir)
+
+
+def main() -> int:
+    """Record all web UI demos."""
+    console.print("[blue]Recording web UI demos...[/blue]")
+    console.print(f"Output directory: {OUTPUT_DIR}")
+    console.print()
+
+    # Patch Playwright for high-quality video recording
+    patch_playwright_video_quality()
+
+    # Determine which demos to record
+    if len(sys.argv) > 1:
+        demos_to_record = [d for d in sys.argv[1:] if d in DEMOS]
+        if not demos_to_record:
+            console.print(f"[red]Unknown demo(s). Available: {', '.join(DEMOS)}[/red]")
+            return 1
+    else:
+        demos_to_record = DEMOS
+
+    results: dict[str, tuple[Path | None, Path | None]] = {}
+
+    try:
+        for i, demo in enumerate(demos_to_record, 1):
+            video_path = record_demo(demo, i, len(demos_to_record))
+            if video_path:
+                webm, gif = move_recording(video_path, demo)
+                results[demo] = (webm, gif)
+            else:
+                results[demo] = (None, None)
+            console.print()
+    finally:
+        cleanup()
+
+    # Summary
+    console.print("[blue]=== Summary ===[/blue]")
+    success_count = sum(1 for w, _ in results.values() if w is not None)
+    console.print(f"Recorded: {success_count}/{len(demos_to_record)} demos")
+    console.print()
+
+    for demo, (webm, gif) in results.items():  # type: ignore[assignment]
+        status = "[green]OK[/green]" if webm else "[red]FAILED[/red]"
+        console.print(f"  {demo}: {status}")
+        if webm:
+            console.print(f"    {webm.relative_to(REPO_DIR)}")
+        if gif:
+            console.print(f"    {gif.relative_to(REPO_DIR)}")
+
+    return 0 if success_count == len(demos_to_record) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/docs/dev/docker-swarm-network.md
+++ b/docs/dev/docker-swarm-network.md
@@ -1,90 +0,0 @@
-# Docker Swarm Overlay Networks with Compose Farm
-
-Notes from testing Docker Swarm's attachable overlay networks as a way to get cross-host container networking while still using `docker compose`.
-
-## The Idea
-
-Docker Swarm overlay networks can be made "attachable", allowing regular `docker compose` containers (not just swarm services) to join them. This would give us:
-
- Cross-host Docker DNS (containers find each other by name)
- No need to publish ports for inter-container communication
- Keep using `docker compose up` instead of `docker stack deploy`
-
-## Setup Steps
-
-```bash
-# On manager node
-docker swarm init --advertise-addr <manager-ip>
-
-# On worker nodes (use token from init output)
-docker swarm join --token <token> <manager-ip>:2377
-
-# Create attachable overlay network (on manager)
-docker network create --driver overlay --attachable my-network
-
-# In compose files, add the network
-networks:
-  my-network:
-    external: true
-```
-
-## Required Ports
-
-Docker Swarm requires these ports open **bidirectionally** between all nodes:
-
-| Port | Protocol | Purpose |
-|------|----------|---------|
-| 2377 | TCP | Cluster management |
-| 7946 | TCP + UDP | Node communication |
-| 4789 | UDP | Overlay network traffic (VXLAN) |
-
-## Test Results (2024-12-13)
-
- docker-debian (192.168.1.66) as manager
- dev-lxc (192.168.1.167) as worker
-
-### What worked
-
- Swarm init and join
- Overlay network creation
- Nodes showed as Ready
-
-### What failed
-
- Container on dev-lxc couldn't attach to overlay network
- Error: `attaching to network failed... context deadline exceeded`
- Cause: Port 7946 blocked from docker-debian → dev-lxc
-
-### Root cause
-
-Firewall on dev-lxc wasn't configured to allow swarm ports. Opening these ports requires sudo access on each node.
-
-## Conclusion
-
-Docker Swarm overlay networks are **not plug-and-play**. Requirements:
-
-1. Swarm init/join on all nodes
-2. Firewall rules on all nodes (needs sudo/root)
-3. All nodes must have bidirectional connectivity on 3 ports
-
-For a simpler alternative, consider:
-
- **Tailscale**: VPN mesh, containers use host's Tailscale IP
- **Host networking + published ports**: What compose-farm does today
- **Keep dependent services together**: Avoid cross-host networking entirely
-
-## Future Work
-
-If we decide to support overlay networks:
-
-1. Add a `compose-farm network create` command that:
-   - Initializes swarm if needed
-   - Creates attachable overlay network
-   - Documents required firewall rules
-
-2. Add network config to compose-farm.yaml:
-   ```yaml
-   overlay_network: compose-farm-net
-   ```
-
-3. Auto-inject network into compose files (or document manual setup)
--- a/docs/dev/future-improvements.md
+++ b/docs/dev/future-improvements.md
@@ -1,128 +0,0 @@
-# Future Improvements
-
-Low-priority improvements identified during code review. These are not currently causing issues but could be addressed if they become pain points.
-
-## 1. State Module Efficiency (LOW)
-
-**Current:** Every state operation reads and writes the entire file.
-
-```python
-def set_service_host(config, service, host):
-    state = load_state(config)   # Read file
-    state[service] = host
-    save_state(config, state)    # Write file
-```
-
-**Impact:** With 87 services, this is fine. With 1000+, it would be slow.
-
-**Potential fix:** Add batch operations:
-```python
-def update_state(config, updates: dict[str, str | None]) -> None:
-    """Batch update: set services to hosts, None means remove."""
-    state = load_state(config)
-    for service, host in updates.items():
-        if host is None:
-            state.pop(service, None)
-        else:
-            state[service] = host
-    save_state(config, state)
-```
-
-**When to do:** Only if state operations become noticeably slow.
-
---
-
-## 2. Remote-Aware Compose Path Resolution (LOW)
-
-**Current:** `config.get_compose_path()` checks if files exist on the local filesystem:
-
-```python
-def get_compose_path(self, service: str) -> Path:
-    for filename in ("compose.yaml", "compose.yml", ...):
-        candidate = service_dir / filename
-        if candidate.exists():  # Local check!
-            return candidate
-```
-
-**Why this works:** NFS/shared storage means local = remote.
-
-**Why it could break:** If running compose-farm from a machine without the NFS mount, it returns `compose.yaml` (the default) even if `docker-compose.yml` exists on the remote host.
-
-**Potential fix:** Query the remote host for file existence, or accept this limitation and document it.
-
-**When to do:** Only if users need to run compose-farm from non-NFS machines.
-
---
-
-## 3. Add Integration Tests for CLI Commands (MEDIUM)
-
-**Current:** No integration tests for the actual CLI commands. Tests cover the underlying functions but not the Typer commands themselves.
-
-**Potential fix:** Add integration tests using `CliRunner` from Typer:
-
-```python
-from typer.testing import CliRunner
-from compose_farm.cli import app
-
-runner = CliRunner()
-
-def test_check_command_validates_config():
-    result = runner.invoke(app, ["check", "--local"])
-    assert result.exit_code == 0
-```
-
-**When to do:** When CLI behavior becomes complex enough to warrant dedicated testing.
-
---
-
-## 4. Add Tests for operations.py (MEDIUM)
-
-**Current:** Operations module has 30% coverage. Most logic is tested indirectly through test_sync.py.
-
-**Potential fix:** Add dedicated tests for:
- `up_services()` with migration scenarios
- `preflight_check()`
- `check_host_compatibility()`
-
-**When to do:** When adding new operations or modifying migration logic.
-
---
-
-## 5. Consider Structured Logging (LOW)
-
-**Current:** Operations print directly to console using Rich. This couples the operations module to the Rich library.
-
-**Potential fix:** Use Python's logging module with a custom Rich handler:
-
-```python
-import logging
-
-logger = logging.getLogger(__name__)
-
-# In operations:
-logger.info("Migrating %s from %s to %s", service, old_host, new_host)
-
-# In cli.py - configure Rich handler:
-from rich.logging import RichHandler
-logging.basicConfig(handlers=[RichHandler()])
-```
-
-**Benefits:**
- Operations become testable without capturing stdout
- Logs can be redirected to files
- Log levels provide filtering
-
-**When to do:** Only if console output coupling becomes a problem for testing or extensibility.
-
---
-
-## Design Decisions to Keep
-
-These patterns are working well and should be preserved:
-
-1. **asyncio + asyncssh** - Solid async foundation
-2. **Pydantic models** - Clean validation
-3. **Rich for output** - Good UX
-4. **Test structure** - Good coverage
-5. **Module separation** - cli/operations/executor/compose pattern
-6. **KISS principle** - Don't over-engineer
--- a/docs/docker-deployment.md
+++ b/docs/docker-deployment.md
@@ -0,0 +1,101 @@
+---
+icon: lucide/container
+---
+
+# Docker Deployment
+
+Run the Compose Farm web UI in Docker.
+
+## Quick Start
+
+**1. Get the compose file:**
+
+```bash
+curl -O https://raw.githubusercontent.com/basnijholt/compose-farm/main/docker-compose.yml
+```
+
+**2. Generate `.env` file:**
+
+```bash
+cf config init-env
+```
+
+This auto-detects settings from your `compose-farm.yaml`:
+- `DOMAIN` from existing traefik labels
+- `CF_COMPOSE_DIR` from config
+- `CF_UID/GID/HOME/USER` from current user
+
+Review the output and edit if needed.
+
+**3. Set up SSH keys:**
+
+```bash
+docker compose run --rm cf ssh setup
+```
+
+**4. Start the web UI:**
+
+```bash
+docker compose up -d web
+```
+
+Open `http://localhost:9000` (or `https://compose-farm.example.com` if using Traefik).
+
+---
+
+## Configuration
+
+The `cf config init-env` command auto-detects most settings. After running it, review the generated `.env` file and edit if needed:
+
+```bash
+$EDITOR .env
+```
+
+### What init-env detects
+
+| Variable | How it's detected |
+|----------|-------------------|
+| `DOMAIN` | Extracted from traefik labels in your stacks |
+| `CF_COMPOSE_DIR` | From `compose_dir` in your config |
+| `CF_UID/GID/HOME/USER` | From current user (for NFS compatibility) |
+
+If auto-detection fails for any value, edit the `.env` file manually.
+
+### Glances Monitoring
+
+To show host CPU/memory stats in the dashboard, deploy [Glances](https://nicolargo.github.io/glances/) on your hosts. When running the web UI container, Compose Farm infers the local host from `CF_WEB_STACK` and uses the Glances container name for that host.
+
+See [Host Resource Monitoring](https://github.com/basnijholt/compose-farm#host-resource-monitoring-glances) in the README.
+
+---
+
+## Troubleshooting
+
+### SSH "Permission denied" or "Host key verification failed"
+
+Regenerate keys:
+
+```bash
+docker compose run --rm cf ssh setup
+```
+
+### Files created as root
+
+Set `CF_UID`, `CF_GID`, `CF_HOME`, and `CF_USER` in your `.env` file (`cf config init-env` fills them in from the current user), then restart.
+
+---
+
+## All Environment Variables
+
+For advanced users, here's the complete reference:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `DOMAIN` | Domain for Traefik labels | *(required)* |
+| `CF_COMPOSE_DIR` | Compose files directory | `/opt/stacks` |
+| `CF_UID` / `CF_GID` | User/group ID | `0` (root) |
+| `CF_HOME` | Home directory | `/root` |
+| `CF_USER` | Username for SSH | `root` |
+| `CF_WEB_STACK` | Web UI stack name (enables self-update, local host inference) | *(none)* |
+| `CF_SSH_DIR` | SSH keys directory | `~/.ssh/compose-farm` |
+| `CF_XDG_CONFIG` | Config/backup directory | `~/.config/compose-farm` |
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -0,0 +1,340 @@
+---
+icon: lucide/rocket
+---
+
+# Getting Started
+
+This guide walks you through installing Compose Farm and setting up your first multi-host deployment.
+
+## Prerequisites
+
+Before you begin, ensure you have:
+
+- **[uv](https://docs.astral.sh/uv/)** (recommended) or Python 3.11+
+- **SSH key-based authentication** to your Docker hosts
+- **Docker and Docker Compose** installed on all target hosts
+- **Shared storage** for compose files (NFS, Syncthing, etc.)
+
+## Installation
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/install.webm" type="video/webm">
+</video>
+
+### One-liner (recommended)
+
+```bash
+curl -fsSL https://compose-farm.nijho.lt/install | sh
+```
+
+This installs [uv](https://docs.astral.sh/uv/) if needed, then installs compose-farm.
+
+### Using uv
+
+If you already have [uv](https://docs.astral.sh/uv/) installed:
+
+```bash
+uv tool install compose-farm
+```
+
+### Using pip
+
+If you already have Python 3.11+ installed:
+
+```bash
+pip install compose-farm
+```
+
+### Using Docker
+
+```bash
+docker run --rm \
+  -v $SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent \
+  -v ./compose-farm.yaml:/root/.config/compose-farm/compose-farm.yaml:ro \
+  ghcr.io/basnijholt/compose-farm up --all
+```
+
+**Running as non-root user** (recommended for NFS mounts):
+
+By default, containers run as root. To preserve file ownership on mounted volumes, set these environment variables in your `.env` file:
+
+```bash
+# Add to .env file (one-time setup)
+echo "CF_UID=$(id -u)" >> .env
+echo "CF_GID=$(id -g)" >> .env
+echo "CF_HOME=$HOME" >> .env
+echo "CF_USER=$USER" >> .env
+```
+
+Or use [direnv](https://direnv.net/) to auto-set these variables when entering the directory:
+```bash
+cp .envrc.example .envrc && direnv allow
+```
+
+This ensures files like `compose-farm-state.yaml` and web UI edits are owned by your user instead of root. The `CF_USER` variable is required for SSH to work when running as a non-root user.
+
+### Verify Installation
+
+```bash
+cf --version
+cf --help
+```
+
+## SSH Setup
+
+Compose Farm uses SSH to run commands on remote hosts. You need passwordless SSH access.
+
+### Option 1: SSH Agent (default)
+
+If you already have SSH keys loaded in your agent:
+
+```bash
+# Verify keys are loaded
+ssh-add -l
+
+# Test connection
+ssh user@192.168.1.10 "docker --version"
+```
+
+### Option 2: Dedicated Key (recommended for Docker)
+
+For persistent access when running in Docker:
+
+```bash
+# Generate and distribute key to all hosts
+cf ssh setup
+
+# Check status
+cf ssh status
+```
+
+This creates `~/.ssh/compose-farm/id_ed25519` and copies the public key to each host.
+
+## Shared Storage Setup
+
+Compose files must be accessible at the **same path** on all hosts. Common approaches:
+
+### NFS Mount
+
+```bash
+# On each Docker host
+sudo mount nas:/volume1/compose /opt/compose
+
+# Or add to /etc/fstab
+nas:/volume1/compose /opt/compose nfs defaults 0 0
+```
+
+### Directory Structure
+
+```
+/opt/compose/           # compose_dir in config
+├── plex/
+│   └── docker-compose.yml
+├── grafana/
+│   └── docker-compose.yml
+├── nextcloud/
+│   └── docker-compose.yml
+└── jellyfin/
+    └── docker-compose.yml
+```
+
+## Configuration
+
+### Create Config File
+
+Create `compose-farm.yaml` in the directory where you'll run commands. For example, if your stacks are in `/opt/stacks`, place the config there too:
+
+```bash
+cd /opt/stacks
+cf config init
+```
+
+Alternatively, use `~/.config/compose-farm/compose-farm.yaml` for a global config. You can also symlink a working directory config to the global location:
+
+```bash
+# Create config in your stacks directory, symlink to ~/.config
+cf config symlink /opt/stacks/compose-farm.yaml
+```
+
+This way, `cf` commands work from anywhere while the config lives with your stacks.
+
+#### Single host example
+
+```yaml
+# Where compose files are located (one folder per stack)
+compose_dir: /opt/stacks
+
+hosts:
+  local: localhost
+
+stacks:
+  plex: local
+  grafana: local
+  nextcloud: local
+```
+
+#### Multi-host example
+```yaml
+# Where compose files are located (same path on all hosts)
+compose_dir: /opt/compose
+
+# Define your Docker hosts
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker           # SSH user
+  hp:
+    address: 192.168.1.11
+    # user defaults to current user
+
+# Map stacks to hosts
+stacks:
+  plex: nuc
+  grafana: nuc
+  nextcloud: hp
+```
+
+Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
+
+For cross-host HTTP routing, add Traefik labels and configure `traefik_file` (see [Traefik Integration](traefik.md)).
+### Validate Configuration
+
+```bash
+cf check --local
+```
+
+This validates syntax without SSH connections. For full validation:
+
+```bash
+cf check
+```
+
+## First Commands
+
+### Check Status
+
+```bash
+cf ps
+```
+
+Shows all configured stacks and their status.
+
+### Start All Stacks
+
+```bash
+cf up --all
+```
+
+Starts all stacks on their assigned hosts.
+
+### Start Specific Stacks
+
+```bash
+cf up plex grafana
+```
+
+### Apply Configuration
+
+The most powerful command: it reconciles what is actually running with your config.
+
+```bash
+cf apply --dry-run   # Preview changes
+cf apply             # Execute changes
+```
+
+This will:
+1. Start stacks in config but not running
+2. Migrate stacks on wrong host
+3. Stop stacks removed from config
+
+## Docker Network Setup
+
+If your stacks use an external Docker network:
+
+```bash
+# Create network on all hosts
+cf init-network
+
+# Or specific hosts
+cf init-network nuc hp
+```
+
+Default network: `mynetwork` with subnet `172.20.0.0/16`
+
+## Example Workflow
+
+### 1. Add a New Stack
+
+Create the compose file:
+
+```bash
+# On any host (shared storage)
+mkdir -p /opt/compose/gitea
+cat > /opt/compose/gitea/docker-compose.yml << 'EOF'
+services:
+  gitea:
+    image: docker.gitea.com/gitea:latest
+    container_name: gitea
+    environment:
+      - USER_UID=1000
+      - USER_GID=1000
+    volumes:
+      - /opt/config/gitea:/data
+      - /etc/timezone:/etc/timezone:ro
+      - /etc/localtime:/etc/localtime:ro
+    ports:
+      - "3000:3000"
+      - "2222:22"
+    restart: unless-stopped
+EOF
+```
+
+Add to config:
+
+```yaml
+stacks:
+  # ... existing stacks
+  gitea: nuc
+```
+
+Start the stack:
+
+```bash
+cf up gitea
+```
+
+### 2. Move a Stack to Another Host
+
+Edit `compose-farm.yaml`:
+
+```yaml
+stacks:
+  plex: hp  # Changed from nuc
+```
+
+Apply the change:
+
+```bash
+cf up plex
+# Automatically: down on nuc, up on hp
+```
+
+Or use apply to reconcile everything:
+
+```bash
+cf apply
+```
+
+### 3. Update All Stacks
+
+```bash
+cf update --all
+# Only recreates containers if images changed
+```
+
+## Next Steps
+
+- [Configuration Reference](configuration.md) - All config options
+- [Commands Reference](commands.md) - Full CLI documentation
+- [Traefik Integration](traefik.md) - Multi-host routing
+- [Best Practices](best-practices.md) - Tips and limitations
--- a/docs/index.md
+++ b/docs/index.md
@@ -0,0 +1,167 @@
+---
+icon: lucide/server
+---
+
+# Compose Farm
+
+A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.
+
+## What is Compose Farm?
+
+Compose Farm lets you manage Docker Compose stacks across multiple machines from a single command line. Think [Dockge](https://dockge.kuma.pet/) but with a CLI and web interface, designed for multi-host deployments.
+
+Define which stacks run where in one YAML file, then use `cf apply` to make reality match your configuration.
+It also works great on a single host with one folder per stack; just map stacks to `localhost`.
+
+## Quick Demo
+
+**CLI:**
+<video autoplay loop muted playsinline>
+  <source src="/assets/quickstart.webm" type="video/webm">
+</video>
+
+**[Web UI](web-ui.md):**
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-workflow.webm" type="video/webm">
+</video>
+
+## Why Compose Farm?
+
+| Problem | Compose Farm Solution |
+|---------|----------------------|
+| 100+ containers on one machine | Distribute across multiple hosts |
+| Kubernetes too complex | Just SSH + docker compose |
+| Swarm in maintenance mode | Zero infrastructure changes |
+| Manual SSH for each host | Single command for all |
+
+**It's a convenience wrapper, not a new paradigm.** Your existing `docker-compose.yml` files work unchanged.
+
+## Quick Start
+
+### Single host
+
+No SSH, shared storage, or Traefik file-provider required.
+
+```yaml
+# compose-farm.yaml
+compose_dir: /opt/stacks
+
+hosts:
+  local: localhost
+
+stacks:
+  plex: local
+  jellyfin: local
+  traefik: local
+```
+
+```bash
+cf apply  # Start/stop stacks to match config
+```
+
+### Multi-host
+
+Requires SSH plus a shared `compose_dir` path on all hosts (NFS or sync).
+
+```yaml
+# compose-farm.yaml
+compose_dir: /opt/compose
+
+hosts:
+  server-1:
+    address: 192.168.1.10
+  server-2:
+    address: 192.168.1.11
+
+stacks:
+  plex: server-1
+  jellyfin: server-2
+  grafana: server-1
+```
+
+```bash
+cf apply  # Stacks start, migrate, or stop as needed
+```
+
+Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
+
+For cross-host HTTP routing, add Traefik labels and configure `traefik_file` to generate file-provider config.
+### Installation
+
+```bash
+uv tool install compose-farm
+# or
+pip install compose-farm
+```
+
+### Configuration
+
+Create `compose-farm.yaml` in the directory where you'll run commands (e.g., `/opt/stacks`), or in `~/.config/compose-farm/`:
+
+```yaml
+compose_dir: /opt/compose
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+    user: docker
+  hp:
+    address: 192.168.1.11
+
+stacks:
+  plex: nuc
+  grafana: nuc
+  nextcloud: hp
+```
+
+See [Configuration](configuration.md) for all options and the full search order.
+
+### Usage
+
+```bash
+# Make reality match config
+cf apply
+
+# Start specific stacks
+cf up plex grafana
+
+# Check status
+cf ps
+
+# View logs
+cf logs -f plex
+```
+
+## Key Features
+
+- **Declarative configuration**: One YAML defines where everything runs
+- **Auto-migration**: Change a host assignment, run `cf up`, stack moves automatically
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/migration.webm" type="video/webm">
+</video>
+- **Parallel execution**: Multiple stacks start/stop concurrently
+- **State tracking**: Knows which stacks are running where
+- **Traefik integration**: Generate file-provider config for cross-host routing
+- **Zero changes**: Your compose files work as-is
+
+## Requirements
+
+- [uv](https://docs.astral.sh/uv/) (recommended) or Python 3.11+
+- SSH key-based authentication to your Docker hosts
+- Docker and Docker Compose on all target hosts
+- Shared storage (compose files at same path on all hosts)
+
+## Documentation
+
+- [Getting Started](getting-started.md) - Installation and first steps
+- [Configuration](configuration.md) - All configuration options
+- [Commands](commands.md) - CLI reference
+- [Web UI](web-ui.md) - Browser-based management interface
+- [Architecture](architecture.md) - How it works under the hood
+- [Traefik Integration](traefik.md) - Multi-host routing setup
+- [Best Practices](best-practices.md) - Tips and limitations
+
+## License
+
+MIT
--- a/docs/install
+++ b/docs/install
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Compose Farm bootstrap script
+# Usage: curl -fsSL https://compose-farm.nijho.lt/install | sh
+#
+# This script installs uv (if needed) and then installs compose-farm as a uv tool.
+
+set -e
+
+if command -v uv >/dev/null 2>&1; then
+    uv tool install compose-farm
+else
+    echo "uv is not installed. Installing..."
+    curl -LsSf https://astral.sh/uv/install.sh | sh
+    echo "uv installation complete!"
+    echo ""
+
+    # Call uv by path; presumably the installer's PATH change is not visible to this shell yet
+    if [ ! -x ~/.local/bin/uv ]; then
+        echo "Please restart your shell and run this script again"
+        echo ""
+        exit 0
+    fi
+    ~/.local/bin/uv tool install compose-farm
+fi
+
+echo ""
+echo "compose-farm is installed!"
+echo "Run 'cf --help' to get started."
+echo "If 'cf' is not found, restart your shell or run: source ~/.bashrc"
--- a/docs/javascripts/video-fix.js
+++ b/docs/javascripts/video-fix.js
@@ -0,0 +1,21 @@
+// Reload and restart every <video autoplay> element (works around Safari video autoplay issues)
+(function() {
+  function initVideos() {
+    document.querySelectorAll('video[autoplay]').forEach(function(video) {
+      video.load();
+      video.play().catch(function() {}); // a rejected play() promise is deliberately ignored
+    });
+  }
+
+  // Initial page load: run now, or on DOMContentLoaded if the DOM is still loading (needed for Chrome)
+  if (document.readyState === 'loading') {
+    document.addEventListener('DOMContentLoaded', initVideos);
+  } else {
+    initVideos();
+  }
+
+  // MkDocs instant navigation (needed for Safari): document$ may be undefined, hence the typeof guard
+  if (typeof document$ !== 'undefined') {
+    document$.subscribe(initVideos);
+  }
+})();
--- a/docs/overrides/partials/integrations/analytics/custom.html
+++ b/docs/overrides/partials/integrations/analytics/custom.html
@@ -0,0 +1,6 @@
+<!-- Privacy-friendly analytics by Plausible -->
+<script async src="https://plausible.nijho.lt/js/pa-NRX7MolONWKTUREJpAjkB.js"></script>
+<script>
+  window.plausible=window.plausible||function(){(plausible.q=plausible.q||[]).push(arguments)},plausible.init=plausible.init||function(i){plausible.o=i||{}};
+  plausible.init()
+</script>
--- a/docs/reddit-post.md
+++ b/docs/reddit-post.md
@@ -5,7 +5,7 @@
 - I made a CLI to run Docker Compose across multiple hosts without Kubernetes or Swarm
 ---

-I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread services across multiple hosts, but:
+I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread stacks across multiple hosts, but:

 - **Kubernetes** felt like overkill. I don't need pods, ingress controllers, or 10x more YAML.
 - **Docker Swarm** is basically in maintenance mode.
@@ -15,7 +15,7 @@ So I built **Compose Farm**, a simple CLI that runs `docker compose` commands ov

 ## How it works

-One YAML file maps services to hosts:
+One YAML file maps stacks to hosts:

 ```yaml
 compose_dir: /opt/stacks
@@ -24,11 +24,11 @@ hosts:
  nuc: 192.168.1.10
  hp: 192.168.1.11

-services:
+stacks:
  plex: nuc
  jellyfin: hp
-  sonarr: nuc
-  radarr: nuc
+  grafana: nuc
+  nextcloud: nuc
 ```

 Then just:
@@ -43,7 +43,7 @@ cf ps             # shows status across all hosts

 ## Auto-migration

-Change a service's host in the config and run `cf up`. It stops the service on the old host and starts it on the new one. No manual SSH needed.
+Change a stack's host in the config and run `cf up`. It stops the stack on the old host and starts it on the new one. No manual SSH needed.

 ```yaml
 # Before
@@ -65,7 +65,7 @@ cf up plex  # migrates automatically

 ## What it doesn't do

-- No high availability (if a host goes down, services don't auto-migrate)
+- No high availability (if a host goes down, stacks don't auto-migrate)
 - No overlay networking (containers on different hosts can't talk via Docker DNS)
 - No health checks or automatic restarts

--- a/docs/traefik.md
+++ b/docs/traefik.md
@@ -0,0 +1,384 @@
+---
+icon: lucide/globe
+---
+
+# Traefik Integration
+
+Compose Farm can generate Traefik file-provider configuration for routing traffic across multiple hosts.
+
+## The Problem
+
+When you run Traefik on one host but stacks on others, Traefik's docker provider can't see remote containers. The file provider bridges this gap.
+
+```
+                    Internet
+                        │
+                        ▼
+┌─────────────────────────────────────────────────────────────┐
+│                     Host: nuc                                │
+│                                                             │
+│  ┌─────────┐                                                │
+│  │ Traefik │◄─── Docker provider sees local containers      │
+│  │         │                                                │
+│  │         │◄─── File provider sees remote stacks           │
+│  └────┬────┘     (from compose-farm.yml)                    │
+│       │                                                     │
+└───────┼─────────────────────────────────────────────────────┘
+        │
+        ├────────────────────┐
+        │                    │
+        ▼                    ▼
+┌───────────────┐    ┌───────────────┐
+│   Host: hp    │    │  Host: nas    │
+│               │    │               │
+│  plex:32400   │    │ jellyfin:8096 │
+└───────────────┘    └───────────────┘
+```
+
+## How It Works
+
+1. Your compose files have standard Traefik labels
+2. Compose Farm reads labels and generates file-provider config
+3. Traefik watches the generated file
+4. Traffic routes to remote stacks via host IP + published port
+
+## Setup
+
+### Step 1: Configure Traefik File Provider
+
+Add directory watching to your Traefik config:
+
+```yaml
+# traefik.yml or docker-compose.yml command
+providers:
+  file:
+    directory: /opt/traefik/dynamic.d
+    watch: true
+```
+
+Or via command line:
+
+```yaml
+services:
+  traefik:
+    command:
+      - --providers.file.directory=/dynamic.d
+      - --providers.file.watch=true
+    volumes:
+      - /opt/traefik/dynamic.d:/dynamic.d:ro
+```
+
+### Step 2: Add Traefik Labels to Services
+
+Your compose files use standard Traefik labels:
+
+```yaml
+# /opt/compose/plex/docker-compose.yml
+services:
+  plex:
+    image: lscr.io/linuxserver/plex
+    ports:
+      - "32400:32400"  # IMPORTANT: Must publish port!
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.routers.plex.entrypoints=websecure
+      - traefik.http.routers.plex.tls.certresolver=letsencrypt
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+```
+
+**Important:** Services must publish ports for cross-host routing. Traefik connects via `host_ip:published_port`.
+
+### Step 3: Generate File Provider Config
+
+```bash
+cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+```
+
+This generates:
+
+```yaml
+# /opt/traefik/dynamic.d/compose-farm.yml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: plex
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.11:32400
+```
+
+## Auto-Regeneration
+
+Configure automatic regeneration in `compose-farm.yaml`:
+
+```yaml
+compose_dir: /opt/compose
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+  hp:
+    address: 192.168.1.11
+
+stacks:
+  traefik: nuc      # Traefik runs here
+  plex: hp          # Routed via file-provider
+  grafana: hp
+```
+
+With `traefik_file` set, these commands auto-regenerate the config:
+- `cf up`
+- `cf down`
+- `cf update`
+- `cf apply`
+
+### traefik_stack Option
+
+When set, stacks on the **same host as Traefik** are skipped in file-provider output. Traefik's docker provider handles them directly.
+
+```yaml
+traefik_stack: traefik  # traefik runs on nuc
+stacks:
+  traefik: nuc            # NOT in file-provider (docker provider)
+  portainer: nuc          # NOT in file-provider (docker provider)
+  plex: hp                # IN file-provider (cross-host)
+```
+
+## Label Syntax
+
+### Routers
+
+```yaml
+labels:
+  # Basic router
+  - traefik.http.routers.myapp.rule=Host(`app.example.com`)
+  - traefik.http.routers.myapp.entrypoints=websecure
+
+  # With TLS
+  - traefik.http.routers.myapp.tls=true
+  - traefik.http.routers.myapp.tls.certresolver=letsencrypt
+
+  # With middleware
+  - traefik.http.routers.myapp.middlewares=auth@file
+```
+
+### Services
+
+```yaml
+labels:
+  # Load balancer port
+  - traefik.http.services.myapp.loadbalancer.server.port=8080
+
+  # Health check
+  - traefik.http.services.myapp.loadbalancer.healthcheck.path=/health
+```
+
+### Middlewares
+
+Middlewares should be defined in a separate file (not generated by Compose Farm):
+
+```yaml
+# /opt/traefik/dynamic.d/middlewares.yml
+http:
+  middlewares:
+    auth:
+      basicAuth:
+        users:
+          - "user:$apr1$..."
+```
+
+Reference in labels:
+
+```yaml
+labels:
+  - traefik.http.routers.myapp.middlewares=auth@file
+```
+
+## Variable Substitution
+
+Labels can use environment variables:
+
+```yaml
+labels:
+  - traefik.http.routers.myapp.rule=Host(`${DOMAIN}`)
+```
+
+Compose Farm resolves variables from:
+1. Stack's `.env` file
+2. Current environment
+
+```bash
+# /opt/compose/myapp/.env
+DOMAIN=app.example.com
+```
+
+## Port Resolution
+
+Compose Farm determines the target URL from published ports:
+
+```yaml
+ports:
+  - "8080:80"           # Uses 8080
+  - "192.168.1.11:8080:80"  # Uses 8080 on specific IP
+```
+
+If no suitable port is found, a warning is shown.
+
+## Complete Example
+
+### compose-farm.yaml
+
+```yaml
+compose_dir: /opt/compose
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_stack: traefik
+
+hosts:
+  nuc:
+    address: 192.168.1.10
+  hp:
+    address: 192.168.1.11
+  nas:
+    address: 192.168.1.100
+
+stacks:
+  traefik: nuc
+  plex: hp
+  jellyfin: nas
+  grafana: nuc
+  nextcloud: nuc
+```
+
+### /opt/compose/plex/docker-compose.yml
+
+```yaml
+services:
+  plex:
+    image: lscr.io/linuxserver/plex
+    container_name: plex
+    ports:
+      - "32400:32400"
+    labels:
+      - traefik.enable=true
+      - traefik.http.routers.plex.rule=Host(`plex.example.com`)
+      - traefik.http.routers.plex.entrypoints=websecure
+      - traefik.http.routers.plex.tls.certresolver=letsencrypt
+      - traefik.http.services.plex.loadbalancer.server.port=32400
+    # ... other config
+```
+
+### Generated compose-farm.yml
+
+```yaml
+http:
+  routers:
+    plex:
+      rule: Host(`plex.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: plex
+    jellyfin:
+      rule: Host(`jellyfin.example.com`)
+      entryPoints:
+        - websecure
+      tls:
+        certResolver: letsencrypt
+      service: jellyfin
+
+  services:
+    plex:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.11:32400
+    jellyfin:
+      loadBalancer:
+        servers:
+          - url: http://192.168.1.100:8096
+```
+
+Note: `grafana` and `nextcloud` are NOT in the file because they're on the same host as Traefik (`nuc`).
+
+## Combining with Existing Config
+
+If you have existing Traefik dynamic config:
+
+```bash
+# Move existing config to directory
+mkdir -p /opt/traefik/dynamic.d
+mv /opt/traefik/dynamic.yml /opt/traefik/dynamic.d/manual.yml
+
+# Generate Compose Farm config
+cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+
+# Update Traefik to watch directory
+# --providers.file.directory=/dynamic.d
+```
+
+Traefik merges all YAML files in the directory.
+
+## Troubleshooting
+
+### Stack Not Accessible
+
+1. **Check port is published:**
+   ```yaml
+   ports:
+     - "8080:80"  # Must be published, not just exposed
+   ```
+
+2. **Check label syntax:**
+   ```bash
+   cf check mystack
+   ```
+
+3. **Verify generated config:**
+   ```bash
+   cf traefik-file mystack
+   ```
+
+4. **Check Traefik logs:**
+   ```bash
+   docker logs traefik
+   ```
+
+### Config Not Regenerating
+
+1. **Verify traefik_file is set:**
+   ```bash
+   cf config show | grep traefik
+   ```
+
+2. **Check file permissions:**
+   ```bash
+   ls -la /opt/traefik/dynamic.d/
+   ```
+
+3. **Manually regenerate:**
+   ```bash
+   cf traefik-file --all -o /opt/traefik/dynamic.d/compose-farm.yml
+   ```
+
+### Variable Not Resolved
+
+1. **Check .env file exists:**
+   ```bash
+   cat /opt/compose/mystack/.env
+   ```
+
+2. **Test variable resolution:**
+   ```bash
+   cd /opt/compose/mystack
+   docker compose config
+   ```
--- a/docs/web-ui.md
+++ b/docs/web-ui.md
@@ -0,0 +1,154 @@
+---
+icon: lucide/layout-dashboard
+---
+
+# Web UI
+
+Compose Farm includes a web interface for managing stacks from your browser. Start it with:
+
+```bash
+cf web
+```
+
+Then open [http://localhost:8000](http://localhost:8000).
+
+## Features
+
+### Full Workflow
+
+Console terminal, config editor, stack navigation, actions (up, logs, update), dashboard overview, and theme switching - all in one flow.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-workflow.webm" type="video/webm">
+</video>
+
+### Stack Actions
+
+Navigate to any stack and use the command palette to trigger actions like restart, pull, update, or view logs. Output streams in real time via WebSocket.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-stack.webm" type="video/webm">
+</video>
+
+### Theme Switching
+
+35 themes available via the command palette. Type `theme:` to filter, then use arrow keys to preview themes live before selecting.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-themes.webm" type="video/webm">
+</video>
+
+### Command Palette
+
+Press `Ctrl+K` (or `Cmd+K` on macOS) to open the command palette. Use fuzzy search to quickly navigate, trigger actions, or change themes.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-navigation.webm" type="video/webm">
+</video>
+
+## Pages
+
+### Dashboard (`/`)
+
+- Stack overview with status indicators
+- Host statistics (CPU, memory, disk, load via Glances)
+- Pending operations (migrations, orphaned stacks)
+- Quick actions via command palette
+
+### Live Stats (`/live-stats`)
+
+Real-time container monitoring across all hosts, powered by [Glances](https://nicolargo.github.io/glances/).
+
+- **Live metrics**: CPU, memory, network I/O for every container
+- **Auto-refresh**: Updates every 3 seconds (pauses when dropdown menus are open)
+- **Filtering**: Type to filter containers by name, stack, host, or image
+- **Sorting**: Click column headers to sort by any metric
+- **Update detection**: Shows when container images have updates available
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-live_stats.webm" type="video/webm">
+</video>
+
+#### Requirements
+
+Live Stats requires Glances to be deployed on all hosts:
+
+1. Add `glances_stack: glances` to your `compose-farm.yaml`
+2. Deploy a Glances stack that runs on all hosts (see [example](https://github.com/basnijholt/compose-farm/tree/main/examples/glances))
+3. Glances must expose its REST API on port 61208
+
+### Stack Detail (`/stack/{name}`)
+
+- Compose file editor (Monaco)
+- Environment file editor
+- Action buttons: Up, Down, Restart, Update, Pull, Logs
+- Container shell access (exec into running containers)
+- Terminal output for running commands
+
+Before saving, files are automatically backed up to `~/.config/compose-farm/backups/`.
+
+### Console (`/console`)
+
+- Full shell access to any host
+- File editor for remote files
+- Monaco editor with syntax highlighting
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-console.webm" type="video/webm">
+</video>
+
+### Container Shell
+
+Click the Shell button on any running container to exec into it directly from the browser.
+
+<video autoplay loop muted playsinline>
+  <source src="/assets/web-shell.webm" type="video/webm">
+</video>
+
+## Keyboard Shortcuts
+
+| Shortcut | Action |
+|----------|--------|
+| `Ctrl+K` / `Cmd+K` | Open command palette |
+| `Ctrl+S` / `Cmd+S` | Save editors |
+| `Escape` | Close command palette |
+| `Arrow keys` | Navigate command list |
+| `Enter` | Execute selected command |
+
+## Starting the Server
+
+```bash
+# Default: http://0.0.0.0:8000
+cf web
+
+# Custom port
+cf web --port 3000
+
+# Development mode with auto-reload
+cf web --reload
+
+# Bind to specific interface
+cf web --host 127.0.0.1
+```
+
+## Requirements
+
+The web UI requires additional dependencies:
+
+```bash
+# If installed via pip
+pip install compose-farm[web]
+
+# If installed via uv
+uv tool install 'compose-farm[web]'
+```
+
+## Architecture
+
+The web UI uses:
+
+- **FastAPI** - Backend API and WebSocket handling
+- **HTMX** - Dynamic page updates without full reloads
+- **DaisyUI + Tailwind** - Theming and styling
+- **Monaco Editor** - Code editing for compose/env files
+- **xterm.js** - Terminal emulation for logs and shell access
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,21 +2,22 @@

 Real-world examples demonstrating compose-farm patterns for multi-host Docker deployments.

-## Services
+## Stacks

-| Service | Type | Demonstrates |
+| Stack | Type | Demonstrates |
 |---------|------|--------------|
 | [traefik](traefik/) | Infrastructure | Reverse proxy, Let's Encrypt, file-provider |
+| [coredns](coredns/) | Infrastructure | Wildcard DNS for `*.local` domains |
 | [mealie](mealie/) | Single container | Traefik labels, resource limits, environment vars |
 | [uptime-kuma](uptime-kuma/) | Single container | Docker socket, user mapping, custom DNS |
-| [paperless-ngx](paperless-ngx/) | Multi-container | Redis + App stack (SQLite) |
+| [paperless-ngx](paperless-ngx/) | Multi-container | Redis + PostgreSQL + App stack |
 | [autokuma](autokuma/) | Multi-host | Demonstrates `all` keyword (runs on every host) |

 ## Key Patterns

 ### External Network

-All services connect to a shared external network for inter-service communication:
+All stacks connect to a shared external network for inter-service communication:

 ```yaml
 networks:
@@ -32,12 +33,12 @@ compose-farm init-network --network mynetwork --subnet 172.20.0.0/16

 ### Traefik Labels (Dual Routes)

-Services expose two routes for different access patterns:
+Stacks expose two routes for different access patterns:

 1. **HTTPS route** (`websecure` entrypoint): For your custom domain with Let's Encrypt TLS
 2. **HTTP route** (`web` entrypoint): For `.local` domains on your LAN (no TLS needed)

-This pattern allows accessing services via:
+This pattern allows accessing stacks via:
 - `https://mealie.example.com` - from anywhere, with TLS
 - `http://mealie.local` - from your local network, no TLS overhead

@@ -53,11 +54,12 @@ labels:
  - traefik.http.routers.myapp-local.entrypoints=web
 ```

-> **Note:** `.local` domains require local DNS (e.g., Pi-hole, Technitium) to resolve to your Traefik host.
+> **Note:** `.local` domains require local DNS to resolve to your Traefik host.
+> The [coredns](coredns/) example provides this - edit `Corefile` to set your Traefik IP.

 ### Environment Variables

-Each service has a `.env` file for secrets and domain configuration.
+Each stack has a `.env` file for secrets and domain configuration.
 Edit these files to set your domain and credentials:

 ```bash
@@ -76,38 +78,21 @@ volumes:
  - /mnt/data/myapp:/app/data
 ```

-This allows services to migrate between hosts without data loss.
+This allows stacks to migrate between hosts without data loss.

-### Multi-Host Services
+### Multi-Host Stacks

-Services that need to run on every host (e.g., monitoring agents):
+Stacks that need to run on every host (e.g., monitoring agents):

 ```yaml
 # In compose-farm.yaml
-services:
+stacks:
  autokuma: all  # Runs on every configured host
 ```

-### Multi-Container Stacks
-
-Database-backed apps with multiple services:
-
-```yaml
-services:
-  redis:
-    image: redis:7
-  app:
-    depends_on:
-      - redis
-```
-
-> **NFS + PostgreSQL Warning:** PostgreSQL should NOT run on NFS storage due to
-> fsync and file locking issues. Use SQLite (safe for single-writer on NFS) or
-> keep PostgreSQL data on local volumes (non-migratable).
-
 ### AutoKuma Labels (Optional)

-The autokuma example demonstrates compose-farm's **multi-host feature** - running the same service on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.
+The autokuma example demonstrates compose-farm's **multi-host feature** - running the same stack on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.

 [AutoKuma](https://github.com/BigBoot/AutoKuma) automatically creates Uptime Kuma monitors from Docker labels:

@@ -125,10 +110,10 @@ cd examples
 # 1. Create the shared network on all hosts
 compose-farm init-network

-# 2. Start Traefik first (the reverse proxy)
-compose-farm up traefik
+# 2. Start infrastructure (reverse proxy + DNS)
+compose-farm up traefik coredns

-# 3. Start other services
+# 3. Start other stacks
 compose-farm up mealie uptime-kuma

 # 4. Check status
@@ -148,24 +133,24 @@ compose-farm down --all

 The `compose-farm.yaml` shows a multi-host setup:

-- **primary** (192.168.1.10): Runs Traefik and heavy services
-- **secondary** (192.168.1.11): Runs lighter services
+- **primary** (192.168.1.10): Runs Traefik and heavy stacks
+- **secondary** (192.168.1.11): Runs lighter stacks
 - **autokuma**: Runs on ALL hosts to monitor local containers

-When Traefik runs on `primary` and a service runs on `secondary`, compose-farm
+When Traefik runs on `primary` and a stack runs on `secondary`, compose-farm
 automatically generates file-provider config so Traefik can route to it.

 ## Traefik File-Provider

-When services run on different hosts than Traefik, use `traefik-file` to generate routing config:
+When stacks run on different hosts than Traefik, use `traefik-file` to generate routing config:

 ```bash
-# Generate config for all services
+# Generate config for all stacks
 compose-farm traefik-file --all -o traefik/dynamic.d/compose-farm.yml

 # Or configure auto-generation in compose-farm.yaml:
 traefik_file: /opt/stacks/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik
+traefik_stack: traefik
 ```

-With `traefik_file` configured, compose-farm automatically regenerates the config after `up`, `down`, `restart`, and `update` commands.
+With `traefik_file` configured, compose-farm automatically regenerates the config after `up`, `down`, and `update` commands.
--- a/examples/compose-farm-state.yaml
+++ b/examples/compose-farm-state.yaml
@@ -3,6 +3,7 @@ deployed:
  - primary
  - secondary
  - local
+  coredns: primary
  mealie: secondary
  paperless-ngx: primary
  traefik: primary
--- a/examples/compose-farm.yaml
+++ b/examples/compose-farm.yaml
@@ -5,36 +5,37 @@

 compose_dir: /opt/stacks/compose-farm/examples

-# Auto-regenerate Traefik file-provider config after up/down/restart/update
+# Auto-regenerate Traefik file-provider config after up/down/update
 traefik_file: /opt/stacks/compose-farm/examples/traefik/dynamic.d/compose-farm.yml
-traefik_service: traefik  # Skip Traefik's host in file-provider (docker provider handles it)
+traefik_stack: traefik  # Skip Traefik's host in file-provider (docker provider handles it)

 hosts:
-  # Primary server - runs Traefik and most services
+  # Primary server - runs Traefik and most stacks
  # Full form with all options
  primary:
    address: 192.168.1.10
    user: deploy
    port: 22

-  # Secondary server - runs some services for load distribution
+  # Secondary server - runs some stacks for load distribution
  # Short form (user defaults to current user, port defaults to 22)
  secondary: 192.168.1.11

  # Local execution (no SSH) - for testing or when running on the host itself
  local: localhost

-services:
+stacks:
  # Infrastructure (runs on primary where Traefik is)
  traefik: primary
+  coredns: primary  # DNS for *.local resolution

-  # Multi-host services (runs on ALL hosts)
+  # Multi-host stacks (runs on ALL hosts)
  # AutoKuma monitors Docker containers on each host
  autokuma: all

-  # Primary server services
+  # Primary server stacks
  paperless-ngx: primary

-  # Secondary server services (distributed for performance)
+  # Secondary server stacks (distributed for performance)
  mealie: secondary
  uptime-kuma: secondary
--- a/examples/coredns/.env
+++ b/examples/coredns/.env
@@ -0,0 +1,2 @@
+# CoreDNS doesn't need environment variables
+# The Traefik IP is configured in the Corefile
--- a/examples/coredns/Corefile
+++ b/examples/coredns/Corefile
@@ -0,0 +1,22 @@
+# CoreDNS configuration for .local domain resolution
+#
+# Resolves *.local to the Traefik host IP (where your reverse proxy runs).
+# All other queries are forwarded to upstream DNS.
+
+# Handle .local domains - resolve everything to Traefik's host
+local {
+    template IN A {
+        answer "{{ .Name }} 60 IN A 192.168.1.10"
+    }
+    template IN AAAA {
+        # Return empty for AAAA to avoid delays on IPv4-only networks
+        rcode NOERROR
+    }
+}
+
+# Forward everything else to upstream DNS
+. {
+    forward . 1.1.1.1 8.8.8.8
+    cache 300
+    errors
+}
--- a/examples/coredns/compose.yaml
+++ b/examples/coredns/compose.yaml
@@ -0,0 +1,27 @@
+# CoreDNS - DNS server for .local domain resolution
+#
+# Demonstrates:
+# - Wildcard DNS for *.local domains
+# - Config file mounting from stack directory
+# - UDP/TCP port exposure
+#
+# This enables all the .local routes in the examples to work.
+# Point your devices/router DNS to this server's IP.
+name: coredns
+services:
+  coredns:
+    image: coredns/coredns:latest
+    container_name: coredns
+    restart: unless-stopped
+    networks:
+      - mynetwork
+    ports:
+      - "53:53/udp"
+      - "53:53/tcp"
+    volumes:
+      - ./Corefile:/root/Corefile:ro
+    command: -conf /root/Corefile
+
+networks:
+  mynetwork:
+    external: true
--- a/examples/paperless-ngx/.env
+++ b/examples/paperless-ngx/.env
@@ -1,3 +1,4 @@
 # Copy to .env and fill in your values
 DOMAIN=example.com
-PAPERLESS_SECRET_KEY=change-me-to-a-random-string
+POSTGRES_PASSWORD=change-me-to-a-secure-password
+PAPERLESS_SECRET_KEY=change-me-to-a-long-random-string
--- a/examples/paperless-ngx/compose.yaml
+++ b/examples/paperless-ngx/compose.yaml
@@ -1,44 +1,57 @@
 # Paperless-ngx - Document management system
 #
 # Demonstrates:
-# - HTTPS route: paperless.${DOMAIN} (e.g., paperless.example.com) with Let's Encrypt
-# - HTTP route: paperless.local for LAN access without TLS
-# - Multi-container stack (Redis + App with SQLite)
-#
-# NOTE: This example uses SQLite (the default) instead of PostgreSQL.
-# PostgreSQL should NOT be used with NFS storage due to fsync/locking issues.
-# If you need PostgreSQL, use local volumes for the database.
+# - HTTPS route: paperless.${DOMAIN} with Let's Encrypt
+# - HTTP route: paperless.local for LAN access
+# - Multi-container stack (Redis + PostgreSQL + App)
+# - Separate env_file for app-specific settings
 name: paperless-ngx
 services:
-  redis:
-    image: redis:8
+  broker:
+    image: redis:7
    container_name: paperless-redis
    restart: unless-stopped
    networks:
      - mynetwork
    volumes:
-      - /mnt/data/paperless/redis:/data
+      - /mnt/data/paperless/redisdata:/data
+
+  db:
+    image: postgres:16
+    container_name: paperless-db
+    restart: unless-stopped
+    networks:
+      - mynetwork
+    volumes:
+      - /mnt/data/paperless/pgdata:/var/lib/postgresql/data
+    environment:
+      POSTGRES_DB: paperless
+      POSTGRES_USER: paperless
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}

  paperless:
    image: ghcr.io/paperless-ngx/paperless-ngx:latest
    container_name: paperless
    restart: unless-stopped
    depends_on:
-      - redis
+      - db
+      - broker
    networks:
      - mynetwork
    ports:
      - "8000:8000"
    volumes:
-      # SQLite database stored here (safe on NFS for single-writer)
      - /mnt/data/paperless/data:/usr/src/paperless/data
      - /mnt/data/paperless/media:/usr/src/paperless/media
      - /mnt/data/paperless/export:/usr/src/paperless/export
      - /mnt/data/paperless/consume:/usr/src/paperless/consume
    environment:
-      PAPERLESS_REDIS: redis://redis:6379
+      PAPERLESS_REDIS: redis://broker:6379
+      PAPERLESS_DBHOST: db
      PAPERLESS_URL: https://paperless.${DOMAIN}
      PAPERLESS_SECRET_KEY: ${PAPERLESS_SECRET_KEY}
+      PAPERLESS_TIME_ZONE: America/Los_Angeles
+      PAPERLESS_OCR_LANGUAGE: eng
      USERMAP_UID: 1000
      USERMAP_GID: 1000
    labels:
--- a/hatch_build.py
+++ b/hatch_build.py
@@ -1,7 +1,7 @@
 """Hatch build hook to vendor CDN assets for offline use.

 During wheel builds, this hook:
-1. Parses base.html to find elements with data-vendor attributes
+1. Reads vendor-assets.json to find assets marked for vendoring
 2. Downloads each CDN asset to a temporary vendor directory
 3. Rewrites base.html to use local /static/vendor/ paths
 4. Fetches and bundles license information
@@ -13,6 +13,7 @@ distributed wheel has vendored assets.

 from __future__ import annotations

+import json
 import re
 import shutil
 import subprocess
@@ -23,22 +24,6 @@ from urllib.request import Request, urlopen

 from hatchling.builders.hooks.plugin.interface import BuildHookInterface

-# Matches elements with data-vendor attribute: extracts URL and target filename
-# Example: <script src="https://..." data-vendor="htmx.js">
-# Captures: (1) src/href, (2) URL, (3) attributes between, (4) vendor filename
-VENDOR_PATTERN = re.compile(r'(src|href)="(https://[^"]+)"([^>]*?)data-vendor="([^"]+)"')
-
-# License URLs for each package (GitHub raw URLs)
-LICENSE_URLS: dict[str, tuple[str, str]] = {
-    "htmx": ("MIT", "https://raw.githubusercontent.com/bigskysoftware/htmx/master/LICENSE"),
-    "xterm": ("MIT", "https://raw.githubusercontent.com/xtermjs/xterm.js/master/LICENSE"),
-    "daisyui": ("MIT", "https://raw.githubusercontent.com/saadeghi/daisyui/master/LICENSE"),
-    "tailwindcss": (
-        "MIT",
-        "https://raw.githubusercontent.com/tailwindlabs/tailwindcss/master/LICENSE",
-    ),
-}
-

 def _download(url: str) -> bytes:
    """Download a URL, trying urllib first then curl as fallback."""
@@ -61,7 +46,14 @@ def _download(url: str) -> bytes:
    return bytes(result.stdout)


-def _generate_licenses_file(temp_dir: Path) -> None:
+def _load_vendor_assets(root: Path) -> dict[str, Any]:
+    """Parse and return src/compose_farm/web/vendor-assets.json under *root*."""
+    json_path = root / "src" / "compose_farm" / "web" / "vendor-assets.json"
+    with json_path.open() as f:
+        return json.load(f)
+
+
+def _generate_licenses_file(temp_dir: Path, licenses: dict[str, dict[str, str]]) -> None:
    """Download and combine license files into LICENSES.txt."""
    lines = [
        "# Vendored Dependencies - License Information",
@@ -73,7 +65,9 @@ def _generate_licenses_file(temp_dir: Path) -> None:
        "",
    ]

-    for pkg_name, (license_type, license_url) in LICENSE_URLS.items():
+    for pkg_name, license_info in licenses.items():
+        license_type = license_info["type"]
+        license_url = license_info["url"]
        lines.append(f"## {pkg_name} ({license_type})")
        lines.append(f"Source: {license_url}")
        lines.append("")
@@ -107,44 +101,57 @@ class VendorAssetsHook(BuildHookInterface):  # type: ignore[misc]
        if not base_html_path.exists():
            return

+        # Load vendor assets configuration
+        vendor_config = _load_vendor_assets(Path(self.root))
+        assets_to_vendor = vendor_config["assets"]
+
+        if not assets_to_vendor:
+            return
+
        # Create temp directory for vendored assets
        temp_dir = Path(tempfile.mkdtemp(prefix="compose_farm_vendor_"))
        vendor_dir = temp_dir / "vendor"
        vendor_dir.mkdir()

-        # Read and parse base.html
+        # Read base.html
        html_content = base_html_path.read_text()
+
+        # Build URL to filename mapping and download assets
        url_to_filename: dict[str, str] = {}
-
-        # Find all elements with data-vendor attribute and download them
-        for match in VENDOR_PATTERN.finditer(html_content):
-            url = match.group(2)
-            filename = match.group(4)
-
-            if url in url_to_filename:
-                continue
-
+        for asset in assets_to_vendor:
+            url = asset["url"]
+            filename = asset["filename"]
            url_to_filename[url] = filename
+            filepath = vendor_dir / filename
+            filepath.parent.mkdir(parents=True, exist_ok=True)
            content = _download(url)
-            (vendor_dir / filename).write_bytes(content)
+            filepath.write_bytes(content)

-        if not url_to_filename:
-            return
+        # Generate LICENSES.txt from the JSON config
+        _generate_licenses_file(vendor_dir, vendor_config["licenses"])

-        # Generate LICENSES.txt
-        _generate_licenses_file(vendor_dir)
+        # Rewrite HTML: replace CDN URLs with local paths and remove data-vendor attributes
+        # Pattern matches: src="URL" ... data-vendor="filename" or href="URL" ... data-vendor="filename"
+        vendor_pattern = re.compile(r'(src|href)="(https://[^"]+)"([^>]*?)data-vendor="([^"]+)"')

-        # Rewrite HTML to use local paths (remove data-vendor, update URL)
        def replace_vendor_tag(match: re.Match[str]) -> str:
            attr = match.group(1)  # src or href
            url = match.group(2)
            between = match.group(3)  # attributes between URL and data-vendor
-            filename = match.group(4)
            if url in url_to_filename:
+                filename = url_to_filename[url]
                return f'{attr}="/static/vendor/{filename}"{between}'
            return match.group(0)

-        modified_html = VENDOR_PATTERN.sub(replace_vendor_tag, html_content)
+        modified_html = vendor_pattern.sub(replace_vendor_tag, html_content)
+
+        # Inject vendored mode flag for JavaScript to detect
+        # Insert right after <head> tag so it's available early
+        modified_html = modified_html.replace(
+            "<head>",
+            "<head>\n    <script>window.CF_VENDORED=true;</script>",
+            1,  # Only replace first occurrence
+        )

        # Write modified base.html to temp
        templates_dir = temp_dir / "templates"
--- a/justfile
+++ b/justfile
@@ -0,0 +1,60 @@
+# Compose Farm Development Commands
+# Run `just` to see available commands
+
+# Default: list available commands
+default:
+    @just --list
+
+# Install development dependencies
+install:
+    uv sync --all-extras --dev
+
+# Run all tests (parallel)
+test:
+    uv run pytest -n auto
+
+# Run CLI tests only (parallel, with coverage)
+test-cli:
+    uv run pytest -m "not browser" -n auto
+
+# Run web UI tests only (parallel)
+test-web:
+    uv run pytest -m browser -n auto
+
+# Lint, format, and type check
+lint:
+    uv run ruff check --fix .
+    uv run ruff format .
+    uv run mypy src
+    uv run ty check src
+
+# Start web UI in development mode with auto-reload
+web:
+    uv run cf web --reload --port 9001
+
+# Kill the web server
+kill-web:
+    lsof -ti :9001 | xargs kill -9 2>/dev/null || true
+
+# Build docs and serve locally
+doc:
+    uvx zensical build
+    python -m http.server -d site 9002
+
+# Kill the docs server
+kill-doc:
+    lsof -ti :9002 | xargs kill -9 2>/dev/null || true
+
+# Record CLI demos (all or specific: just record-cli quickstart)
+record-cli *demos:
+    python docs/demos/cli/record.py {{demos}}
+
+# Record web UI demos (all or specific: just record-web navigation)
+record-web *demos:
+    python docs/demos/web/record.py {{demos}}
+
+# Clean up build artifacts and caches
+clean:
+    rm -rf .pytest_cache .mypy_cache .ruff_cache .coverage htmlcov dist build
+    find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+    find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,8 @@ classifiers = [
    "Intended Audience :: Developers",
    "Intended Audience :: System Administrators",
    "License :: OSI Approved :: MIT License",
-    "Operating System :: OS Independent",
+    "Operating System :: MacOS",
+    "Operating System :: POSIX :: Linux",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
@@ -46,6 +47,7 @@ dependencies = [
    "asyncssh>=2.14.0",
    "pyyaml>=6.0",
    "rich>=13.0.0",
+    "python-dotenv>=1.0.0",
 ]

 [project.optional-dependencies]
@@ -53,6 +55,7 @@ web = [
    "fastapi[standard]>=0.109.0",
    "jinja2>=3.1.0",
    "websockets>=12.0",
+    "humanize>=4.0.0",
 ]

 [project.urls]
@@ -121,6 +124,10 @@ python_version = "3.11"
 strict = true
 plugins = ["pydantic.mypy"]

+[[tool.mypy.overrides]]
+module = "compose_farm._version"
+ignore_missing_imports = true
+
 [[tool.mypy.overrides]]
 module = "asyncssh.*"
 ignore_missing_imports = true
@@ -133,6 +140,10 @@ disallow_untyped_decorators = false
 module = "compose_farm.web.*"
 disallow_untyped_decorators = false

+[[tool.mypy.overrides]]
+module = "docs.demos.web.*"
+disallow_untyped_decorators = false
+
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
 testpaths = ["tests"]
@@ -145,6 +156,9 @@ addopts = [
    "--no-cov-on-fail",
    "-v",
 ]
+markers = [
+    "browser: marks tests as browser tests (deselect with '-m \"not browser\"')",
+]

 [tool.coverage.run]
 omit = []
@@ -157,9 +171,23 @@ exclude_lines = [
    'if __name__ == "__main__":',
 ]

+[tool.ty.environment]
+python-version = "3.11"
+
+[tool.ty.src]
+exclude = [
+    "hatch_build.py",  # Build-time only, hatchling not in dev deps
+    "docs/demos/**",   # Demo scripts with local conftest imports
+    "src/compose_farm/_version.py",  # Generated at build time
+]
+
+[tool.ty.rules]
+unresolved-import = "ignore"  # _version.py is generated at build time
+
 [dependency-groups]
 dev = [
    "mypy>=1.19.0",
+    "ty>=0.0.1a13",
    "pre-commit>=4.5.0",
    "pytest>=9.0.2",
    "pytest-asyncio>=1.3.0",
@@ -174,4 +202,8 @@ dev = [
    "websockets>=12.0",
    # For FastAPI TestClient
    "httpx>=0.28.0",
+    # For browser tests (use system chromium via nix-shell -p chromium)
+    "pytest-playwright>=0.7.0",
+    # For parallel test execution
+    "pytest-xdist>=3.0.0",
 ]
--- a/src/compose_farm/cli/init.py
+++ b/src/compose_farm/cli/init.py
@@ -8,6 +8,7 @@ from compose_farm.cli import (
    lifecycle,  # noqa: F401
    management,  # noqa: F401
    monitoring,  # noqa: F401
+    ssh,  # noqa: F401
    web,  # noqa: F401
 )

--- a/src/compose_farm/cli/app.py
+++ b/src/compose_farm/cli/app.py
@@ -23,6 +23,7 @@ app = typer.Typer(
    help="Compose Farm - run docker compose commands across multiple hosts",
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
+    rich_markup_mode="rich",
 )


--- a/src/compose_farm/cli/common.py
+++ b/src/compose_farm/cli/common.py
@@ -18,7 +18,15 @@ from rich.progress import (
    TimeElapsedColumn,
 )

-from compose_farm.console import console, err_console
+from compose_farm.console import (
+    MSG_HOST_NOT_FOUND,
+    MSG_STACK_NOT_FOUND,
+    console,
+    print_error,
+    print_hint,
+    print_success,
+    print_warning,
+)

 if TYPE_CHECKING:
    from collections.abc import Callable, Coroutine, Generator
@@ -27,16 +35,17 @@ if TYPE_CHECKING:
    from compose_farm.executor import CommandResult

 _T = TypeVar("_T")
+_R = TypeVar("_R")


 # --- Shared CLI Options ---
-ServicesArg = Annotated[
+StacksArg = Annotated[
    list[str] | None,
-    typer.Argument(help="Services to operate on"),
+    typer.Argument(help="Stacks to operate on"),
 ]
 AllOption = Annotated[
    bool,
-    typer.Option("--all", "-a", help="Run on all services"),
+    typer.Option("--all", "-a", help="Run on all stacks"),
 ]
 ConfigOption = Annotated[
    Path | None,
@@ -48,7 +57,11 @@ LogPathOption = Annotated[
 ]
 HostOption = Annotated[
    str | None,
-    typer.Option("--host", "-H", help="Filter to services on this host"),
+    typer.Option("--host", "-H", help="Filter to stacks on this host"),
+]
+ServiceOption = Annotated[
+    str | None,
+    typer.Option("--service", "-s", help="Target a specific service within the stack"),
 ]

 # --- Constants (internal) ---
@@ -56,6 +69,13 @@ _MISSING_PATH_PREVIEW_LIMIT = 2
 _STATS_PREVIEW_LIMIT = 3  # Max number of pending migrations to show by name


+def format_host(host: str | list[str]) -> str:
+    """Format a host value for display; multiple hosts become a comma-separated string."""
+    if isinstance(host, list):
+        return ", ".join(host)
+    return host
+
+
@contextlib.contextmanager
 def progress_bar(
    label: str, total: int, *, initial_description: str = "[dim]connecting...[/]"
@@ -81,6 +101,37 @@ def progress_bar(
        yield progress, task_id


+def run_parallel_with_progress(
+    label: str,
+    items: list[_T],
+    async_fn: Callable[[_T], Coroutine[None, None, _R]],
+) -> list[_R]:
+    """Run async tasks in parallel with a progress bar.
+
+    Args:
+        label: Progress bar label (e.g., "Discovering", "Querying hosts")
+        items: List of items to process
+        async_fn: Async function to call for each item; its result must be
+                  indexable, since result[0] is shown as the progress description
+
+    Returns:
+        List of results from async_fn in completion order (not input order).
+
+    """
+
+    async def gather() -> list[_R]:
+        with progress_bar(label, len(items)) as (progress, task_id):
+            tasks = [asyncio.create_task(async_fn(item)) for item in items]
+            results: list[_R] = []
+            for coro in asyncio.as_completed(tasks):
+                result = await coro
+                results.append(result)
+                progress.update(task_id, advance=1, description=f"[cyan]{result[0]}[/]")  # type: ignore[index]
+            return results
+
+    return asyncio.run(gather())  # starts its own event loop; call from sync code only
+
+
 def load_config_or_exit(config_path: Path | None) -> Config:
    """Load config or exit with a friendly error message."""
    # Lazy import: pydantic adds ~50ms to startup, only load when actually needed
@@ -89,37 +140,65 @@ def load_config_or_exit(config_path: Path | None) -> Config:
    try:
        return load_config(config_path)
    except FileNotFoundError as e:
-        err_console.print(f"[red]✗[/] {e}")
+        print_error(str(e))
+        raise typer.Exit(1) from e
+    except Exception as e:
+        print_error(f"Invalid config: {e}")
        raise typer.Exit(1) from e


-def get_services(
-    services: list[str],
-    all_services: bool,
+def get_stacks(
+    stacks: list[str],
+    all_stacks: bool,
    config_path: Path | None,
+    *,
+    host: str | None = None,
+    default_all: bool = False,
 ) -> tuple[list[str], Config]:
-    """Resolve service list and load config.
+    """Resolve stack list and load config.
+
+    Handles three mutually exclusive selection methods:
+    - Explicit stack names
+    - --all flag
+    - --host filter
+
+    Args:
+        stacks: Explicit stack names
+        all_stacks: Whether --all was specified
+        config_path: Path to config file
+        host: Filter to stacks on this host; exits 1 if unknown, 0 if no stacks match
+        default_all: If True, default to all stacks when nothing specified (for ps)

    Supports "." as shorthand for the current directory name.
+
    """
+    validate_stack_selection(stacks, all_stacks, host)  # reject combined selectors first
    config = load_config_or_exit(config_path)

-    if all_services:
-        return list(config.services.keys()), config
-    if not services:
-        err_console.print("[red]✗[/] Specify services or use --all")
+    if host is not None:
+        validate_hosts(config, host)
+        stack_list = [s for s in config.stacks if host in config.get_hosts(s)]
+        if not stack_list:
+            print_warning(f"No stacks configured for host [magenta]{host}[/]")
+            raise typer.Exit(0)
+        return stack_list, config
+
+    if all_stacks:
+        return list(config.stacks.keys()), config
+
+    if not stacks:
+        if default_all:
+            return list(config.stacks.keys()), config
+        print_error("Specify stacks or use [bold]--all[/] / [bold]--host[/]")
        raise typer.Exit(1)

    # Resolve "." to current directory name
-    resolved = [Path.cwd().name if svc == "." else svc for svc in services]
+    resolved = [Path.cwd().name if stack == "." else stack for stack in stacks]

-    # Validate all services exist in config
-    unknown = [svc for svc in resolved if svc not in config.services]
-    if unknown:
-        for svc in unknown:
-            err_console.print(f"[red]✗[/] Unknown service: [cyan]{svc}[/]")
-        err_console.print("[dim]Hint: Add the service to compose-farm.yaml or use --all[/]")
-        raise typer.Exit(1)
+    # Validate all stacks exist in config
+    validate_stacks(
+        config, resolved, hint="Add the stack to compose-farm.yaml or use [bold]--all[/]"
+    )

    return resolved, config

@@ -143,21 +222,19 @@ def report_results(results: list[CommandResult]) -> None:
        console.print()  # Blank line before summary
        if failed:
            for r in failed:
-                err_console.print(
-                    f"[red]✗[/] [cyan]{r.service}[/] failed with exit code {r.exit_code}"
-                )
+                print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")
            console.print()
            console.print(
-                f"[green]✓[/] {len(succeeded)}/{len(results)} services succeeded, "
+                f"[green]✓[/] {len(succeeded)}/{len(results)} stacks succeeded, "
                f"[red]✗[/] {len(failed)} failed"
            )
        else:
-            console.print(f"[green]✓[/] All {len(results)} services succeeded")
+            print_success(f"All {len(results)} stacks succeeded")

    elif failed:
-        # Single service failed
+        # Single stack failed
        r = failed[0]
-        err_console.print(f"[red]✗[/] [cyan]{r.service}[/] failed with exit code {r.exit_code}")
+        print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")

    if failed:
        raise typer.Exit(1)
@@ -169,12 +246,12 @@ def maybe_regenerate_traefik(
 ) -> None:
    """Regenerate traefik config if traefik_file is configured.

-    If results are provided, skips regeneration if all services failed.
+    If results are provided, skips regeneration if all stacks failed.
    """
    if cfg.traefik_file is None:
        return

-    # Skip if all services failed
+    # Skip if all stacks failed
    if results and not any(r.success for r in results):
        return

@@ -185,7 +262,7 @@ def maybe_regenerate_traefik(
    )

    try:
-        dynamic, warnings = generate_traefik_config(cfg, list(cfg.services.keys()))
+        dynamic, warnings = generate_traefik_config(cfg, list(cfg.stacks.keys()))
        new_content = render_traefik_config(dynamic)

        # Check if content changed
@@ -197,47 +274,58 @@ def maybe_regenerate_traefik(
            cfg.traefik_file.parent.mkdir(parents=True, exist_ok=True)
            cfg.traefik_file.write_text(new_content)
            console.print()  # Ensure we're on a new line after streaming output
-            console.print(f"[green]✓[/] Traefik config updated: {cfg.traefik_file}")
+            print_success(f"Traefik config updated: {cfg.traefik_file}")

        for warning in warnings:
-            err_console.print(f"[yellow]![/] {warning}")
+            print_warning(warning)
    except (FileNotFoundError, ValueError) as exc:
-        err_console.print(f"[yellow]![/] Failed to update traefik config: {exc}")
+        print_warning(f"Failed to update traefik config: {exc}")


-def validate_host_for_service(cfg: Config, service: str, host: str) -> None:
-    """Validate that a host is valid for a service."""
-    if host not in cfg.hosts:
-        err_console.print(f"[red]✗[/] Host '{host}' not found in config")
+def validate_stacks(cfg: Config, stacks: list[str], *, hint: str | None = None) -> None:
+    """Validate that all stacks exist in config; print errors (and hint) and exit 1 if not."""
+    invalid = [s for s in stacks if s not in cfg.stacks]
+    if invalid:
+        for svc in invalid:
+            print_error(MSG_STACK_NOT_FOUND.format(name=svc))
+        if hint:
+            print_hint(hint)
        raise typer.Exit(1)
-    allowed_hosts = cfg.get_hosts(service)
+
+
+def validate_hosts(cfg: Config, hosts: str | list[str]) -> None:
+    """Validate that host(s) exist in config. Exits with error if any not found."""
+    host_list = [hosts] if isinstance(hosts, str) else hosts  # accept one name or a list
+    invalid = [h for h in host_list if h not in cfg.hosts]
+    if invalid:
+        for h in invalid:
+            print_error(MSG_HOST_NOT_FOUND.format(name=h))
+        raise typer.Exit(1)
+
+
+def validate_host_for_stack(cfg: Config, stack: str, host: str) -> None:
+    """Validate that host exists in config and is one of the stack's configured hosts."""
+    validate_hosts(cfg, host)
+    allowed_hosts = cfg.get_hosts(stack)
    if host not in allowed_hosts:
-        err_console.print(
-            f"[red]✗[/] Service '{service}' is not configured for host '{host}' "
+        print_error(
+            f"Stack [cyan]{stack}[/] is not configured for host [magenta]{host}[/] "
            f"(configured: {', '.join(allowed_hosts)})"
        )
        raise typer.Exit(1)


-def run_host_operation(
-    cfg: Config,
-    svc_list: list[str],
-    host: str,
-    command: str,
-    action_verb: str,
-    state_callback: Callable[[Config, str, str], None],
+def validate_stack_selection(
+    stacks: list[str] | None,
+    all_stacks: bool,
+    host: str | None,
 ) -> None:
-    """Run an operation on a specific host for multiple services."""
-    from compose_farm.executor import run_compose_on_host  # noqa: PLC0415
+    """Validate that only one stack selection method is used.

-    results: list[CommandResult] = []
-    for service in svc_list:
-        validate_host_for_service(cfg, service, host)
-        console.print(f"[cyan]\\[{service}][/] {action_verb} on [magenta]{host}[/]...")
-        result = run_async(run_compose_on_host(cfg, service, host, command, raw=True))
-        print()  # Newline after raw output
-        results.append(result)
-        if result.success:
-            state_callback(cfg, service, host)
-    maybe_regenerate_traefik(cfg, results)
-    report_results(results)
+    The three selection methods (explicit stacks, --all, --host) are mutually
+    exclusive across all commands; combining them prints an error and exits 1.
+    """
+    methods = sum([bool(stacks), all_stacks, host is not None])  # empty stacks counts as unset
+    if methods > 1:
+        print_error("Use only one of: stack names, [bold]--all[/], or [bold]--host[/]")
+        raise typer.Exit(1)
--- a/src/compose_farm/cli/config.py
+++ b/src/compose_farm/cli/config.py
@@ -3,20 +3,22 @@
 from __future__ import annotations

 import os
-import platform
 import shlex
 import shutil
 import subprocess
 from importlib import resources
 from pathlib import Path
-from typing import Annotated
+from typing import TYPE_CHECKING, Annotated

 import typer

 from compose_farm.cli.app import app
-from compose_farm.console import console, err_console
+from compose_farm.console import MSG_CONFIG_NOT_FOUND, console, print_error, print_success
 from compose_farm.paths import config_search_paths, default_config_path, find_config_path

+if TYPE_CHECKING:
+    from compose_farm.config import Config
+
 config_app = typer.Typer(
    name="config",
    help="Manage compose-farm configuration files.",
@@ -40,24 +42,10 @@ _RawOption = Annotated[


 def _get_editor() -> str:
-    """Get the user's preferred editor.
-
-    Checks $EDITOR, then $VISUAL, then falls back to platform defaults.
-    """
-    for env_var in ("EDITOR", "VISUAL"):
-        editor = os.environ.get(env_var)
-        if editor:
-            return editor
-
-    if platform.system() == "Windows":
-        return "notepad"
-
-    # Try common editors on Unix-like systems
-    for editor in ("nano", "vim", "vi"):
-        if shutil.which(editor):
-            return editor
-
-    return "vi"
+    """Get the user's preferred editor ($EDITOR > $VISUAL > nano/vim/vi on PATH > vi)."""
+    if editor := os.environ.get("EDITOR") or os.environ.get("VISUAL"):
+        return editor
+    return next((e for e in ("nano", "vim", "vi") if shutil.which(e)), "vi")


 def _generate_template() -> str:
@@ -66,8 +54,8 @@ def _generate_template() -> str:
        template_file = resources.files("compose_farm") / "example-config.yaml"
        return template_file.read_text(encoding="utf-8")
    except FileNotFoundError as e:
-        err_console.print("[red]Example config template is missing from the package.[/red]")
-        err_console.print("Reinstall compose-farm or report this issue.")
+        print_error("Example config template is missing from the package")
+        console.print("Reinstall compose-farm or report this issue.")
        raise typer.Exit(1) from e


@@ -80,6 +68,35 @@ def _get_config_file(path: Path | None) -> Path | None:
    return config_path.resolve() if config_path else None


+def _load_config_with_path(path: Path | None) -> tuple[Path, Config]:
+    """Load config and return both the resolved path and Config object.
+
+    Exits with code 1 if no config file is found; loading goes through load_config_or_exit.
+    """
+    from compose_farm.cli.common import load_config_or_exit  # noqa: PLC0415
+
+    config_file = _get_config_file(path)
+    if config_file is None:
+        print_error(MSG_CONFIG_NOT_FOUND)
+        raise typer.Exit(1)
+
+    cfg = load_config_or_exit(config_file)
+    return config_file, cfg
+
+
+def _report_missing_config(explicit_path: Path | None = None) -> None:
+    """Print a not-found notice showing the explicit path or the searched locations."""
+    console.print("[yellow]Config file not found.[/yellow]")
+    if explicit_path:
+        console.print(f"\nProvided path does not exist: [cyan]{explicit_path}[/cyan]")
+    else:
+        console.print("\nSearched locations:")
+        for p in config_search_paths():
+            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
+            console.print(f"  - {p} ({status})")
+    console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+
+
@config_app.command("init")
 def config_init(
    path: _PathOption = None,
@@ -107,7 +124,7 @@ def config_init(
    template_content = _generate_template()
    target_path.write_text(template_content, encoding="utf-8")

-    console.print(f"[green]✓[/] Config file created at: {target_path}")
+    print_success(f"Config file created at: {target_path}")
    console.print("\n[dim]Edit the file to customize your settings:[/dim]")
    console.print("  [cyan]cf config edit[/cyan]")

@@ -123,40 +140,34 @@ def config_edit(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            console.print(f"  - {p}")
+        _report_missing_config()
        raise typer.Exit(1)

    if not config_file.exists():
-        console.print("[yellow]Config file not found.[/yellow]")
-        console.print(f"\nProvided path does not exist: [cyan]{config_file}[/cyan]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config(config_file)
        raise typer.Exit(1)

    editor = _get_editor()
    console.print(f"[dim]Opening {config_file} with {editor}...[/dim]")

    try:
-        editor_cmd = shlex.split(editor, posix=os.name != "nt")
+        editor_cmd = shlex.split(editor)
    except ValueError as e:
-        err_console.print("[red]Invalid editor command. Check $EDITOR/$VISUAL.[/red]")
+        print_error("Invalid editor command. Check [bold]$EDITOR[/]/[bold]$VISUAL[/]")
        raise typer.Exit(1) from e

    if not editor_cmd:
-        err_console.print("[red]Editor command is empty.[/red]")
+        print_error("Editor command is empty")
        raise typer.Exit(1)

    try:
        subprocess.run([*editor_cmd, str(config_file)], check=True)
    except FileNotFoundError:
-        err_console.print(f"[red]Editor '{editor_cmd[0]}' not found.[/red]")
-        err_console.print("Set $EDITOR environment variable to your preferred editor.")
+        print_error(f"Editor [cyan]{editor_cmd[0]}[/] not found")
+        console.print("Set [bold]$EDITOR[/] environment variable to your preferred editor.")
        raise typer.Exit(1) from None
    except subprocess.CalledProcessError as e:
-        err_console.print(f"[red]Editor exited with error code {e.returncode}[/red]")
+        print_error(f"Editor exited with error code {e.returncode}")
        raise typer.Exit(e.returncode) from None


@@ -169,18 +180,11 @@ def config_show(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
-            console.print(f"  - {p} ({status})")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config()
        raise typer.Exit(0)

    if not config_file.exists():
-        console.print("[yellow]Config file not found.[/yellow]")
-        console.print(f"\nProvided path does not exist: [cyan]{config_file}[/cyan]")
-        console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
+        _report_missing_config(config_file)
        raise typer.Exit(1)

    content = config_file.read_text(encoding="utf-8")
@@ -207,11 +211,7 @@ def config_path(
    config_file = _get_config_file(path)

    if config_file is None:
-        console.print("[yellow]No config file found.[/yellow]")
-        console.print("\nSearched locations:")
-        for p in config_search_paths():
-            status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
-            console.print(f"  - {p} ({status})")
+        _report_missing_config()
        raise typer.Exit(1)

    # Just print the path for easy piping
@@ -223,27 +223,11 @@ def config_validate(
    path: _PathOption = None,
 ) -> None:
    """Validate the config file syntax and schema."""
-    config_file = _get_config_file(path)
+    config_file, cfg = _load_config_with_path(path)

-    if config_file is None:
-        err_console.print("[red]✗[/] No config file found")
-        raise typer.Exit(1)
-
-    # Lazy import: pydantic adds ~50ms to startup, only load when actually needed
-    from compose_farm.config import load_config  # noqa: PLC0415
-
-    try:
-        cfg = load_config(config_file)
-    except FileNotFoundError as e:
-        err_console.print(f"[red]✗[/] {e}")
-        raise typer.Exit(1) from e
-    except Exception as e:
-        err_console.print(f"[red]✗[/] Invalid config: {e}")
-        raise typer.Exit(1) from e
-
-    console.print(f"[green]✓[/] Valid config: {config_file}")
+    print_success(f"Valid config: {config_file}")
    console.print(f"  Hosts: {len(cfg.hosts)}")
-    console.print(f"  Services: {len(cfg.services)}")
+    console.print(f"  Stacks: {len(cfg.stacks)}")


@config_app.command("symlink")
@@ -268,11 +252,11 @@ def config_symlink(
    target_path = (target or Path("compose-farm.yaml")).expanduser().resolve()

    if not target_path.exists():
-        err_console.print(f"[red]✗[/] Target config file not found: {target_path}")
+        print_error(f"Target config file not found: {target_path}")
        raise typer.Exit(1)

    if not target_path.is_file():
-        err_console.print(f"[red]✗[/] Target is not a file: {target_path}")
+        print_error(f"Target is not a file: {target_path}")
        raise typer.Exit(1)

    symlink_path = default_config_path()
@@ -282,7 +266,7 @@ def config_symlink(
        if symlink_path.is_symlink():
            current_target = symlink_path.resolve() if symlink_path.exists() else None
            if current_target == target_path:
-                console.print(f"[green]✓[/] Symlink already points to: {target_path}")
+                print_success(f"Symlink already points to: {target_path}")
                return
            # Update existing symlink
            if not force:
@@ -294,8 +278,8 @@ def config_symlink(
            symlink_path.unlink()
        else:
            # Regular file exists
-            err_console.print(f"[red]✗[/] A regular file exists at: {symlink_path}")
-            err_console.print("    Back it up or remove it first, then retry.")
+            print_error(f"A regular file exists at: {symlink_path}")
+            console.print("    Back it up or remove it first, then retry.")
            raise typer.Exit(1)

    # Create parent directories
@@ -304,10 +288,119 @@ def config_symlink(
    # Create symlink with absolute path
    symlink_path.symlink_to(target_path)

-    console.print("[green]✓[/] Created symlink:")
+    print_success("Created symlink:")
    console.print(f"    {symlink_path}")
    console.print(f"    -> {target_path}")


+def _detect_domain(cfg: Config) -> str | None:
+    """Try to detect DOMAIN from traefik Host() rules in existing stacks.
+
+    Uses extract_website_urls from traefik module to get interpolated
+    URLs, then extracts the domain from the first valid URL.
+    Skips local domains (.local, localhost, etc.); checks at most 10 stacks.
+    """
+    from urllib.parse import urlparse  # noqa: PLC0415
+
+    from compose_farm.traefik import extract_website_urls  # noqa: PLC0415
+
+    max_stacks_to_check = 10
+    min_domain_parts = 2
+    subdomain_parts = 4
+    skip_tlds = {"local", "localhost", "internal", "lan", "home"}
+
+    for stack_name in list(cfg.stacks.keys())[:max_stacks_to_check]:
+        urls = extract_website_urls(cfg, stack_name)
+        for url in urls:
+            host = urlparse(url).hostname or ""  # hostname excludes any port/userinfo
+            parts = host.split(".")
+            # Skip local/internal domains
+            if parts[-1].lower() in skip_tlds:
+                continue
+            if len(parts) >= subdomain_parts:
+                # e.g., "app.lab.nijho.lt" -> "lab.nijho.lt"
+                return ".".join(parts[-3:])
+            if len(parts) >= min_domain_parts:
+                # e.g., "app.example.com" -> "example.com"
+                return ".".join(parts[-2:])
+    return None
+
+
+@config_app.command("init-env")
+def config_init_env(
+    path: _PathOption = None,
+    output: Annotated[
+        Path | None,
+        typer.Option(
+            "--output", "-o", help="Output .env file path. Defaults to .env in current directory."
+        ),
+    ] = None,
+    force: _ForceOption = False,
+) -> None:
+    """Generate a .env file for Docker deployment.
+
+    Reads the compose-farm.yaml config and auto-detects settings:
+
+    - CF_COMPOSE_DIR from compose_dir
+    - CF_UID/GID/HOME/USER from current user
+    - DOMAIN from traefik labels in stacks (if found)
+
+    Example::
+
+        cf config init-env                     # Create .env in current directory
+        cf config init-env -o /path/to/.env    # Create .env at specific path
+
+    """
+    config_file, cfg = _load_config_with_path(path)
+
+    # Determine output path (default: current directory)
+    env_path = output.expanduser().resolve() if output else Path.cwd() / ".env"
+
+    if env_path.exists() and not force:
+        console.print(f"[yellow].env file already exists:[/] {env_path}")
+        if not typer.confirm("Overwrite?"):
+            console.print("[dim]Aborted.[/dim]")
+            raise typer.Exit(0)
+
+    # Auto-detect values
+    uid = os.getuid()  # POSIX-only (unavailable on Windows)
+    gid = os.getgid()
+    home = os.environ.get("HOME", "/root")
+    user = os.environ.get("USER", "root")
+    compose_dir = str(cfg.compose_dir)
+    domain = _detect_domain(cfg)
+
+    # Generate .env content
+    lines = [
+        "# Generated by: cf config init-env",
+        f"# From config: {config_file}",
+        "",
+        "# Domain for Traefik labels",
+        f"DOMAIN={domain or 'example.com'}",
+        "",
+        "# Compose files location",
+        f"CF_COMPOSE_DIR={compose_dir}",
+        "",
+        "# Run as current user (recommended for NFS)",
+        f"CF_UID={uid}",
+        f"CF_GID={gid}",
+        f"CF_HOME={home}",
+        f"CF_USER={user}",
+        "",
+    ]
+
+    env_path.write_text("\n".join(lines), encoding="utf-8")  # parent dir must already exist
+
+    print_success(f"Created .env file: {env_path}")
+    console.print()
+    console.print("[dim]Detected settings:[/dim]")
+    console.print(f"  DOMAIN: {domain or '[yellow]example.com[/] (edit this)'}")
+    console.print(f"  CF_COMPOSE_DIR: {compose_dir}")
+    console.print(f"  CF_UID/GID: {uid}:{gid}")
+    console.print()
+    console.print("[dim]Review and edit as needed:[/dim]")
+    console.print(f"  [cyan]$EDITOR {env_path}[/cyan]")
+
+
 # Register config subcommand on the shared app
 app.add_typer(config_app, name="config", rich_help_panel="Configuration")
--- a/src/compose_farm/cli/lifecycle.py
+++ b/src/compose_farm/cli/lifecycle.py
@@ -2,6 +2,8 @@

 from __future__ import annotations

+import shlex
+from pathlib import Path
 from typing import TYPE_CHECKING, Annotated

 import typer
@@ -14,286 +16,421 @@ from compose_farm.cli.common import (
    AllOption,
    ConfigOption,
    HostOption,
-    ServicesArg,
-    get_services,
+    ServiceOption,
+    StacksArg,
+    format_host,
+    get_stacks,
    load_config_or_exit,
    maybe_regenerate_traefik,
    report_results,
    run_async,
-    run_host_operation,
+    validate_host_for_stack,
+    validate_stacks,
+)
+from compose_farm.cli.management import _discover_stacks_full
+from compose_farm.console import MSG_DRY_RUN, console, print_error, print_success
+from compose_farm.executor import run_compose_on_host, run_on_stacks
+from compose_farm.operations import (
+    build_up_cmd,
+    stop_orphaned_stacks,
+    stop_stray_stacks,
+    up_stacks,
 )
-from compose_farm.console import console, err_console
-from compose_farm.executor import run_on_services, run_sequential_on_services
-from compose_farm.operations import stop_orphaned_services, up_services
 from compose_farm.state import (
-    add_service_to_host,
-    get_orphaned_services,
-    get_service_host,
-    get_services_needing_migration,
-    get_services_not_in_state,
-    remove_service,
-    remove_service_from_host,
+    add_stack_host,
+    get_orphaned_stacks,
+    get_stack_host,
+    get_stacks_needing_migration,
+    get_stacks_not_in_state,
+    remove_stack,
 )


@app.command(rich_help_panel="Lifecycle")
 def up(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    host: HostOption = None,
+    service: ServiceOption = None,
+    pull: Annotated[
+        bool,
+        typer.Option("--pull", help="Pull images before starting (--pull always)"),
+    ] = False,
+    build: Annotated[
+        bool,
+        typer.Option("--build", help="Build images before starting"),
+    ] = False,
    config: ConfigOption = None,
 ) -> None:
-    """Start services (docker compose up -d). Auto-migrates if host changed."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-
-    # Per-host operation: run on specific host only
-    if host:
-        run_host_operation(cfg, svc_list, host, "up -d", "Starting", add_service_to_host)
-        return
-
-    # Normal operation: use up_services with migration logic
-    results = run_async(up_services(cfg, svc_list, raw=True))
+    """Start stacks (docker compose up -d). Auto-migrates if host changed."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
+    if service:
+        if len(stack_list) != 1:
+            print_error("--service requires exactly one stack")
+            raise typer.Exit(1)
+        # Service-level up uses run_on_stacks directly (no migration logic); --host is
+        # forwarded so a multi-host stack is only touched on the selected host.
+        up_cmd = build_up_cmd(pull=pull, build=build, service=service)
+        results = run_async(
+            run_on_stacks(cfg, stack_list, up_cmd, raw=True, filter_host=host)
+        )
+    elif host:
+        # For host-filtered up, use run_on_stacks to only affect that host
+        # (skips migration logic, which is intended when explicitly specifying a host)
+        results = run_async(
+            run_on_stacks(
+                cfg,
+                stack_list,
+                build_up_cmd(pull=pull, build=build),
+                raw=True,
+                filter_host=host,
+            )
+        )
+        # Update state for successful host-filtered operations
+        for result in results:
+            if result.success:
+                base_stack = result.stack.split("@")[0]
+                add_stack_host(cfg, base_stack, host)
+    else:
+        results = run_async(up_stacks(cfg, stack_list, raw=True, pull=pull, build=build))
    maybe_regenerate_traefik(cfg, results)
    report_results(results)


@app.command(rich_help_panel="Lifecycle")
 def down(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    orphaned: Annotated[
        bool,
-        typer.Option(
-            "--orphaned", help="Stop orphaned services (in state but removed from config)"
-        ),
+        typer.Option("--orphaned", help="Stop orphaned stacks (in state but removed from config)"),
    ] = False,
    host: HostOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Stop services (docker compose down)."""
-    # Handle --orphaned flag
+    """Stop stacks (docker compose down)."""
+    # Handle --orphaned flag (mutually exclusive with other selection methods)
    if orphaned:
-        if services or all_services or host:
-            err_console.print("[red]✗[/] Cannot use --orphaned with services, --all, or --host")
+        if stacks or all_stacks or host:
+            print_error(
+                "Cannot combine [bold]--orphaned[/] with stacks, [bold]--all[/], or [bold]--host[/]"
+            )
            raise typer.Exit(1)

        cfg = load_config_or_exit(config)
-        orphaned_services = get_orphaned_services(cfg)
+        orphaned_stacks = get_orphaned_stacks(cfg)

-        if not orphaned_services:
-            console.print("[green]✓[/] No orphaned services to stop")
+        if not orphaned_stacks:
+            print_success("No orphaned stacks to stop")
            return

        console.print(
-            f"[yellow]Stopping {len(orphaned_services)} orphaned service(s):[/] "
-            f"{', '.join(orphaned_services.keys())}"
+            f"[yellow]Stopping {len(orphaned_stacks)} orphaned stack(s):[/] "
+            f"{', '.join(orphaned_stacks.keys())}"
        )
-        results = run_async(stop_orphaned_services(cfg))
+        results = run_async(stop_orphaned_stacks(cfg))
        report_results(results)
        return

-    svc_list, cfg = get_services(services or [], all_services, config)
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
+    raw = len(stack_list) == 1
+    results = run_async(run_on_stacks(cfg, stack_list, "down", raw=raw, filter_host=host))

-    # Per-host operation: run on specific host only
-    if host:
-        run_host_operation(cfg, svc_list, host, "down", "Stopping", remove_service_from_host)
-        return
-
-    # Normal operation
-    raw = len(svc_list) == 1
-    results = run_async(run_on_services(cfg, svc_list, "down", raw=raw))
-
-    # Remove from state on success
-    # For multi-host services, result.service is "svc@host", extract base name
-    removed_services: set[str] = set()
+    # Update state on success
+    # For multi-host stacks, result.stack is "stack@host", extract base name
+    updated_stacks: set[str] = set()
    for result in results:
        if result.success:
-            base_service = result.service.split("@")[0]
-            if base_service not in removed_services:
-                remove_service(cfg, base_service)
-                removed_services.add(base_service)
+            base_stack = result.stack.split("@")[0]
+            if base_stack not in updated_stacks:
+                # When host is specified for multi-host stack, removes just that host
+                # Otherwise removes entire stack from state
+                filter_host = host if host and cfg.is_multi_host(base_stack) else None
+                remove_stack(cfg, base_stack, filter_host)
+                updated_stacks.add(base_stack)

    maybe_regenerate_traefik(cfg, results)
    report_results(results)


+@app.command(rich_help_panel="Lifecycle")
+def stop(
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
+    config: ConfigOption = None,
+) -> None:
+    """Stop services without removing containers (docker compose stop)."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
+        raise typer.Exit(1)
+    # Shell-quote the optional service name, mirroring how `compose` quotes its args
+    cmd = f"stop {shlex.quote(service)}" if service else "stop"
+    results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=len(stack_list) == 1))
+    report_results(results)
+
+
@app.command(rich_help_panel="Lifecycle")
 def pull(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
    """Pull latest images (docker compose pull)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(run_on_services(cfg, svc_list, "pull", raw=raw))
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service and len(stack_list) != 1:
+        print_error("--service requires exactly one stack")
+        raise typer.Exit(1)
+    # Shell-quote the optional service name, mirroring how `compose` quotes its args
+    cmd = "pull --ignore-buildable" + (f" {shlex.quote(service)}" if service else "")
+    results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=len(stack_list) == 1))
    report_results(results)


@app.command(rich_help_panel="Lifecycle")
 def restart(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Restart services (down + up)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(run_sequential_on_services(cfg, svc_list, ["down", "up -d"], raw=raw))
-    maybe_regenerate_traefik(cfg, results)
+    """Restart running containers (docker compose restart)."""
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
+    if service:
+        if len(stack_list) != 1:
+            print_error("--service requires exactly one stack")
+            raise typer.Exit(1)
+        cmd = f"restart {shlex.quote(service)}"  # shell-quoted, as `compose` does for args
+    else:
+        cmd = "restart"
+    raw = len(stack_list) == 1
+    results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=raw))
    report_results(results)


@app.command(rich_help_panel="Lifecycle")
 def update(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
+    service: ServiceOption = None,
    config: ConfigOption = None,
 ) -> None:
-    """Update services (pull + build + down + up)."""
-    svc_list, cfg = get_services(services or [], all_services, config)
-    raw = len(svc_list) == 1
-    results = run_async(
-        run_sequential_on_services(
-            cfg, svc_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
-        )
-    )
-    maybe_regenerate_traefik(cfg, results)
-    report_results(results)
+    """Update stacks (pull + build + up). Shorthand for 'up --pull --build'."""
+    up(stacks=stacks, all_stacks=all_stacks, service=service, pull=True, build=True, config=config)


-def _format_host(host: str | list[str]) -> str:
-    """Format a host value for display."""
-    if isinstance(host, list):
-        return ", ".join(host)
-    return host
+def _discover_strays(cfg: Config) -> dict[str, list[str]]:
+    """Discover stacks running on unauthorized hosts by scanning all hosts."""
+    _, strays, duplicates = _discover_stacks_full(cfg)

+    # Merge duplicates into strays (for single-host stacks on multiple hosts,
+    # keep correct host and stop others)
+    for stack, running_hosts in duplicates.items():
+        configured = cfg.get_hosts(stack)[0]
+        stray_hosts = [h for h in running_hosts if h != configured]
+        if stray_hosts:
+            strays[stack] = stray_hosts

-def _report_pending_migrations(cfg: Config, migrations: list[str]) -> None:
-    """Report services that need migration."""
-    console.print(f"[cyan]Services to migrate ({len(migrations)}):[/]")
-    for svc in migrations:
-        current = get_service_host(cfg, svc)
-        target = cfg.get_hosts(svc)[0]
-        console.print(f"  [cyan]{svc}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
-
-
-def _report_pending_orphans(orphaned: dict[str, str | list[str]]) -> None:
-    """Report orphaned services that will be stopped."""
-    console.print(f"[yellow]Orphaned services to stop ({len(orphaned)}):[/]")
-    for svc, hosts in orphaned.items():
-        console.print(f"  [cyan]{svc}[/] on [magenta]{_format_host(hosts)}[/]")
-
-
-def _report_pending_starts(cfg: Config, missing: list[str]) -> None:
-    """Report services that will be started."""
-    console.print(f"[green]Services to start ({len(missing)}):[/]")
-    for svc in missing:
-        target = _format_host(cfg.get_hosts(svc))
-        console.print(f"  [cyan]{svc}[/] on [magenta]{target}[/]")
-
-
-def _report_pending_refresh(cfg: Config, to_refresh: list[str]) -> None:
-    """Report services that will be refreshed."""
-    console.print(f"[blue]Services to refresh ({len(to_refresh)}):[/]")
-    for svc in to_refresh:
-        target = _format_host(cfg.get_hosts(svc))
-        console.print(f"  [cyan]{svc}[/] on [magenta]{target}[/]")
+    return strays


@app.command(rich_help_panel="Lifecycle")
-def apply(
+def apply(  # noqa: C901, PLR0912, PLR0915 (multi-phase reconciliation needs these branches)
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", "-n", help="Show what would change without executing"),
    ] = False,
    no_orphans: Annotated[
        bool,
-        typer.Option("--no-orphans", help="Only migrate, don't stop orphaned services"),
+        typer.Option("--no-orphans", help="Only migrate, don't stop orphaned stacks"),
+    ] = False,
+    no_strays: Annotated[
+        bool,
+        typer.Option("--no-strays", help="Don't stop stray stacks (running on wrong host)"),
    ] = False,
    full: Annotated[
        bool,
-        typer.Option("--full", "-f", help="Also run up on all services to apply config changes"),
+        typer.Option("--full", "-f", help="Also run up on all stacks to apply config changes"),
    ] = False,
    config: ConfigOption = None,
 ) -> None:
-    """Make reality match config (start, migrate, stop as needed).
+    """Make reality match config (start, migrate, stop strays/orphans as needed).

-    This is the "reconcile" command that ensures running services match your
+    This is the "reconcile" command that ensures running stacks match your
    config file. It will:

-    1. Stop orphaned services (in state but removed from config)
-    2. Migrate services on wrong host (host in state ≠ host in config)
-    3. Start missing services (in config but not in state)
+    1. Stop orphaned stacks (in state but removed from config)
+    2. Stop stray stacks (running on unauthorized hosts)
+    3. Migrate stacks on wrong host (host in state ≠ host in config)
+    4. Start missing stacks (in config but not in state)

    Use --dry-run to preview changes before applying.
-    Use --no-orphans to only migrate/start without stopping orphaned services.
-    Use --full to also run 'up' on all services (picks up compose/env changes).
+    Use --no-orphans to skip stopping orphaned stacks.
+    Use --no-strays to skip stopping stray stacks.
+    Use --full to also run 'up' on all stacks (picks up compose/env changes).
    """
    cfg = load_config_or_exit(config)
-    orphaned = get_orphaned_services(cfg)
-    migrations = get_services_needing_migration(cfg)
-    missing = get_services_not_in_state(cfg)
+    orphaned = get_orphaned_stacks(cfg)
+    migrations = get_stacks_needing_migration(cfg)
+    missing = get_stacks_not_in_state(cfg)

-    # For --full: refresh all services not already being started/migrated
+    strays: dict[str, list[str]] = {}
+    if not no_strays:
+        console.print("[dim]Scanning hosts for stray containers...[/]")
+        strays = _discover_strays(cfg)
+
+    # For --full: refresh all stacks not already being started/migrated
    handled = set(migrations) | set(missing)
-    to_refresh = [svc for svc in cfg.services if svc not in handled] if full else []
+    to_refresh = [stack for stack in cfg.stacks if stack not in handled] if full else []

    has_orphans = bool(orphaned) and not no_orphans
+    has_strays = bool(strays)
    has_migrations = bool(migrations)
    has_missing = bool(missing)
    has_refresh = bool(to_refresh)

-    if not has_orphans and not has_migrations and not has_missing and not has_refresh:
-        console.print("[green]✓[/] Nothing to apply - reality matches config")
+    if (
+        not has_orphans
+        and not has_strays
+        and not has_migrations
+        and not has_missing
+        and not has_refresh
+    ):
+        print_success("Nothing to apply - reality matches config")
        return

    # Report what will be done
    if has_orphans:
-        _report_pending_orphans(orphaned)
+        console.print(f"[yellow]Orphaned stacks to stop ({len(orphaned)}):[/]")
+        for svc, hosts in orphaned.items():
+            console.print(f"  [cyan]{svc}[/] on [magenta]{format_host(hosts)}[/]")
+    if has_strays:
+        console.print(f"[red]Stray stacks to stop ({len(strays)}):[/]")
+        for stack, hosts in strays.items():
+            configured = cfg.get_hosts(stack)
+            console.print(
+                f"  [cyan]{stack}[/] on [magenta]{', '.join(hosts)}[/] "
+                f"[dim](should be on {', '.join(configured)})[/]"
+            )
    if has_migrations:
-        _report_pending_migrations(cfg, migrations)
+        console.print(f"[cyan]Stacks to migrate ({len(migrations)}):[/]")
+        for stack in migrations:
+            current = get_stack_host(cfg, stack)
+            target = cfg.get_hosts(stack)[0]
+            console.print(f"  [cyan]{stack}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
    if has_missing:
-        _report_pending_starts(cfg, missing)
+        console.print(f"[green]Stacks to start ({len(missing)}):[/]")
+        for stack in missing:
+            console.print(f"  [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")
    if has_refresh:
-        _report_pending_refresh(cfg, to_refresh)
+        console.print(f"[blue]Stacks to refresh ({len(to_refresh)}):[/]")
+        for stack in to_refresh:
+            console.print(f"  [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")

    if dry_run:
-        console.print("\n[dim](dry-run: no changes made)[/]")
+        console.print(f"\n{MSG_DRY_RUN}")
        return

    # Execute changes
    console.print()
    all_results = []

-    # 1. Stop orphaned services first
+    # 1. Stop orphaned stacks first
    if has_orphans:
-        console.print("[yellow]Stopping orphaned services...[/]")
-        all_results.extend(run_async(stop_orphaned_services(cfg)))
+        console.print("[yellow]Stopping orphaned stacks...[/]")
+        all_results.extend(run_async(stop_orphaned_stacks(cfg)))

-    # 2. Migrate services on wrong host
+    # 2. Stop stray stacks (running on unauthorized hosts)
+    if has_strays:
+        console.print("[red]Stopping stray stacks...[/]")
+        all_results.extend(run_async(stop_stray_stacks(cfg, strays)))
+
+    # 3. Migrate stacks on wrong host
    if has_migrations:
-        console.print("[cyan]Migrating services...[/]")
-        migrate_results = run_async(up_services(cfg, migrations, raw=True))
+        console.print("[cyan]Migrating stacks...[/]")
+        migrate_results = run_async(up_stacks(cfg, migrations, raw=True))
        all_results.extend(migrate_results)
        maybe_regenerate_traefik(cfg, migrate_results)

-    # 3. Start missing services (reuse up_services which handles state updates)
+    # 4. Start missing stacks (reuse up_stacks which handles state updates)
    if has_missing:
-        console.print("[green]Starting missing services...[/]")
-        start_results = run_async(up_services(cfg, missing, raw=True))
+        console.print("[green]Starting missing stacks...[/]")
+        start_results = run_async(up_stacks(cfg, missing, raw=True))
        all_results.extend(start_results)
        maybe_regenerate_traefik(cfg, start_results)

-    # 4. Refresh remaining services (--full: run up to apply config changes)
+    # 5. Refresh remaining stacks (--full: run up to apply config changes)
    if has_refresh:
-        console.print("[blue]Refreshing services...[/]")
-        refresh_results = run_async(up_services(cfg, to_refresh, raw=True))
+        console.print("[blue]Refreshing stacks...[/]")
+        refresh_results = run_async(up_stacks(cfg, to_refresh, raw=True))
        all_results.extend(refresh_results)
        maybe_regenerate_traefik(cfg, refresh_results)

    report_results(all_results)


-# Alias: cf a = cf apply
-app.command("a", hidden=True)(apply)
+@app.command(
+    rich_help_panel="Lifecycle",
+    context_settings={"allow_interspersed_args": False},
+)
+def compose(
+    stack: Annotated[str, typer.Argument(help="Stack to operate on (use '.' for current dir)")],
+    command: Annotated[str, typer.Argument(help="Docker compose command")],
+    args: Annotated[list[str] | None, typer.Argument(help="Additional arguments")] = None,
+    host: HostOption = None,
+    config: ConfigOption = None,
+) -> None:
+    """Run any docker compose command on a stack.
+
+    Passthrough to docker compose for commands not wrapped by cf.
+    Options after COMMAND are passed to docker compose, not cf.
+
+    Examples:
+      cf compose mystack --help        - show docker compose help
+      cf compose mystack top           - view running processes
+      cf compose mystack images        - list images
+      cf compose mystack exec web bash - interactive shell
+      cf compose mystack config        - view parsed config
+
+    """
+    cfg = load_config_or_exit(config)
+
+    # "." is shorthand for the name of the current working directory
+    resolved_stack = stack if stack != "." else Path.cwd().name
+    validate_stacks(cfg, [resolved_stack])
+
+    # Pick the target host: a multi-host stack needs an explicit, validated --host
+    hosts = cfg.get_hosts(resolved_stack)
+    if len(hosts) <= 1:
+        target_host = hosts[0]
+    elif host is not None:
+        validate_host_for_stack(cfg, resolved_stack, host)
+        target_host = host
+    else:
+        # Ambiguous target: refuse to guess which host was meant
+        print_error(
+            f"Stack [cyan]{resolved_stack}[/] runs on multiple hosts: {', '.join(hosts)}\n"
+            f"Use [bold]--host[/] to specify which host"
+        )
+        raise typer.Exit(1)
+
+    # Assemble the compose command line (extra args are quoted to preserve spaces)
+    quoted_args = [shlex.quote(arg) for arg in args or []]
+    full_cmd = " ".join([command, *quoted_args])
+
+    # raw=True gives proper TTY handling (progress bars, interactive)
+    result = run_async(run_compose_on_host(cfg, resolved_stack, target_host, full_cmd, raw=True))
+    print()  # Ensure newline after raw output
+
+    if not result.success:
+        raise typer.Exit(result.exit_code)
+
+
+# Aliases (hidden from help, shown in --help as "Aliases: ...")
+app.command("a", hidden=True)(apply)  # cf a = cf apply
+app.command("r", hidden=True)(restart)  # cf r = cf restart
+app.command("u", hidden=True)(update)  # cf u = cf update
+app.command("p", hidden=True)(pull)  # cf p = cf pull
+app.command("c", hidden=True)(compose)  # cf c = cf compose
--- a/src/compose_farm/cli/management.py
+++ b/src/compose_farm/cli/management.py
@@ -8,7 +8,6 @@ from pathlib import Path  # noqa: TC003
 from typing import TYPE_CHECKING, Annotated

 import typer
-from rich.progress import Progress, TaskID  # noqa: TC002

 from compose_farm.cli.app import app
 from compose_farm.cli.common import (
@@ -16,97 +15,85 @@ from compose_farm.cli.common import (
    AllOption,
    ConfigOption,
    LogPathOption,
-    ServicesArg,
-    get_services,
+    StacksArg,
+    format_host,
+    get_stacks,
    load_config_or_exit,
-    progress_bar,
    run_async,
+    run_parallel_with_progress,
+    validate_hosts,
+    validate_stacks,
 )

 if TYPE_CHECKING:
    from compose_farm.config import Config

-from compose_farm.console import console, err_console
+from compose_farm.console import (
+    MSG_DRY_RUN,
+    console,
+    print_error,
+    print_success,
+    print_warning,
+)
 from compose_farm.executor import (
    CommandResult,
+    get_running_stacks_on_host,
    is_local,
    run_command,
 )
 from compose_farm.logs import (
    DEFAULT_LOG_PATH,
    SnapshotEntry,
-    collect_service_entries,
+    collect_stacks_entries_on_host,
    isoformat,
    load_existing_entries,
    merge_entries,
    write_toml,
 )
 from compose_farm.operations import (
+    build_discovery_results,
    check_host_compatibility,
-    check_service_requirements,
-    discover_service_host,
+    check_stack_requirements,
 )
-from compose_farm.state import get_orphaned_services, load_state, save_state
-from compose_farm.traefik import generate_traefik_config, render_traefik_config
+from compose_farm.state import get_orphaned_stacks, load_state, save_state

 # --- Sync helpers ---


-def _discover_services(cfg: Config) -> dict[str, str | list[str]]:
-    """Discover running services with a progress bar."""
-
-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID
-    ) -> dict[str, str | list[str]]:
-        tasks = [asyncio.create_task(discover_service_host(cfg, s)) for s in cfg.services]
-        discovered: dict[str, str | list[str]] = {}
-        for coro in asyncio.as_completed(tasks):
-            service, host = await coro
-            if host is not None:
-                discovered[service] = host
-            progress.update(task_id, advance=1, description=f"[cyan]{service}[/]")
-        return discovered
-
-    with progress_bar("Discovering", len(cfg.services)) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
-
-
-def _snapshot_services(
+def _snapshot_stacks(
    cfg: Config,
-    services: list[str],
+    discovered: dict[str, str | list[str]],
    log_path: Path | None,
 ) -> Path:
-    """Capture image digests with a progress bar."""
+    """Capture image digests using batched SSH calls (1 per host).

-    async def collect_service(service: str, now: datetime) -> list[SnapshotEntry]:
-        try:
-            return await collect_service_entries(cfg, service, now=now)
-        except RuntimeError:
-            return []
+    Args:
+        cfg: Configuration
+        discovered: Dict mapping stack -> host(s) where it's running
+        log_path: Optional path to write the log file

-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID, now: datetime, svc_list: list[str]
-    ) -> list[SnapshotEntry]:
-        # Map tasks to service names so we can update description
-        task_to_service = {asyncio.create_task(collect_service(s, now)): s for s in svc_list}
-        all_entries: list[SnapshotEntry] = []
-        for coro in asyncio.as_completed(list(task_to_service.keys())):
-            entries = await coro
-            all_entries.extend(entries)
-            # Find which service just completed (by checking done tasks)
-            for t, svc in task_to_service.items():
-                if t.done() and not hasattr(t, "_reported"):
-                    t._reported = True  # type: ignore[attr-defined]
-                    progress.update(task_id, advance=1, description=f"[cyan]{svc}[/]")
-                    break
-        return all_entries
+    Returns:
+        Path to the written log file.

+    """
    effective_log_path = log_path or DEFAULT_LOG_PATH
    now_dt = datetime.now(UTC)
    now_iso = isoformat(now_dt)

-    with progress_bar("Capturing", len(services)) as (progress, task_id):
-        snapshot_entries = asyncio.run(gather_with_progress(progress, task_id, now_dt, services))
+    # Group stacks by host for batched SSH calls
+    stacks_by_host: dict[str, set[str]] = {}
+    for stack, hosts in discovered.items():
+        # Use first host for multi-host stacks (they use the same images)
+        host = hosts[0] if isinstance(hosts, list) else hosts
+        stacks_by_host.setdefault(host, set()).add(stack)
+
+    # Collect entries with 1 SSH call per host (with progress bar)
+    async def collect_on_host(host: str) -> tuple[str, list[SnapshotEntry]]:
+        entries = await collect_stacks_entries_on_host(cfg, host, stacks_by_host[host], now=now_dt)
+        return host, entries
+
+    results = run_parallel_with_progress("Capturing", list(stacks_by_host.keys()), collect_on_host)
+    snapshot_entries = [entry for _, entries in results for entry in entries]

    if not snapshot_entries:
        msg = "No image digests were captured"
@@ -119,11 +106,16 @@ def _snapshot_services(
    return effective_log_path


-def _format_host(host: str | list[str]) -> str:
-    """Format a host value for display."""
-    if isinstance(host, list):
-        return ", ".join(host)
-    return host
+def _merge_state(
+    current_state: dict[str, str | list[str]],
+    discovered: dict[str, str | list[str]],
+    removed: list[str],
+) -> dict[str, str | list[str]]:
+    """Overlay discovered stacks on the current state, then drop removed ones."""
+    # Discovered entries win over current_state; removals are applied last
+    gone = set(removed)
+    merged = {**current_state, **discovered}
+    return {name: hosts for name, hosts in merged.items() if name not in gone}


 def _report_sync_changes(
@@ -135,25 +127,80 @@ def _report_sync_changes(
 ) -> None:
    """Report sync changes to the user."""
    if added:
-        console.print(f"\nNew services found ({len(added)}):")
-        for service in sorted(added):
-            host_str = _format_host(discovered[service])
-            console.print(f"  [green]+[/] [cyan]{service}[/] on [magenta]{host_str}[/]")
+        console.print(f"\nNew stacks found ({len(added)}):")
+        for stack in sorted(added):
+            host_str = format_host(discovered[stack])
+            console.print(f"  [green]+[/] [cyan]{stack}[/] on [magenta]{host_str}[/]")

    if changed:
-        console.print(f"\nServices on different hosts ({len(changed)}):")
-        for service, old_host, new_host in sorted(changed):
-            old_str = _format_host(old_host)
-            new_str = _format_host(new_host)
+        console.print(f"\nStacks on different hosts ({len(changed)}):")
+        for stack, old_host, new_host in sorted(changed):
+            old_str = format_host(old_host)
+            new_str = format_host(new_host)
            console.print(
-                f"  [yellow]~[/] [cyan]{service}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
+                f"  [yellow]~[/] [cyan]{stack}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
            )

    if removed:
-        console.print(f"\nServices no longer running ({len(removed)}):")
-        for service in sorted(removed):
-            host_str = _format_host(current_state[service])
-            console.print(f"  [red]-[/] [cyan]{service}[/] (was on [magenta]{host_str}[/])")
+        console.print(f"\nStacks no longer running ({len(removed)}):")
+        for stack in sorted(removed):
+            host_str = format_host(current_state[stack])
+            console.print(f"  [red]-[/] [cyan]{stack}[/] (was on [magenta]{host_str}[/])")
+
+
+def _discover_stacks_full(
+    cfg: Config,
+    stacks: list[str] | None = None,
+) -> tuple[dict[str, str | list[str]], dict[str, list[str]], dict[str, list[str]]]:
+    """Scan every configured host once and categorize the running stacks.
+
+    Hosts are queried via run_parallel_with_progress (labelled "Discovering");
+    the per-host results are then passed to build_discovery_results with ``stacks``.
+    """
+
+    async def _scan(host_name: str) -> tuple[str, set[str]]:
+        # Pair each result with its host so the results can be keyed by host
+        return host_name, await get_running_stacks_on_host(cfg, host_name)
+
+    host_names = list(cfg.hosts.keys())
+    scanned = run_parallel_with_progress("Discovering", host_names, _scan)
+    running_on_host: dict[str, set[str]] = {name: found for name, found in scanned}
+
+    # Categorization of the raw per-host sets is delegated to build_discovery_results
+    return build_discovery_results(cfg, running_on_host, stacks)
+
+
+def _report_stray_stacks(
+    strays: dict[str, list[str]],
+    cfg: Config,
+) -> None:
+    """List stacks that run on hosts they are not configured for."""
+    if not strays:
+        return  # nothing to report, so print no header either
+    console.print(f"\n[red]Stray stacks[/] (running on wrong host, {len(strays)}):")
+    console.print("[dim]Run [bold]cf apply[/bold] to stop them.[/]")
+    for stack, stray_hosts in sorted(strays.items()):
+        configured = cfg.get_hosts(stack)
+        console.print(
+            f"  [red]![/] [cyan]{stack}[/] on [magenta]{', '.join(stray_hosts)}[/] "
+            f"[dim](should be on {', '.join(configured)})[/]"
+        )
+
+
+def _report_duplicate_stacks(duplicates: dict[str, list[str]], cfg: Config) -> None:
+    """Print a warning section for stacks found running on more than one host."""
+    # Stay silent (no header) when there is nothing to report
+    if not duplicates:
+        return
+    console.print(f"\n[yellow]Duplicate stacks[/] (running on multiple hosts, {len(duplicates)}):")
+    console.print("[dim]Run [bold]cf apply[/bold] to stop extras.[/]")
+    for stack, hosts in sorted(duplicates.items()):
+        # Only the first configured host is named as the expected location
+        configured = cfg.get_hosts(stack)[0]
+        console.print(
+            f"  [yellow]![/] [cyan]{stack}[/] on [magenta]{', '.join(hosts)}[/] "
+            f"[dim](should only be on {configured})[/]"
+        )


 # --- Check helpers ---
@@ -171,83 +218,77 @@ def _check_ssh_connectivity(cfg: Config) -> list[str]:

    async def check_host(host_name: str) -> tuple[str, bool]:
        host = cfg.hosts[host_name]
-        result = await run_command(host, "echo ok", host_name, stream=False)
-        return host_name, result.success
+        try:
+            result = await asyncio.wait_for(
+                run_command(host, "echo ok", host_name, stream=False),
+                timeout=5.0,
+            )
+            return host_name, result.success
+        except TimeoutError:
+            return host_name, False

-    async def gather_with_progress(progress: Progress, task_id: TaskID) -> list[str]:
-        tasks = [asyncio.create_task(check_host(h)) for h in remote_hosts]
-        unreachable: list[str] = []
-        for coro in asyncio.as_completed(tasks):
-            host_name, success = await coro
-            if not success:
-                unreachable.append(host_name)
-            progress.update(task_id, advance=1, description=f"[cyan]{host_name}[/]")
-        return unreachable
-
-    with progress_bar("Checking SSH connectivity", len(remote_hosts)) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+    results = run_parallel_with_progress(
+        "Checking SSH connectivity",
+        remote_hosts,
+        check_host,
+    )
+    return [host for host, success in results if not success]


-def _check_service_requirements(
+def _check_stack_requirements(
    cfg: Config,
-    services: list[str],
+    stacks: list[str],
 ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
-    """Check mounts, networks, and devices for all services with a progress bar.
+    """Check mounts, networks, and devices for all stacks with a progress bar.

    Returns (mount_errors, network_errors, device_errors) where each is a list of
-    (service, host, missing_item) tuples.
+    (stack, host, missing_item) tuples.
    """

-    async def check_service(
-        service: str,
+    async def check_stack(
+        stack: str,
    ) -> tuple[
        str,
        list[tuple[str, str, str]],
        list[tuple[str, str, str]],
        list[tuple[str, str, str]],
    ]:
-        """Check requirements for a single service on all its hosts."""
-        host_names = cfg.get_hosts(service)
+        """Check requirements for a single stack on all its hosts."""
+        host_names = cfg.get_hosts(stack)
        mount_errors: list[tuple[str, str, str]] = []
        network_errors: list[tuple[str, str, str]] = []
        device_errors: list[tuple[str, str, str]] = []

        for host_name in host_names:
-            missing_paths, missing_nets, missing_devs = await check_service_requirements(
-                cfg, service, host_name
+            missing_paths, missing_nets, missing_devs = await check_stack_requirements(
+                cfg, stack, host_name
            )
-            mount_errors.extend((service, host_name, p) for p in missing_paths)
-            network_errors.extend((service, host_name, n) for n in missing_nets)
-            device_errors.extend((service, host_name, d) for d in missing_devs)
+            mount_errors.extend((stack, host_name, p) for p in missing_paths)
+            network_errors.extend((stack, host_name, n) for n in missing_nets)
+            device_errors.extend((stack, host_name, d) for d in missing_devs)

-        return service, mount_errors, network_errors, device_errors
+        return stack, mount_errors, network_errors, device_errors

-    async def gather_with_progress(
-        progress: Progress, task_id: TaskID
-    ) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
-        tasks = [asyncio.create_task(check_service(s)) for s in services]
-        all_mount_errors: list[tuple[str, str, str]] = []
-        all_network_errors: list[tuple[str, str, str]] = []
-        all_device_errors: list[tuple[str, str, str]] = []
+    results = run_parallel_with_progress(
+        "Checking requirements",
+        stacks,
+        check_stack,
+    )

-        for coro in asyncio.as_completed(tasks):
-            service, mount_errs, net_errs, dev_errs = await coro
-            all_mount_errors.extend(mount_errs)
-            all_network_errors.extend(net_errs)
-            all_device_errors.extend(dev_errs)
-            progress.update(task_id, advance=1, description=f"[cyan]{service}[/]")
+    all_mount_errors: list[tuple[str, str, str]] = []
+    all_network_errors: list[tuple[str, str, str]] = []
+    all_device_errors: list[tuple[str, str, str]] = []
+    for _, mount_errs, net_errs, dev_errs in results:
+        all_mount_errors.extend(mount_errs)
+        all_network_errors.extend(net_errs)
+        all_device_errors.extend(dev_errs)

-        return all_mount_errors, all_network_errors, all_device_errors
-
-    with progress_bar(
-        "Checking requirements", len(services), initial_description="[dim]checking...[/]"
-    ) as (progress, task_id):
-        return asyncio.run(gather_with_progress(progress, task_id))
+    return all_mount_errors, all_network_errors, all_device_errors


 def _report_config_status(cfg: Config) -> bool:
    """Check and report config vs disk status. Returns True if errors found."""
-    configured = set(cfg.services.keys())
+    configured = set(cfg.stacks.keys())
    on_disk = cfg.discover_compose_dirs()
    unmanaged = sorted(on_disk - configured)
    missing_from_disk = sorted(configured - on_disk)
@@ -263,86 +304,57 @@ def _report_config_status(cfg: Config) -> bool:
            console.print(f"  [red]-[/] [cyan]{name}[/]")

    if not unmanaged and not missing_from_disk:
-        console.print("[green]✓[/] Config matches disk")
+        print_success("Config matches disk")

    return bool(missing_from_disk)


-def _report_orphaned_services(cfg: Config) -> bool:
-    """Check for services in state but not in config. Returns True if orphans found."""
-    orphaned = get_orphaned_services(cfg)
+def _report_orphaned_stacks(cfg: Config) -> bool:
+    """Check for stacks in state but not in config. Returns True if orphans found."""
+    orphaned = get_orphaned_stacks(cfg)

    if orphaned:
-        console.print("\n[yellow]Orphaned services[/] (in state but not in config):")
+        console.print("\n[yellow]Orphaned stacks[/] (in state but not in config):")
        console.print(
-            "[dim]Run 'cf apply' to stop them, or 'cf down --orphaned' for just orphans.[/]"
+            "[dim]Run [bold]cf apply[/bold] to stop them, or [bold]cf down --orphaned[/bold] for just orphans.[/]"
        )
        for name, hosts in sorted(orphaned.items()):
-            host_str = ", ".join(hosts) if isinstance(hosts, list) else hosts
-            console.print(f"  [yellow]![/] [cyan]{name}[/] on [magenta]{host_str}[/]")
+            console.print(f"  [yellow]![/] [cyan]{name}[/] on [magenta]{format_host(hosts)}[/]")
        return True

    return False


-def _report_traefik_status(cfg: Config, services: list[str]) -> None:
+def _report_traefik_status(cfg: Config, stacks: list[str]) -> None:
    """Check and report traefik label status."""
+    from compose_farm.traefik import generate_traefik_config  # noqa: PLC0415
+
    try:
-        _, warnings = generate_traefik_config(cfg, services, check_all=True)
+        _, warnings = generate_traefik_config(cfg, stacks, check_all=True)
    except (FileNotFoundError, ValueError):
        return

    if warnings:
        console.print(f"\n[yellow]Traefik issues[/] ({len(warnings)}):")
        for warning in warnings:
-            console.print(f"  [yellow]![/] {warning}")
+            print_warning(warning)
    else:
-        console.print("[green]✓[/] Traefik labels valid")
+        print_success("Traefik labels valid")


-def _report_mount_errors(mount_errors: list[tuple[str, str, str]]) -> None:
-    """Report mount errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, path in mount_errors:
-        by_service.setdefault(svc, []).append((host, path))
+def _report_requirement_errors(errors: list[tuple[str, str, str]], category: str) -> None:
+    """Report requirement errors (mounts, networks, devices) grouped by stack (first error's host shown)."""
+    by_stack: dict[str, list[tuple[str, str]]] = {}
+    for stack, host, item in errors:
+        by_stack.setdefault(stack, []).append((host, item))

-    console.print(f"[red]Missing mounts[/] ({len(mount_errors)}):")
-    for svc, items in sorted(by_service.items()):
+    console.print(f"[red]Missing {category}[/] ({len(errors)}):")
+    for stack, items in sorted(by_stack.items()):
        host = items[0][0]
-        paths = [p for _, p in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for path in paths:
-            console.print(f"    [red]✗[/] {path}")
-
-
-def _report_network_errors(network_errors: list[tuple[str, str, str]]) -> None:
-    """Report network errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, net in network_errors:
-        by_service.setdefault(svc, []).append((host, net))
-
-    console.print(f"[red]Missing networks[/] ({len(network_errors)}):")
-    for svc, items in sorted(by_service.items()):
-        host = items[0][0]
-        networks = [n for _, n in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for net in networks:
-            console.print(f"    [red]✗[/] {net}")
-
-
-def _report_device_errors(device_errors: list[tuple[str, str, str]]) -> None:
-    """Report device errors grouped by service."""
-    by_service: dict[str, list[tuple[str, str]]] = {}
-    for svc, host, dev in device_errors:
-        by_service.setdefault(svc, []).append((host, dev))
-
-    console.print(f"[red]Missing devices[/] ({len(device_errors)}):")
-    for svc, items in sorted(by_service.items()):
-        host = items[0][0]
-        devices = [d for _, d in items]
-        console.print(f"  [cyan]{svc}[/] on [magenta]{host}[/]:")
-        for dev in devices:
-            console.print(f"    [red]✗[/] {dev}")
+        missing = [i for _, i in items]
+        console.print(f"  [cyan]{stack}[/] on [magenta]{host}[/]:")
+        for item in missing:
+            console.print(f"    [red]✗[/] {item}")


 def _report_ssh_status(unreachable_hosts: list[str]) -> bool:
@@ -350,9 +362,9 @@ def _report_ssh_status(unreachable_hosts: list[str]) -> bool:
    if unreachable_hosts:
        console.print(f"[red]Unreachable hosts[/] ({len(unreachable_hosts)}):")
        for host in sorted(unreachable_hosts):
-            console.print(f"  [red]✗[/] [magenta]{host}[/]")
+            print_error(f"[magenta]{host}[/]")
        return True
-    console.print("[green]✓[/] All hosts reachable")
+    print_success("All hosts reachable")
    return False


@@ -360,7 +372,7 @@ def _report_host_compatibility(
    compat: dict[str, tuple[int, int, list[str]]],
    assigned_hosts: list[str],
 ) -> None:
-    """Report host compatibility for a service."""
+    """Report host compatibility for a stack."""
    for host_name, (found, total, missing) in sorted(compat.items()):
        is_assigned = host_name in assigned_hosts
        marker = " [dim](assigned)[/]" if is_assigned else ""
@@ -391,25 +403,25 @@ def _run_remote_checks(cfg: Config, svc_list: list[str], *, show_host_compat: bo
    console.print()  # Spacing before mounts/networks check

    # Check mounts, networks, and devices
-    mount_errors, network_errors, device_errors = _check_service_requirements(cfg, svc_list)
+    mount_errors, network_errors, device_errors = _check_stack_requirements(cfg, svc_list)

    if mount_errors:
-        _report_mount_errors(mount_errors)
+        _report_requirement_errors(mount_errors, "mounts")
        has_errors = True
    if network_errors:
-        _report_network_errors(network_errors)
+        _report_requirement_errors(network_errors, "networks")
        has_errors = True
    if device_errors:
-        _report_device_errors(device_errors)
+        _report_requirement_errors(device_errors, "devices")
        has_errors = True
    if not mount_errors and not network_errors and not device_errors:
-        console.print("[green]✓[/] All mounts, networks, and devices exist")
+        print_success("All mounts, networks, and devices exist")

    if show_host_compat:
-        for service in svc_list:
-            console.print(f"\n[bold]Host compatibility for[/] [cyan]{service}[/]:")
-            compat = run_async(check_host_compatibility(cfg, service))
-            assigned_hosts = cfg.get_hosts(service)
+        for stack in svc_list:
+            console.print(f"\n[bold]Host compatibility for[/] [cyan]{stack}[/]:")
+            compat = run_async(check_host_compatibility(cfg, stack))
+            assigned_hosts = cfg.get_hosts(stack)
            _report_host_compatibility(compat, assigned_hosts)

    return has_errors
@@ -423,8 +435,8 @@ _DEFAULT_NETWORK_GATEWAY = "172.20.0.1"

@app.command("traefik-file", rich_help_panel="Configuration")
 def traefik_file(
-    services: ServicesArg = None,
-    all_services: AllOption = False,
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    output: Annotated[
        Path | None,
        typer.Option(
@@ -436,11 +448,16 @@ def traefik_file(
    config: ConfigOption = None,
 ) -> None:
    """Generate a Traefik file-provider fragment from compose Traefik labels."""
-    svc_list, cfg = get_services(services or [], all_services, config)
+    from compose_farm.traefik import (  # noqa: PLC0415
+        generate_traefik_config,
+        render_traefik_config,
+    )
+
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
    try:
-        dynamic, warnings = generate_traefik_config(cfg, svc_list)
+        dynamic, warnings = generate_traefik_config(cfg, stack_list)
    except (FileNotFoundError, ValueError) as exc:
-        err_console.print(f"[red]✗[/] {exc}")
+        print_error(str(exc))
        raise typer.Exit(1) from exc

    rendered = render_traefik_config(dynamic)
@@ -448,16 +465,18 @@ def traefik_file(
    if output:
        output.parent.mkdir(parents=True, exist_ok=True)
        output.write_text(rendered)
-        console.print(f"[green]✓[/] Traefik config written to {output}")
+        print_success(f"Traefik config written to {output}")
    else:
        console.print(rendered)

    for warning in warnings:
-        err_console.print(f"[yellow]![/] {warning}")
+        print_warning(warning)


@app.command(rich_help_panel="Configuration")
 def refresh(
+    stacks: StacksArg = None,
+    all_stacks: AllOption = False,
    config: ConfigOption = None,
    log_path: LogPathOption = None,
    dry_run: Annotated[
@@ -465,22 +484,35 @@ def refresh(
        typer.Option("--dry-run", "-n", help="Show what would change without writing"),
    ] = False,
 ) -> None:
-    """Update local state from running services.
+    """Update local state from running stacks.

-    Discovers which services are running on which hosts, updates the state
+    Discovers which stacks are running on which hosts, updates the state
    file, and captures image digests. This is a read operation - it updates
    your local state to match reality, not the other way around.

+    Without arguments: refreshes all stacks (same as --all).
+    With stack names: refreshes only those stacks.
+
    Use 'cf apply' to make reality match your config (stop orphans, migrate).
    """
-    cfg = load_config_or_exit(config)
+    stack_list, cfg = get_stacks(stacks or [], all_stacks, config, default_all=True)
+
+    # Partial refresh merges with existing state; full refresh replaces it
+    # Partial = specific stacks provided (not --all, not default)
+    partial_refresh = bool(stacks) and not all_stacks
+
    current_state = load_state(cfg)

-    discovered = _discover_services(cfg)
+    discovered, strays, duplicates = _discover_stacks_full(cfg, stack_list)

-    # Calculate changes
+    # Calculate changes (only for the stacks we're refreshing)
    added = [s for s in discovered if s not in current_state]
-    removed = [s for s in current_state if s not in discovered]
+    # A full refresh treats every tracked stack that was not discovered as removed
+    if partial_refresh:
+        # A partial refresh only considers the explicitly requested stacks for removal
+        removed = [s for s in stack_list if s in current_state and s not in discovered]
+    else:
+        removed = [s for s in current_state if s not in discovered]
    changed = [
        (s, current_state[s], discovered[s])
        for s in discovered
@@ -492,29 +524,35 @@ def refresh(
    if state_changed:
        _report_sync_changes(added, removed, changed, discovered, current_state)
    else:
-        console.print("[green]✓[/] State is already in sync.")
+        print_success("State is already in sync.")
+
+    _report_stray_stacks(strays, cfg)
+    _report_duplicate_stacks(duplicates, cfg)

    if dry_run:
-        console.print("\n[dim](dry-run: no changes made)[/]")
+        console.print(f"\n{MSG_DRY_RUN}")
        return

    # Update state file
    if state_changed:
-        save_state(cfg, discovered)
-        console.print(f"\n[green]✓[/] State updated: {len(discovered)} services tracked.")
+        new_state = (
+            _merge_state(current_state, discovered, removed) if partial_refresh else discovered
+        )
+        save_state(cfg, new_state)
+        print_success(f"State updated: {len(new_state)} stacks tracked.")

-    # Capture image digests for running services
+    # Capture image digests for running stacks (1 SSH call per host)
    if discovered:
        try:
-            path = _snapshot_services(cfg, list(discovered.keys()), log_path)
-            console.print(f"[green]✓[/] Digests written to {path}")
+            path = _snapshot_stacks(cfg, discovered, log_path)
+            print_success(f"Digests written to {path}")
        except RuntimeError as exc:
-            err_console.print(f"[yellow]![/] {exc}")
+            print_warning(str(exc))


@app.command(rich_help_panel="Configuration")
 def check(
-    services: ServicesArg = None,
+    stacks: StacksArg = None,
    local: Annotated[
        bool,
        typer.Option("--local", help="Skip SSH-based checks (faster)"),
@@ -523,35 +561,31 @@ def check(
 ) -> None:
    """Validate configuration, traefik labels, mounts, and networks.

-    Without arguments: validates all services against configured hosts.
-    With service arguments: validates specific services and shows host compatibility.
+    Without arguments: validates all stacks against configured hosts.
+    With stack arguments: validates specific stacks and shows host compatibility.

    Use --local to skip SSH-based checks for faster validation.
    """
    cfg = load_config_or_exit(config)

-    # Determine which services to check and whether to show host compatibility
-    if services:
-        svc_list = list(services)
-        invalid = [s for s in svc_list if s not in cfg.services]
-        if invalid:
-            for svc in invalid:
-                err_console.print(f"[red]✗[/] Service '{svc}' not found in config")
-            raise typer.Exit(1)
+    # Determine which stacks to check and whether to show host compatibility
+    if stacks:
+        stack_list = list(stacks)
+        validate_stacks(cfg, stack_list)
        show_host_compat = True
    else:
-        svc_list = list(cfg.services.keys())
+        stack_list = list(cfg.stacks.keys())
        show_host_compat = False

    # Run checks
    has_errors = _report_config_status(cfg)
-    _report_traefik_status(cfg, svc_list)
+    _report_traefik_status(cfg, stack_list)

-    if not local and _run_remote_checks(cfg, svc_list, show_host_compat=show_host_compat):
+    if not local and _run_remote_checks(cfg, stack_list, show_host_compat=show_host_compat):
        has_errors = True

-    # Check for orphaned services (in state but removed from config)
-    if _report_orphaned_services(cfg):
+    # Check for orphaned stacks (in state but removed from config)
+    if _report_orphaned_stacks(cfg):
        has_errors = True

    if has_errors:
@@ -580,18 +614,14 @@ def init_network(
 ) -> None:
    """Create Docker network on hosts with consistent settings.

-    Creates an external Docker network that services can use for cross-host
+    Creates an external Docker network that stacks can use for cross-host
    communication. Uses the same subnet/gateway on all hosts to ensure
    consistent networking.
    """
    cfg = load_config_or_exit(config)

    target_hosts = list(hosts) if hosts else list(cfg.hosts.keys())
-    invalid = [h for h in target_hosts if h not in cfg.hosts]
-    if invalid:
-        for h in invalid:
-            err_console.print(f"[red]✗[/] Host '{h}' not found in config")
-        raise typer.Exit(1)
+    validate_hosts(cfg, target_hosts)

    async def create_network_on_host(host_name: str) -> CommandResult:
        host = cfg.hosts[host_name]
@@ -601,7 +631,7 @@ def init_network(

        if check_result.success:
            console.print(f"[cyan]\\[{host_name}][/] Network '{network}' already exists")
-            return CommandResult(service=host_name, exit_code=0, success=True)
+            return CommandResult(stack=host_name, exit_code=0, success=True)

        # Create the network
        create_cmd = (
@@ -616,9 +646,8 @@ def init_network(
        if result.success:
            console.print(f"[cyan]\\[{host_name}][/] [green]✓[/] Created network '{network}'")
        else:
-            err_console.print(
-                f"[cyan]\\[{host_name}][/] [red]✗[/] Failed to create network: "
-                f"{result.stderr.strip()}"
+            print_error(
+                f"[cyan]\\[{host_name}][/] Failed to create network: {result.stderr.strip()}"
            )

        return result
@@ -630,3 +659,9 @@ def init_network(
    failed = [r for r in results if not r.success]
    if failed:
        raise typer.Exit(1)
+
+
+# Aliases (hidden from help)
+app.command("rf", hidden=True)(refresh)  # cf rf = cf refresh
+app.command("ck", hidden=True)(check)  # cf ck = cf check
+app.command("tf", hidden=True)(traefik_file)  # cf tf = cf traefik-file
--- a/Show More
+++ b/Show More