Compare commits

...

224 Commits

Author SHA1 Message Date
Bas Nijholt
d931784935 fix(web): Make rainbow glow animation more visible on mobile (#46)
The 900% background-size meant only ~11% of the gradient was visible
at any time. On smaller screens, the rainbow colors would flash by
too quickly during the intro animation, appearing mostly white.

Use a CSS variable for background-size and reduce it to 500% on
mobile (<768px), showing ~20% of the gradient for a more visible
rainbow effect.
2025-12-18 15:53:03 -08:00
Bas Nijholt
4755065229 feat(web): Add collapsible blocks to console terminal and editor (#44) 2025-12-18 15:52:36 -08:00
Bas Nijholt
e86bbf7681 fix(web): Make task-not-found message more general (#45) 2025-12-18 15:37:08 -08:00
Bas Nijholt
be136eb916 fix(web): Show friendlier message when task not found after restart
After a self-update, the browser tries to reconnect to the old task_id
but the in-memory task registry is empty (new container). Show a
helpful message instead of a scary "Error" message.
2025-12-18 15:34:07 -08:00
Bas Nijholt
78a223878f fix(web): Use nohup for self-updates to survive container death (#41) 2025-12-18 15:29:37 -08:00
Bas Nijholt
f5be23d626 fix(web): Ensure URL updates after HTMX navigation in command palette (#43)
* fix(web): Ensure URL updates after HTMX navigation in command palette

Use history.pushState() after HTMX swap completes to ensure
window.location.pathname is correct when rebuilding commands.

* docs: Add rule about unchecked checklists in PR descriptions
2025-12-18 15:22:10 -08:00
Bas Nijholt
3bdc483c2a feat(web): Add rainbow glow effect to command palette button (#42) 2025-12-18 15:13:49 -08:00
Bas Nijholt
3a3591a0f7 feat(web): Allow reconnection to running tasks after navigation (#38) 2025-12-18 14:27:06 -08:00
Bas Nijholt
7f8ea49d7f fix(web): Enable TTY for self-update SSH to show progress bars (#40)
* fix(web): Add PATH for self-update SSH command

Non-interactive SSH sessions don't source profile files, so `cf` isn't
found when installed in ~/.local/bin. Prepend common install locations
to PATH before running the remote command.

* fix(web): Enable TTY for self-update SSH to show progress bars
2025-12-18 14:19:21 -08:00
Bas Nijholt
1e67bde96c fix(web): Add PATH for self-update SSH command (#39)
Non-interactive SSH sessions don't source profile files, so `cf` isn't
found when installed in ~/.local/bin. Prepend common install locations
to PATH before running the remote command.
2025-12-18 14:17:03 -08:00
Bas Nijholt
d8353dbb7e fix: Skip socket paths in preflight volume checks (#37)
Socket paths like SSH_AUTH_SOCK are machine-local and shouldn't be
validated on remote hosts during preflight checks.
2025-12-18 13:59:06 -08:00
Bas Nijholt
2e6146a94b feat(ps): Add service filtering to ps command (#33) 2025-12-18 13:31:18 -08:00
Bas Nijholt
87849a8161 fix(web): Run self-updates via SSH to survive container restart (#35) 2025-12-18 13:10:30 -08:00
Bas Nijholt
c8bf792a9a refactor: Store SSH keys in subdirectory for cleaner volume mounting (#36)
* refactor: Store SSH keys in subdirectory for cleaner volume mounting

Change SSH key location from ~/.ssh/compose-farm (file) to
~/.ssh/compose-farm/id_ed25519 (file in directory).

This allows docker-compose to mount just the compose-farm directory
to /root/.ssh without exposing all host SSH keys to the container.

Also make host path the default option in docker-compose.yml with
clearer comments about the two options.

* docs: Update README for new SSH key directory structure

* docs: Clarify cf ssh setup must run inside container
2025-12-18 13:07:41 -08:00
Bas Nijholt
d37295fbee feat(web): Add distinct color for Dashboard/Console in command palette (#34)
Give Dashboard and Console a purple accent to visually distinguish
them from service navigation items in the Command K palette.
2025-12-18 12:38:28 -08:00
Bas Nijholt
266f541d35 fix(web): Auto-scroll Command K palette when navigating with arrow keys (#32)
When using arrow keys to navigate through the command palette list,
items outside the visible area now scroll into view automatically.
2025-12-18 12:30:29 -08:00
Bas Nijholt
aabdd550ba feat(cli): Add progress bar to ssh status host connectivity check (#31)
Use run_parallel_with_progress for visual feedback during host checks.
Results are now sorted alphabetically for consistent output.

Also adds code style rule to CLAUDE.md about keeping imports at top level.
2025-12-18 12:21:47 -08:00
Bas Nijholt
8ff60a1e3e refactor(ssh): Unify ssh_status to use run_command like check command (#29) 2025-12-18 12:17:47 -08:00
Bas Nijholt
2497bd727a feat(web): Navigate to dashboard for Apply/Refresh from command palette (#28)
When triggering Apply or Refresh from the command palette on a non-dashboard
page, navigate to the dashboard first and then execute the action, opening
the terminal output.
2025-12-18 12:12:50 -08:00
Bas Nijholt
e37d9d87ba feat(web): Add icons to Command K palette items (#27) 2025-12-18 12:08:55 -08:00
Bas Nijholt
80a1906d90 fix(web): Fix console page not initializing on HTMX navigation (#26)
* fix(web): Fix console page not initializing on HTMX navigation

Move inline script from {% block scripts %} to inside {% block content %}
so it's included in HTMX swaps. The script block was outside #main-content,
so hx-select="#main-content" was discarding it during navigation.

Also wrap script in IIFE to prevent let re-declaration errors when
navigating back to the console page.

* refactor(web): Simplify console script using var instead of IIFE
2025-12-18 12:05:30 -08:00
Bas Nijholt
282de12336 feat(cli): Add ssh subcommand for SSH key management (#22) 2025-12-18 11:58:33 -08:00
Bas Nijholt
2c5308aea3 fix(web): Add Console navigation to Command K palette (#25)
The Command K menu was missing an option to navigate to the Console page,
even though it's available in the sidebar.
2025-12-18 11:55:30 -08:00
Bas Nijholt
5057202938 refactor: DRY cleanup and message consistency (#24) 2025-12-18 11:45:32 -08:00
Bas Nijholt
5e1b9987dd fix(web): Set PTY as controlling terminal for local shell sessions (#23)
Local shell sessions weren't receiving SIGINT (Ctrl+C) because the PTY
wasn't set as the controlling terminal. Add preexec_fn that calls
setsid() and TIOCSCTTY to properly set up the terminal.
2025-12-18 11:12:37 -08:00
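The fix above is the standard setsid-plus-TIOCSCTTY dance. A minimal sketch of the pattern, not the project's exact code (the shell choice and fd handling are simplified):

```python
import fcntl
import os
import pty
import subprocess
import termios

master_fd, slave_fd = pty.openpty()

def _make_controlling_tty() -> None:
    # Runs in the child between fork and exec: start a new session, then
    # claim the PTY (already dup'ed onto stdin) as the controlling terminal
    # so Ctrl+C typed into the terminal delivers SIGINT to the shell.
    os.setsid()
    fcntl.ioctl(0, termios.TIOCSCTTY, 0)

proc = subprocess.Popen(
    ["/bin/bash"],
    stdin=slave_fd,
    stdout=slave_fd,
    stderr=slave_fd,
    preexec_fn=_make_controlling_tty,
)
```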
Bas Nijholt
d9c26f7f2c Merge pull request #21 from basnijholt/refactor/dry-cleanup
refactor: DRY cleanup - consolidate duplicate code patterns
2025-12-18 11:12:24 -08:00
Bas Nijholt
adfcd4bb31 style: Capitalize "Hint:" consistently 2025-12-18 11:05:53 -08:00
Bas Nijholt
95f7d9c3cf style(cli): Unify "not found" message format with color highlighting
- Services use [cyan] highlighting consistently
- Hosts use [magenta] highlighting consistently
- All use the same "X not found in config" pattern
2025-12-18 11:05:05 -08:00
Bas Nijholt
4c1674cfd8 style(cli): Unify error message format with ✗ prefix
All CLI error messages now consistently use the [red]✗[/] prefix
pattern instead of wrapping the entire message in [red]...[/red].
2025-12-18 11:04:28 -08:00
Bas Nijholt
f65ca8420e fix(web): Filter empty hosts from services_by_host
Preserve original behavior where only hosts with running services are
shown in the dashboard, rather than all configured hosts.
2025-12-18 11:00:01 -08:00
Bas Nijholt
85aff2c271 refactor(state): Move group_services_by_host to state.py
Consolidate duplicate service grouping logic from monitoring.py and
pages.py into a shared function in state.py.
2025-12-18 10:55:53 -08:00
Bas Nijholt
61ca24bb8e refactor(cli): Remove unused get_description parameter
All callers used the same pattern (r[0]), so hardcode it in the helper
and remove the parameter entirely.
2025-12-18 10:54:12 -08:00
Bas Nijholt
ed36588358 refactor(cli): Add validate_host and validate_hosts helpers
Extract common host validation patterns into reusable helpers.
Also simplifies validate_host_for_service to use the new validate_host
helper internally.
2025-12-18 10:49:57 -08:00
Bas Nijholt
80c8079a8c refactor(executor): Add ssh_connect_kwargs helper
Extract common asyncssh.connect parameters into a reusable
ssh_connect_kwargs() function. Used by executor.py, api.py, and ws.py.

Lines: 2608 → 2601 (-7)
2025-12-18 10:48:29 -08:00
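A hedged sketch of what such a helper looks like; the helper name and its callers come from the commit, but the specific options returned here are assumptions:

```python
import asyncssh

def ssh_connect_kwargs(host: str) -> dict:
    """Shared asyncssh.connect() parameters for every SSH call site."""
    return {
        "host": host,
        "known_hosts": None,       # assumption: skip host-key verification
        "agent_forwarding": True,  # assumption: reuse the local ssh-agent
    }

async def run_remote(host: str, command: str) -> str:
    async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:
        result = await conn.run(command, check=True)
        return result.stdout
```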
Bas Nijholt
763bedf9f6 refactor(cli): Extract config not found helpers
Consolidate repeated "config not found" and "path doesn't exist"
messages into _report_no_config_found() and _report_config_path_not_exists()
helper functions. Also unifies the UX to always show status of searched
paths.
2025-12-18 10:46:58 -08:00
Bas Nijholt
641f7e91a8 refactor(cli): Consolidate _report_*_errors() functions
Merge _report_mount_errors, _report_network_errors, and _report_device_errors
into a single _report_requirement_errors function that takes a category
parameter.

Lines: 2634 → 2608 (-26)
2025-12-18 10:43:49 -08:00
Bas Nijholt
4e8e925d59 refactor(cli): Add run_parallel_with_progress helper
Extract common async progress bar pattern into a reusable helper in
common.py. Updates _discover_services, _check_ssh_connectivity,
_check_service_requirements, _get_container_counts, and _snapshot_services
to use the new helper.

Lines: 2642 → 2634 (-8)
2025-12-18 10:42:45 -08:00
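An illustrative sketch of the shared helper pattern; the real signature in common.py may differ:

```python
import asyncio
from collections.abc import Awaitable, Callable

from rich.progress import Progress

async def run_parallel_with_progress(
    description: str,
    items: list[str],
    worker: Callable[[str], Awaitable[object]],
) -> list[object]:
    """Run one worker per item concurrently, ticking a progress bar."""
    with Progress(transient=True) as progress:
        task_id = progress.add_task(description, total=len(items))

        async def tracked(item: str) -> object:
            result = await worker(item)
            progress.advance(task_id)
            return result

        return await asyncio.gather(*(tracked(i) for i in items))
```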
Bas Nijholt
d84858dcfb fix(docker): Add restart policy to web service (#19)
* fix(docker): Add restart policy to containers

* fix: Only add restart policy to web service
2025-12-18 10:39:09 -08:00
Bas Nijholt
3121ee04eb feat(web): Show ⌘K shortcut on command palette button (#20) 2025-12-18 10:38:57 -08:00
Bas Nijholt
a795132a04 refactor(cli): Move format_host to common.py
Consolidate duplicate _format_host() function from lifecycle.py and
management.py into a single format_host() function in common.py.

Lines: 2647 → 2642 (-5)
2025-12-18 10:38:52 -08:00
Bas Nijholt
a6e491575a feat(web): Add Console page with terminal and editor (#17) 2025-12-18 10:29:15 -08:00
Bas Nijholt
78bf90afd9 docs: Improve Releases section in CLAUDE.md 2025-12-18 10:04:56 -08:00
Bas Nijholt
76b60bdd96 feat(web): Add Console page with terminal and editor
Add a new Console page accessible from the sidebar that provides:
- Interactive terminal with full shell access to any configured host
- SSH agent forwarding for authentication to remote hosts
- Monaco editor for viewing/editing files on hosts
- Host selector dropdown with local host listed first
- Auto-loads compose-farm config file on page load

Changes:
- Add /console route and console.html template
- Add /ws/shell/{host} WebSocket endpoint for shell sessions
- Add /api/console/file GET/PUT endpoints for remote file operations
- Update sidebar to include Console navigation link
2025-12-18 10:02:54 -08:00
Bas Nijholt
98bfb1bf6d fix(executor): Disable SSH host key checking in raw mode (#18)
Add SSH options to match asyncssh behavior:
- StrictHostKeyChecking=no
- UserKnownHostsFile=/dev/null
- LogLevel=ERROR (suppress warnings)
- Use -tt to force TTY allocation without stdin TTY

Fixes "Host key verification failed" errors when running from web UI.
2025-12-18 09:59:22 -08:00
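The flags are spelled out in the message; assembled as an argv list, the raw-mode invocation looks roughly like this (argument order and helper name assumed):

```python
def build_raw_ssh_argv(host: str, remote_command: str) -> list[str]:
    return [
        "ssh",
        "-tt",                                 # force TTY allocation without a stdin TTY
        "-o", "StrictHostKeyChecking=no",      # don't prompt for unknown host keys
        "-o", "UserKnownHostsFile=/dev/null",  # don't record host keys either
        "-o", "LogLevel=ERROR",                # suppress the resulting warnings
        host,
        remote_command,
    ]
```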
Bas Nijholt
3c1cc79684 refactor(docker): Use multi-stage build to reduce image size
Reduces image size from 880MB to 139MB (84% smaller) by:
- Building with uv in a separate stage
- Using python:3.14-alpine as runtime base (no uv overhead)
- Pre-compiling bytecode with --compile-bytecode
- Copying only the tool virtualenv and bin symlinks to runtime
2025-12-18 00:58:06 -08:00
Bas Nijholt
12bbcee374 feat(web): Handle invalid config gracefully with error banner (#16) 2025-12-18 00:40:19 -08:00
Bas Nijholt
6e73ae0157 feat(web): Add command palette with Cmd+K (#15) 2025-12-18 00:12:38 -08:00
Bas Nijholt
d90b951a8c feat(web): Vendor CDN assets at build time for offline use
Add a Hatch build hook that downloads JS/CSS dependencies during wheel
builds and rewrites base.html to use local paths. This allows the web UI
to work in environments without internet access.

- Add data-vendor attributes to base.html for declarative asset mapping
- Download htmx, tailwind, daisyui, and xterm.js during build
- Bundle LICENSES.txt with attribution for vendored dependencies
- Source HTML keeps CDN links for development; wheel has local paths
2025-12-17 23:45:06 -08:00
Bas Nijholt
14558131ed feat(web): Add search and host filter to sidebar and services list
- Add search input and host dropdown to sidebar for filtering services
- Add search input and host dropdown to Services by Host section
- Show dynamic service count in sidebar that updates with filter
- Multi-host services appear when any host is selected
2025-12-17 23:35:18 -08:00
Bas Nijholt
a422363337 Update uv.lock 2025-12-17 23:21:37 -08:00
Bas Nijholt
1278d0b3af fix(web): Remove config caching so changes are detected immediately
Config was cached with @lru_cache, causing the web UI to show stale
sync status after external config file edits.
2025-12-17 23:17:25 -08:00
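The bug and fix in miniature; the loader shape is assumed (the real one returns a pydantic Config):

```python
from functools import lru_cache
from pathlib import Path

import yaml

@lru_cache  # before: the first parse is returned forever, external edits invisible
def load_config_cached(path: str) -> dict:
    return yaml.safe_load(Path(path).read_text())

def load_config(path: str) -> dict:
    # after: re-read on every call; the file is small, so re-parsing is cheap
    # and the web UI always reflects the current contents
    return yaml.safe_load(Path(path).read_text())
```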
Bas Nijholt
c8ab6271a8 feat(web): Add rainbow hover effect to Compose Farm headers
Animated gradient appears on hover for both sidebar and mobile navbar headers.
2025-12-17 23:09:57 -08:00
Bas Nijholt
957e828a5b feat(web): Add Lucide icons to web UI (#14) 2025-12-17 23:04:53 -08:00
Bas Nijholt
5afda8cbb2 Add web UI with FastAPI + HTMX + xterm.js (#13) 2025-12-17 22:52:40 -08:00
Bas Nijholt
1bbf324f1e Validate services exist in config with friendly error
Show a clean error message instead of a traceback when a service
is not found in config. Includes a hint about adding to config.
2025-12-17 11:04:48 -08:00
Bas Nijholt
1be5b987a2 Support "." as shorthand for current directory service name
Running `cf up .` now resolves to the current directory name,
allowing quick operations when inside a service directory.
2025-12-17 10:57:48 -08:00
Bas Nijholt
6b684b19f2 Run startup time test 6 times 2025-12-17 09:08:45 -08:00
Bas Nijholt
4a37982e30 Cleanup 2025-12-17 09:07:52 -08:00
Bas Nijholt
55cb44e0e7 Drop service discovery mention 2025-12-17 09:07:30 -08:00
Bas Nijholt
5c242d08bf Add cf apply to post 2025-12-17 09:06:59 -08:00
Bas Nijholt
5bf65d3849 Raise Linux CLI startup threshold to 0.25s for CI headroom 2025-12-17 09:05:53 -08:00
Bas Nijholt
21d5dfa175 Fix check-readme-commands hook to use uv run for CI compatibility 2025-12-17 08:58:45 -08:00
Bas Nijholt
e49ad29999 Use OS-specific thresholds for CLI startup test (Linux: 0.2s, macOS: 0.35s, Windows: 2s) 2025-12-17 08:57:50 -08:00
Bas Nijholt
cdbe74ed89 Return early from CLI startup test when under threshold 2025-12-17 08:56:35 -08:00
Bas Nijholt
129970379c Increase CLI startup threshold to 0.35s for macOS/Windows CI 2025-12-17 08:55:40 -08:00
Bas Nijholt
c5c47d14dd Add CLI startup time test to catch slow imports
Runs `cf --help` and fails if startup exceeds 200ms. Shows timing
info in CI logs on both pass and failure.
2025-12-17 08:53:32 -08:00
Bas Nijholt
95f19e7333 Add pre-commit hook to verify all CLI commands are documented in README
Extracts commands from the Typer app and checks each has a corresponding
--help section in the README. Runs when README.md or CLI files change.
2025-12-17 08:45:16 -08:00
Bas Nijholt
9c6edd3f18 refactor(docs): move reddit-post.md into docs folder 2025-12-17 08:45:16 -08:00
github-actions[bot]
bda9210354 Update README.md 2025-12-17 16:35:24 +00:00
Bas Nijholt
f57951e8dc Fix cf up -h output in README.md 2025-12-17 08:34:52 -08:00
basnijholt
ba8c04caf8 chore(docs): update TOC 2025-12-17 16:31:40 +00:00
Bas Nijholt
ff0658117d Add all --help outputs 2025-12-17 08:31:14 -08:00
Bas Nijholt
920b593d5f Fix mypy error: add type annotation for proc variable 2025-12-17 00:17:20 -08:00
Bas Nijholt
27d9b08ce2 Add -f shorthand for --full in apply command 2025-12-17 00:10:16 -08:00
Bas Nijholt
700cdacb4d Add 'a' alias for apply command (cf a = cf apply) 2025-12-17 00:09:45 -08:00
Bas Nijholt
3c7a532704 Add comments explaining lazy imports for startup performance 2025-12-17 00:08:28 -08:00
Bas Nijholt
6048f37ad5 Lazy import pydantic for faster CLI startup
- Create paths.py module with lightweight path utilities (no pydantic)
- Move Config imports to TYPE_CHECKING blocks in CLI modules
- Lazy import load_config only when needed

Combined with asyncssh lazy loading, cf --help now starts in ~150ms
instead of ~500ms (70% faster).
2025-12-17 00:07:15 -08:00
Bas Nijholt
f18952633f Lazy import asyncssh for faster CLI startup
Move asyncssh import inside _run_ssh_command() so it's only loaded
when actually executing SSH commands. This cuts CLI import time
from 414ms to 200ms (52% faster).

cf --help now starts in ~260ms instead of ~500ms.
2025-12-16 23:59:37 -08:00
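The two startup-time commits above rely on the same two tricks: type-only imports and function-local imports. A condensed sketch (module and attribute names are assumptions):

```python
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Free at runtime: only type checkers import this (and pydantic with it).
    from compose_farm.config import Config

def summarize(config: Config) -> int:
    return len(config.services)  # attribute name assumed

async def _run_ssh_command(host: str, command: str) -> None:
    import asyncssh  # noqa: PLC0415 - deferred so `cf --help` never pays for it

    async with asyncssh.connect(host) as conn:
        await conn.run(command)
```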
Bas Nijholt
437257e631 Add cf config symlink command
Creates a symlink from ~/.config/compose-farm/compose-farm.yaml to a
local config file using absolute paths (avoids broken relative symlinks).

Usage:
  cf config symlink                     # Link to ./compose-farm.yaml
  cf config symlink /path/to/config.yaml  # Link to specific file

State files are automatically stored next to the actual config (not
the symlink) since config_path.resolve() follows symlinks.
2025-12-16 23:56:58 -08:00
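Roughly what the command does; a sketch with error handling and CLI wiring omitted:

```python
from pathlib import Path

def config_symlink(target: str = "compose-farm.yaml") -> Path:
    link = Path.home() / ".config" / "compose-farm" / "compose-farm.yaml"
    link.parent.mkdir(parents=True, exist_ok=True)
    if link.is_symlink() or link.exists():
        link.unlink()
    # resolve() makes the target absolute, so the link never breaks when the
    # working directory changes; state files follow the resolved path too.
    link.symlink_to(Path(target).resolve())
    return link
```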
Bas Nijholt
c720170f26 Add --full flag to apply command
When --full is passed, apply also runs 'docker compose up' on all
services (not just missing/migrating ones) to pick up any config
changes (compose file, .env, etc).

- cf apply          # Fast: state reconciliation only
- cf apply --full   # Thorough: also refresh all running services
2025-12-16 23:54:19 -08:00
Bas Nijholt
d9c03d6509 Feature apply as the hero command in README
- Update intro and NOTE block to lead with cf apply
- Rewrite "How It Works" to show declarative workflow first
- Move apply to top of command table (bolded)
- Reorder examples to show apply as "the main command"
- Update automation bullet to highlight one-command reconciliation
2025-12-16 23:49:49 -08:00
Bas Nijholt
3b7066711f Merge pull request #12 from basnijholt/feature/orphaned-services
Add apply command and refactor CLI for clearer UX
2025-12-16 23:34:34 -08:00
Bas Nijholt
6a630c40a1 Update apply command description to include starting missing services 2025-12-16 23:32:27 -08:00
Bas Nijholt
9f9c042b66 Remove up --migrate flag in favor of apply
Simplifies CLI by having one clear reconciliation command:
- cf up <service>  = start specific services (auto-migrates if needed)
- cf apply         = full reconcile (stop orphans + migrate + start missing)

The --migrate flag was redundant with 'apply --no-orphans'.
2025-12-16 23:27:19 -08:00
github-actions[bot]
2a6d7d0b85 Update README.md 2025-12-17 07:21:38 +00:00
Bas Nijholt
6d813ccd84 Merge af9c760fb8 into affed2edcf 2025-12-17 07:21:23 +00:00
Bas Nijholt
af9c760fb8 Add missing service detection to apply command
Previously, apply only handled:
1. Stopping orphans (in state, not in config)
2. Migrating services (in state, wrong host)

Now it also handles:
3. Starting missing services (in config, not in state)

This fixes the case where a service was stopped as an orphan, then
re-added to config - apply would say "nothing to do" instead of
starting it.

Added get_services_not_in_state() to state.py and updated tests.
2025-12-16 23:21:09 -08:00
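Both directions of the reconciliation reduce to set differences between config and state; a simplified sketch (the real functions live in state.py and take richer objects):

```python
def get_orphaned_services(state: dict[str, str], config: dict[str, str]) -> set[str]:
    """In state but no longer in config: stop these."""
    return set(state) - set(config)

def get_services_not_in_state(state: dict[str, str], config: dict[str, str]) -> set[str]:
    """In config but not in state: start these."""
    return set(config) - set(state)
```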
Bas Nijholt
90656b05e3 Add tests for apply command and down --orphaned flag
Tests cover:
- apply: nothing to do, dry-run preview, migrations, orphan cleanup, --no-orphans
- down --orphaned: no orphans, stops services, error on invalid combinations

Lifecycle.py coverage improved from 20% to 61%.
2025-12-16 23:15:46 -08:00
github-actions[bot]
d7a3d4e8c7 Update README.md 2025-12-17 07:10:52 +00:00
Bas Nijholt
35f0b8bf99 Merge be6b391121 into affed2edcf 2025-12-16 23:10:36 -08:00
Bas Nijholt
be6b391121 Refactor CLI commands for clearer UX
Separate "read state from reality" from "write config to reality":
- Rename `sync` to `refresh` (updates local state from running services)
- Add `apply` command (makes reality match config: migrate + stop orphans)
- Add `down --orphaned` flag (stops services removed from config)
- Modify `up --migrate` to only handle migrations (not orphans)

The new mental model:
- `refresh` = Reality → State (discover what's running)
- `apply` = Config → Reality (reconcile: migrate services + stop orphans)

Also extract private helper functions for reporting to match codebase style.
2025-12-16 23:06:42 -08:00
Bas Nijholt
7f56ba6a41 Add orphaned service detection and cleanup
When services are removed from config but still tracked in state,
`cf up --migrate` now stops them automatically. This makes the
config truly declarative - comment out a service, run migrate,
and it stops.

Changes:
- Add get_orphaned_services() to state.py for detecting orphans
- Add stop_orphaned_services() to operations.py for cleanup
- Update lifecycle.py to call stop_orphaned_services on --migrate
- Refactor _report_orphaned_services to use shared function
- Rename "missing_from_config" to "unmanaged" for clarity
- Add tests for get_orphaned_services
- Only remove from state on successful down (not on failure)
2025-12-16 22:53:26 -08:00
Bas Nijholt
4b3d7a861e Fix migration and update for services with buildable images
Use `pull --ignore-buildable` to skip images that have `build:` defined
in the compose file, preventing pull failures for locally-built images
like gitea-runner-custom. The build step then handles these images.
2025-12-16 19:42:24 -08:00
Bas Nijholt
affed2edcf Refactor operations.py into smaller helpers
- Add PreflightResult NamedTuple with .ok property
- Extract _run_compose_step to handle raw output and interrupts
- Extract _up_single_service for single-host migration logic
- Deduplicate pull/build handling in _migrate_service
- Add was_running check to only rollback if service was running
2025-12-16 17:08:25 -08:00
Bas Nijholt
34642e8b8e Rollback to old host when migration up fails
When up fails on the target host after migration has already stopped
the service on the old host, attempt to restart on the old host as a
rollback. This prevents services from being left down after a failed
migration attempt.
2025-12-16 16:59:09 -08:00
Bas Nijholt
4c8b6c5209 Add init-network hint when network is missing 2025-12-16 16:22:06 -08:00
Bas Nijholt
2b38ed28c0 Skip traefik regeneration when all services fail
Don't update traefik config if all services in the operation failed.
This prevents adding routes for services that aren't actually running.
2025-12-16 16:21:09 -08:00
Bas Nijholt
26b57895ce Clean up orphaned containers when migration up fails
If `up` fails after migration (when we've already run `down` on the
old host), run `down` on the target host to clean up any containers
that were created but couldn't start (e.g., due to missing devices).

This prevents orphaned containers from lingering on the failed host.
2025-12-16 16:16:09 -08:00
Bas Nijholt
367da13fae Fix path existence check for permission denied
Use stat instead of test -e to distinguish "no such file" from
"permission denied". If stat fails with permission denied, the
path exists (just not accessible), so report it as existing.

Fixes false "missing path" errors for directories with restricted
permissions like /mnt/data/immich/library.
2025-12-16 15:08:38 -08:00
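The decision logic in miniature; the remote command construction is omitted and the matched strings are assumptions:

```python
def path_exists(exit_code: int, stderr: str) -> bool:
    """Interpret `stat <path>` run on the remote host.

    Unlike `test -e`, stat's stderr distinguishes the failure modes:
    missing paths say "No such file or directory", while unreadable
    ones say "Permission denied" - which means the path exists.
    """
    return exit_code == 0 or "Permission denied" in stderr
```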
Bas Nijholt
d6ecd42559 Consolidate service requirement checks into shared function
Move check_service_requirements() to operations.py as a public function
that verifies paths, networks, and devices exist on a target host. Both
CLI check command and pre-flight migration checks now use this shared
function, eliminating duplication. Also adds device checking to the
check command output.
2025-12-16 14:53:59 -08:00
Bas Nijholt
233c33fa52 Add device checking to pre-flight migration checks
Services with devices: mappings (e.g., /dev/dri for GPU acceleration)
now have those devices verified on the target host before migration.
This prevents the scenario where a service is stopped on the old host
but fails to start on the new host due to missing devices.

Adds parse_devices() to extract host device paths from compose files.
2025-12-16 14:35:52 -08:00
Bas Nijholt
43974c5743 Abort on first Ctrl+C during migrations
Detect when SSH subprocess is killed by signal (exit code < 0 or 255)
and treat it as an interrupt. This allows single Ctrl+C to abort the
entire operation instead of requiring two presses.
2025-12-16 14:31:33 -08:00
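The detection itself is a two-line check (helper name assumed); when it fires, the operation can abort as if the user had interrupted it:

```python
def was_interrupted(returncode: int) -> bool:
    # Negative: the local ssh subprocess died from a signal (e.g. -2 for SIGINT).
    # 255: ssh's catch-all exit code, also seen when the remote side is killed.
    return returncode < 0 or returncode == 255
```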
Bas Nijholt
cf94a62f37 docs: Clarify pull/build comments in migration 2025-12-16 14:26:48 -08:00
Bas Nijholt
81b4074827 Pre-build Dockerfile services during migration
After pulling images, also run build for services with Dockerfiles.
This ensures build-based services have their images ready before
stopping the old service, minimizing downtime.

If build fails, abort the migration and leave the service running
on the old host.

Extract _migrate_service helper to reduce function complexity.
2025-12-16 14:17:19 -08:00
Bas Nijholt
455657c8df Abort migration if pre-pull fails
If pulling images on the target host fails (e.g., rate limit),
abort the migration and leave the service running on the old host.
This prevents downtime when Docker Hub rate limits are hit.
2025-12-16 14:14:35 -08:00
Bas Nijholt
ee5a92788a Pre-pull images during migration to reduce downtime
When migrating a service to a new host, pull images on the target
host before stopping the service on the old host. This minimizes
downtime since images are cached when the up command runs.

Migration flow:
1. Pull images on new host (service still running on old)
2. Down on old host
3. Up on new host (fast, images already pulled)
2025-12-16 14:12:53 -08:00
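The ordering, with the safeguards from the neighboring migration commits folded in; `compose()` is a placeholder for "run this docker compose subcommand on that host and report success":

```python
async def migrate(service: str, old_host: str, new_host: str) -> bool:
    # 1. Warm the image cache while the service still runs on the old host.
    if not await compose(new_host, service, "pull --ignore-buildable"):
        return False  # abort with zero downtime: nothing was stopped yet
    # 2. The downtime window opens here.
    await compose(old_host, service, "down")
    # 3. Fast, because step 1 already pulled the images.
    if not await compose(new_host, service, "up -d"):
        await compose(new_host, service, "down")  # clean up half-created containers
        return await compose(old_host, service, "up -d")  # roll back to the old host
    return True
```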
Bas Nijholt
2ba396a419 docs: Move Compose Farm to first column in comparison table 2025-12-16 13:48:40 -08:00
Bas Nijholt
7144d58160 build: Include LICENSE file in package distribution 2025-12-16 13:37:15 -08:00
Bas Nijholt
279fa2e5ef Create LICENSE 2025-12-16 13:36:35 -08:00
Bas Nijholt
dbe0b8b597 docs: Add app.py to CLAUDE.md architecture diagram 2025-12-16 13:14:51 -08:00
Bas Nijholt
b7315d255a refactor: Split CLI into modular subpackage (#11) 2025-12-16 13:08:08 -08:00
renovate[bot]
f003d2931f ⬆️ Update actions/checkout action to v6 (#5)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-16 12:19:45 -08:00
renovate[bot]
6f7c557065 ⬆️ Update actions/setup-python action to v6 (#6)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-16 12:18:34 -08:00
renovate[bot]
ecb6ee46b1 ⬆️ Update astral-sh/setup-uv action to v7 (#8)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-16 12:18:28 -08:00
renovate[bot]
354967010f ⬆️ Update redis Docker tag to v8 (#9)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-16 12:18:22 -08:00
github-actions[bot]
57122f31a3 Update README.md 2025-12-16 20:01:03 +00:00
Bas Nijholt
cbbcec0d14 Add config subcommand for managing configuration files (#10) 2025-12-16 12:00:44 -08:00
Bas Nijholt
de38c35b8a docs: Add one-liner showing manual equivalent 2025-12-16 11:19:56 -08:00
github-actions[bot]
def996ddf4 Update README.md 2025-12-16 19:14:07 +00:00
Bas Nijholt
790e32e96b Fix test_load_config_not_found for CF_CONFIG env var 2025-12-16 11:13:44 -08:00
Bas Nijholt
fd75c4d87f Add CLI --help output to README 2025-12-16 11:12:43 -08:00
Bas Nijholt
411a99cbc4 Wait for PyPI propagation before Docker build
Also add Python 3.14 to classifiers.
2025-12-16 11:04:35 -08:00
Bas Nijholt
d2c6ab72b2 Add CF_CONFIG env var for simpler Docker workflow
Config search order is now:
1. --config CLI option
2. CF_CONFIG environment variable
3. ./compose-farm.yaml
4. ~/.config/compose-farm/compose-farm.yaml

Docker workflow simplified: mount compose_dir once, set CF_CONFIG
to config file within it. No more symlink issues or multiple mounts.
2025-12-16 10:12:55 -08:00
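The search order as code (helper name assumed):

```python
import os
from pathlib import Path

def find_config(cli_option: str | None) -> Path | None:
    env = os.environ.get("CF_CONFIG")
    candidates = [
        Path(cli_option) if cli_option else None,                        # 1. --config
        Path(env) if env else None,                                      # 2. CF_CONFIG
        Path("compose-farm.yaml"),                                       # 3. cwd
        Path.home() / ".config" / "compose-farm" / "compose-farm.yaml",  # 4. user config
    ]
    return next((p for p in candidates if p is not None and p.exists()), None)
```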
Bas Nijholt
3656584eda Friendly error when config path is a directory
Docker creates empty directories for missing file mounts,
causing confusing IsADirectoryError tracebacks. Now shows
a clear message explaining the likely cause.
2025-12-16 09:49:40 -08:00
Bas Nijholt
8be370098d Use env vars for docker-compose.yml mounts
- CF_CONFIG_DIR: config directory (default: ~/.config/compose-farm)
- CF_COMPOSE_DIR: compose directory (default: /opt/compose)

Mounts preserve paths so compose_dir in config works correctly.
2025-12-16 09:49:34 -08:00
Bas Nijholt
45057cb6df feat: Add docker-compose.yml for easier Docker usage
Example compose file that mounts SSH agent and config.
Users uncomment the compose_dir mount for their setup.
2025-12-16 09:40:18 -08:00
Bas Nijholt
3f24484d60 fix: Fix VERSION expansion in Dockerfile 2025-12-16 09:24:46 -08:00
Bas Nijholt
b6d50a22b4 fix: Wait for PyPI upload before building Docker image
Use workflow_run trigger to wait for "Upload Python Package" workflow
to complete successfully before building the Docker image. This ensures
the version is available on PyPI when uv tries to install it.
2025-12-16 09:21:35 -08:00
Bas Nijholt
8a658210e1 docs: Add Docker installation instructions with SSH agent 2025-12-16 09:16:43 -08:00
Bas Nijholt
583aaaa080 feat: Add Docker image and GitHub workflow
- Dockerfile using ghcr.io/astral-sh/uv:python3.14-alpine
- Installs compose-farm via uv tool install
- Includes openssh-client for remote host connections
- GitHub workflow builds and pushes to ghcr.io on release
- Supports manual workflow dispatch with version input
- Tags: semver (x.y.z, x.y, x) and latest
2025-12-16 09:11:09 -08:00
Bas Nijholt
22ca4f64e8 docs: Add command quick-reference table to Usage section 2025-12-16 08:30:15 -08:00
Bas Nijholt
32e798fcaa chore: Remove obsolete PLAN.md
The traefik-file feature described in this planning document has been
fully implemented. All open questions have been resolved.
2025-12-15 23:27:27 -08:00
Bas Nijholt
ced81c8b50 refactor: Make internal CLI symbols private
Rename module-internal type aliases, TypeVar, and constants with _ prefix:
- _T, _ServicesArg, _AllOption, _ConfigOption, _LogPathOption, _HostOption
- _MISSING_PATH_PREVIEW_LIMIT
- _DEFAULT_NETWORK_NAME, _DEFAULT_NETWORK_SUBNET, _DEFAULT_NETWORK_GATEWAY

These are only used within cli.py and should not be part of the public API.
2025-12-15 20:57:41 -08:00
Bas Nijholt
7ec4b71101 refactor: Remove unnecessary console aliasing in executor
Import console and err_console directly instead of aliasing to
_console and _err_console. Rename inner function variable to
'out' to avoid shadowing the module-level console import.
2025-12-15 20:36:39 -08:00
Bas Nijholt
94aa58d380 refactor: Make internal constants and classes private
Rename module-internal constants and classes with _ prefix:
- compose.py: SINGLE_PART, PUBLISHED_TARGET_PARTS, HOST_PUBLISHED_PARTS, MIN_VOLUME_PARTS
- logs.py: DIGEST_HEX_LENGTH
- traefik.py: LIST_VALUE_KEYS, MIN_ROUTER_PARTS, MIN_SERVICE_LABEL_PARTS,
  TraefikServiceSource, TRAEFIK_CONFIG_HEADER

These items are only used within their respective modules and should
not be part of the public API.
2025-12-15 20:33:48 -08:00
Bas Nijholt
f8d88e6f97 refactor: Remove run_compose_multi_host and rename report_preflight_failures to _report_preflight_failures
Eliminate the public run_compose_multi_host helper, which was a thin wrapper around the internal _run_sequential_commands_multi_host function, and mark the preflight failure reporting function as internal by prefixing it with an underscore.
Update all internal calls accordingly.
2025-12-15 20:27:02 -08:00
Bas Nijholt
a95f6309b0 Remove dead code and make internal APIs public
Remove functions that were replaced by _with_progress variants in cli.py:
- discover_running_services, check_mounts_on_configured_hosts,
  check_networks_on_configured_hosts, _check_resources from operations.py
- snapshot_services from logs.py
- get_service_hosts from state.py

Make previously private functions public (remove underscore prefix):
- is_local in executor.py
- isoformat, collect_service_entries, load_existing_entries,
  merge_entries, write_toml in logs.py
- load_env, interpolate, parse_ports in compose.py

Update tests to use renamed public functions.
2025-12-15 20:19:28 -08:00
Bas Nijholt
502de018af docs: Add high availability row to comparison table 2025-12-15 19:51:57 -08:00
Bas Nijholt
a3e8daad33 docs: refine comparison table in README 2025-12-15 16:06:17 -08:00
Bas Nijholt
78a2f65c94 docs: Move comparison link after declarative setup line 2025-12-15 15:48:15 -08:00
Bas Nijholt
1689a6833a docs: Link to comparison section from Why Compose Farm 2025-12-15 15:46:26 -08:00
Bas Nijholt
6d2f32eadf docs: Add feature comparison table with emojis 2025-12-15 15:44:16 -08:00
Bas Nijholt
c549dd50c9 docs: Move comparison section to end, simplify format 2025-12-15 15:41:09 -08:00
Bas Nijholt
82312e9421 docs: add comparison with alternatives to README 2025-12-15 15:37:08 -08:00
Bas Nijholt
e13b367188 docs: Add shields to README 2025-12-15 15:31:30 -08:00
Bas Nijholt
d73049cc1b docs: Add declarative philosophy to Why Compose Farm 2025-12-15 15:17:04 -08:00
Bas Nijholt
4373b23cd3 docs: Simplify xkcd explanation, lead with simplicity 2025-12-15 14:54:29 -08:00
Bas Nijholt
73eb6ccf41 docs: Center xkcd image 2025-12-15 14:52:57 -08:00
Bas Nijholt
6ca48d0d56 docs: Add console.py to CLAUDE.md architecture 2025-12-15 14:52:40 -08:00
Bas Nijholt
b82599005e docs: Add xkcd reference and clarify this is not a new standard 2025-12-15 14:37:33 -08:00
Bas Nijholt
b044053674 docs: Emphasize zero changes required to compose files 2025-12-15 14:19:52 -08:00
Bas Nijholt
e4f03bcd94 docs: Clarify autokuma demonstrates multi-host feature 2025-12-15 14:14:47 -08:00
Bas Nijholt
ac3797912f Add AutoKuma labels to example services 2025-12-15 14:14:07 -08:00
Bas Nijholt
429a1f6e7e docs: Fix outdated .env instructions in examples README 2025-12-15 14:13:09 -08:00
Bas Nijholt
fab20e0796 Add header comment to generated traefik file-provider config
Includes repository link and explanation of what the file does.
Header is added automatically by render_traefik_config().
2025-12-15 14:11:49 -08:00
Bas Nijholt
1bc6baa0b0 Add realistic traefik file-provider example 2025-12-15 14:10:00 -08:00
Bas Nijholt
996e0748f8 style: Simplify compose-farm.yaml comments 2025-12-15 14:08:50 -08:00
Bas Nijholt
ca46fdfaa4 Replace trivial examples with real-world services
- traefik: Reverse proxy with Let's Encrypt DNS challenge
- mealie: Single container with resource limits
- uptime-kuma: Monitoring with Docker socket and user mapping
- paperless-ngx: Multi-container stack (Redis + SQLite)
- autokuma: Multi-host service (runs on all hosts)

Each example demonstrates dual Traefik routes:
- HTTPS (websecure): Custom domain with Let's Encrypt TLS
- HTTP (web): .local domain for LAN access without TLS

Includes compose-farm.yaml with multi-host config and
compose-farm-state.yaml showing deployed state.
2025-12-15 14:08:14 -08:00
Bas Nijholt
b480797e5b Add XDG_CONFIG_HOME support for config paths
Respect the XDG_CONFIG_HOME environment variable when looking for
config files and log paths. Falls back to ~/.config if not set.
2025-12-15 13:06:59 -08:00
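The fallback in code (helper name assumed):

```python
import os
from pathlib import Path

def config_home() -> Path:
    xdg = os.environ.get("XDG_CONFIG_HOME")
    base = Path(xdg) if xdg else Path.home() / ".config"
    return base / "compose-farm"
```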
Bas Nijholt
c47fdf847e Use _progress_bar helper for all progress bars
Standardize UI by using the same progress bar configuration
everywhere. Removes unused TaskProgressColumn import.
2025-12-15 13:03:50 -08:00
Bas Nijholt
3ca9562013 Consolidate console instances and progress bar patterns
- Add console.py module with shared Console instances
- Add _progress_bar() helper to reduce progress bar boilerplate
- Update cli.py, executor.py, operations.py to use shared console
2025-12-15 12:56:23 -08:00
Bas Nijholt
3104d5de28 Refactor state.py with context manager and cli.py with helper to reduce duplication 2025-12-15 11:10:44 -08:00
Bas Nijholt
fd141cbc8c Refactor executor and operations to eliminate code duplication 2025-12-15 11:07:58 -08:00
Bas Nijholt
aa0c15b6b3 Add project metadata to pyproject.toml
Add license, maintainers, keywords, classifiers, and project URLs
for better discoverability on PyPI.
2025-12-15 11:07:13 -08:00
Bas Nijholt
4630a3e551 Merge pull request #4 from basnijholt/feature/multi-host-services
Add multi-host service support
2025-12-15 10:58:36 -08:00
Bas Nijholt
b70d5c52f1 fix: Use strict=True in zip() for equal-length lists 2025-12-15 10:56:47 -08:00
Bas Nijholt
5d8635ba7b ci: Use prek in CI instead of separate ruff/mypy commands
prek is a faster, Rust-based alternative to pre-commit.
Also updates ruff-pre-commit to v0.14.9 to match project version.
2025-12-15 10:53:34 -08:00
Bas Nijholt
27dad9d9d5 style: Format cli.py 2025-12-15 10:51:51 -08:00
Bas Nijholt
abb4417b15 Add orphaned service detection in check command
Warns when services are in state but not in config. These are
services that were removed from config but may still be running.
Also refactors remote checks into helper function.
2025-12-15 10:43:10 -08:00
Bas Nijholt
388cca5591 Add summary output showing succeeded/failed service counts 2025-12-15 10:38:36 -08:00
Bas Nijholt
8aa019e25f fix: Move imports to top-level in test file 2025-12-15 10:29:29 -08:00
Bas Nijholt
e4061cfbde Fix down command for multi-host services
The result.service for multi-host services is 'svc@host' format.
Extract base service name before removing from state.
2025-12-15 10:09:15 -08:00
Bas Nijholt
9a1f20e2d4 Add per-host control and partial state tracking
- Track partial success: if some hosts succeed, state reflects
  only the hosts that actually started
- Add --host flag to up/down: operate on a specific host only
  - `cf up autokuma --host nuc` starts only on nuc
  - `cf down autokuma --host nuc` stops only on nuc
- Add state helpers: add_service_to_host, remove_service_from_host
- Validate --host is allowed for the service's configured hosts
2025-12-15 09:51:49 -08:00
Bas Nijholt
3b45736729 Validate multi-host config edge cases
- Block 'all' as a host name (reserved keyword)
- Reject empty host lists
- Reject duplicate hosts in explicit lists
2025-12-15 09:46:29 -08:00
Bas Nijholt
1d88fa450a docs: Explain why multi-host services are needed 2025-12-15 09:37:41 -08:00
Bas Nijholt
31ee6be163 Add multi-host service support
Allows services to run on multiple hosts using the "all" keyword
or an explicit list of hosts:

  services:
    autokuma: all              # Run on all configured hosts
    dozzle: [host1, host2]     # Run on specific hosts

Key changes:
- config.py: Added get_hosts() and is_multi_host() methods
- executor.py: Added run_compose_multi_host() for parallel execution
- operations.py: Added _up_multi_host_service() with pre-flight checks
- state.py: Track list of hosts for multi-host services
- cli.py: Updated discovery, stats, and check for multi-host
- README.md: Added documentation

Multi-host services:
- Run on all target hosts in parallel
- Skip migration logic (always run everywhere)
- Show [service@host] prefix in output
- Track all running hosts in state
2025-12-15 09:14:25 -08:00
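A sketch of the host-resolution logic the commit describes, with the config shapes simplified to plain strings and lists:

```python
def get_hosts(entry: str | list[str], all_hosts: list[str]) -> list[str]:
    """Resolve a service's host entry: 'all', an explicit list, or one host."""
    if entry == "all":
        return list(all_hosts)
    if isinstance(entry, list):
        return entry
    return [entry]

def is_multi_host(entry: str | list[str]) -> bool:
    return entry == "all" or isinstance(entry, list)
```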
Bas Nijholt
096a2ca5f4 sync: Remove extra spacing (transient bar already leaves one) 2025-12-14 20:41:38 -08:00
Bas Nijholt
fb04f6f64d sync: Add spacing before capturing progress bar 2025-12-14 20:39:02 -08:00
Bas Nijholt
d8e54aa347 check: Fix double newline spacing in output 2025-12-14 20:35:25 -08:00
Bas Nijholt
b2b6b421ba check: Add spacing before mounts/networks progress bar 2025-12-14 20:33:36 -08:00
Bas Nijholt
c6b35f02f0 check: Add spacing before SSH progress bar 2025-12-14 20:32:16 -08:00
Bas Nijholt
7e43b0a6b8 check: Fix spacing after transient progress bar 2025-12-14 20:31:20 -08:00
Bas Nijholt
2915b287ba check: Add SSH connectivity check as first remote step
- Check SSH connectivity to all remote hosts before mount/network checks
- Skip local hosts (no SSH needed)
- Show progress bar during SSH checks
- Report unreachable hosts with clear error messages
- Add newline spacing for better output formatting
2025-12-14 20:30:36 -08:00
Bas Nijholt
ae561db0c9 check: Add progress bar and parallelize mount/network checks
- Parallelize mount and network checking for all services
- Add Rich progress bar showing count, elapsed time, and service name
- Move all inline imports to top-level (contextlib, datetime, logs)
- Also sort state file entries alphabetically for consistency
2025-12-14 20:24:54 -08:00
Bas Nijholt
2d132747c4 sync: Enhance progress bars with count and elapsed time
Show "Discovering ━━━━ 32/65 • 0:00:05 • service-name" format with:
- M of N complete count
- Elapsed time
- Current service name
2025-12-14 20:15:39 -08:00
Bas Nijholt
2848163a04 sync: Add progress bars and parallelize operations
- Parallelize service discovery across all services
- Parallelize image digest capture
- Show transient Rich progress bars during both operations
- Significantly faster sync due to concurrent SSH connections
2025-12-14 20:13:42 -08:00
Bas Nijholt
76aa6e11d2 logs: Make --all and --host mutually exclusive
These options conflict conceptually - --all means all services across
all hosts, while --host means all services on a specific host.
2025-12-14 20:10:28 -08:00
Bas Nijholt
d377df15b4 logs: Add --host filter and contextual --tail default
- Add --host/-H option to filter logs to services on a specific host
- Default --tail to 20 lines when showing multiple services (--all, --host, or >1 service)
- Default to 100 lines for single service
- Add tests for contextual default and host filtering
2025-12-14 20:04:40 -08:00
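The contextual default boils down to one expression (helper name assumed):

```python
def default_tail(service_count: int, all_flag: bool, host_flag: bool) -> int:
    # 20 lines for a quick multi-service overview, 100 for a single service.
    return 20 if all_flag or host_flag or service_count > 1 else 100
```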
Bas Nijholt
334c17cc28 logs: Use contextual default for --tail option
Default to 20 lines when --all is specified (quick overview),
100 lines otherwise (detailed view for specific services).
2025-12-14 19:59:12 -08:00
Bas Nijholt
f148b5bd3a docs: Add TrueNAS NFS root squash configuration guide
Explains how to set maproot_user/maproot_group to root/wheel
in TrueNAS to disable root squash, allowing Docker containers
running as root to write to NFS-mounted volumes.
2025-12-14 17:24:46 -08:00
Bas Nijholt
54af649d76 Make stats progress bar transient 2025-12-14 15:31:00 -08:00
Bas Nijholt
f6e5a5fa56 Add progress bar when querying hosts in stats --live 2025-12-14 15:30:37 -08:00
Bas Nijholt
01aa24d0db style: Add borders to stats summary table 2025-12-14 15:25:17 -08:00
Bas Nijholt
3e702ef72e Add stats command for overview of hosts and services
Shows hosts table (address, configured/running services) and summary
(total hosts, services, compose files on disk, pending migrations).
Use --live to query Docker for actual container counts.
2025-12-14 15:24:30 -08:00
Bas Nijholt
a31218f7e5 docs: Remove trivial progress counter detail 2025-12-14 15:16:45 -08:00
Bas Nijholt
5decb3ed95 docs: Add --migrate flag and hybrid SSH approach 2025-12-14 15:14:56 -08:00
Bas Nijholt
da61436fbb Use native ssh for raw mode, asyncssh for streaming
- Raw mode uses subprocess with `ssh -t` for proper TTY handling
- Progress bars now render correctly on remote hosts
- asyncssh still used for non-raw parallel streaming with prefixes
- Remove redundant header prints (operations.py handles them)
2025-12-14 15:12:48 -08:00
Bas Nijholt
b6025af0c8 Fix newline after raw output to prevent line mixing 2025-12-14 14:49:33 -08:00
Bas Nijholt
ab914677c4 Add progress counter [n/total] to up command 2025-12-14 14:48:48 -08:00
Bas Nijholt
c0b421f812 Add --migrate flag to up command
Automatically detects services where state differs from config
and migrates only those. Usage: cf up --migrate or cf up -m
2025-12-14 14:47:43 -08:00
Bas Nijholt
2a446c800f Always use raw output for up command
- Print service header before raw output (local and SSH)
- up command always uses raw=True since migrations are sequential
- Gives clean progress bar output without per-line prefixes
2025-12-14 14:44:53 -08:00
Bas Nijholt
dc541c0298 test: Skip shell-dependent tests on Windows/Mac 2025-12-14 14:28:31 -08:00
Bas Nijholt
4d9b8b5ba4 docs: Add TrueNAS NFS crossmnt workaround
Documents how to access child ZFS datasets over NFS by injecting
the crossmnt option into /etc/exports. Includes Python script and
setup instructions for cron-based persistence.
2025-12-14 14:11:10 -08:00
Bas Nijholt
566a07d3a4 Refactor: separate concerns into dedicated modules
- Extract compose.py from traefik.py for generic compose parsing
  (env loading, interpolation, ports, volumes, networks)
- Rename ssh.py to executor.py for clarity
- Extract operations.py from cli.py for business logic
  (up_services, discover_running_services, preflight checks)
- Update CLAUDE.md with new architecture diagram
- Add docs/dev/future-improvements.md for low-priority items

CLI is now a thin layer that delegates to operations module.
All 70 tests pass.
2025-12-14 12:49:24 -08:00
Bas Nijholt
921ce6f13a Add raw output mode for single-service operations
When operating on a single service, pass output directly to
stdout/stderr instead of prefixing each line with [service].
This enables proper handling of \r progress bars during
docker pull, up, etc.
2025-12-14 12:15:36 -08:00
Bas Nijholt
708e09a8cc Show target host when starting services 2025-12-14 12:09:07 -08:00
Bas Nijholt
04154b84f6 Add tests for network and path checking
- test_traefik: Tests for parse_external_networks()
- test_ssh: Tests for check_paths_exist() and check_networks_exist()
2025-12-14 12:08:35 -08:00
Bas Nijholt
2bc9b09e58 Add Docker network validation and init-network command
- check: Validates external networks exist on configured hosts
- up: Pre-flight check blocks if networks missing on target host
- init-network: Creates Docker network with consistent subnet/gateway
  across hosts (default: mynetwork 172.20.0.0/16)

Networks defined as `external: true` in compose files are now
checked before starting or migrating services.
2025-12-14 12:06:36 -08:00
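A sketch of the command init-network runs on each host; the name and subnet defaults come from the message, the gateway handling is omitted and the helper name is assumed:

```python
def init_network_argv(name: str = "mynetwork", subnet: str = "172.20.0.0/16") -> list[str]:
    # Same subnet on every host so containers get a consistent address space.
    return ["docker", "network", "create", "--subnet", subnet, name]
```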
Bas Nijholt
16d517dcd0 docs: Update README and CLAUDE.md for redesigned check command 2025-12-14 10:56:04 -08:00
Bas Nijholt
5e8d09b010 Redesign check command: unified validation + host compatibility
Merged check-mounts into check command. Now provides:
- Config validation (compose files exist)
- Traefik label validation
- Mount path validation (SSH-based)
- Host compatibility matrix when checking specific services

Usage:
  cf check              # Full validation of all services
  cf check --local      # Skip SSH mount checks (fast)
  cf check jellyfin     # Check service + show which hosts can run it

Removed standalone check-mounts command (merged into check).
2025-12-14 10:43:34 -08:00
Bas Nijholt
6fc3535449 Add pre-flight mount check before migration
When migrating a service to a new host, check that all required volume
mount paths exist on the target host BEFORE running down on the old host.
This prevents failed migrations where the service is stopped but can't
start on the new host due to missing NFS mounts.
2025-12-14 10:30:56 -08:00
Bas Nijholt
9158dba0ce Add check-mounts command to verify NFS paths exist
New command to verify volume mount paths exist on target hosts before
migration. Parses bind mounts from compose files and SSHs to hosts to
check each path exists.

- check_paths_exist() in ssh.py: batch check multiple paths efficiently
- parse_host_volumes() in traefik.py: extract bind mount paths from compose
- check-mounts command in cli.py: groups by host, reports missing paths

Usage: cf check-mounts plex jellyfin
       cf check-mounts --all
2025-12-14 10:25:26 -08:00
Bas Nijholt
7b2c431ca3 fix: Change whoami example port to 18082 to avoid conflicts 2025-12-14 09:46:20 -08:00
Bas Nijholt
9deb460cfc Add Traefik example to examples directory
- traefik/docker-compose.yml: Traefik with docker and file providers
- whoami/docker-compose.yml: Test service with Traefik labels
- Updated compose-farm.yaml with traefik_file auto-regeneration
- Updated README.md with Traefik usage instructions
2025-12-14 09:44:03 -08:00
Bas Nijholt
2ce6f2473b docs: Add Traefik config options to example 2025-12-14 01:19:13 -08:00
Bas Nijholt
04d8444168 docs: Use consistent server-1/server-2 naming in example config 2025-12-14 01:18:50 -08:00
Bas Nijholt
b539c4ba76 docs: Update CLAUDE.md with all modules and commands 2025-12-14 01:17:30 -08:00
Bas Nijholt
473bc089c7 docs: Use consistent server-1/server-2 naming throughout 2025-12-14 01:15:46 -08:00
Bas Nijholt
50f405eb77 docs: Use uv tool install for CLI tools 2025-12-14 01:14:12 -08:00
Bas Nijholt
fd0d3bcbcf docs: Use clearer host names in NFS example 2025-12-14 01:13:58 -08:00
Bas Nijholt
f2e8ab0387 docs: Recommend uv for installation 2025-12-14 01:13:24 -08:00
Bas Nijholt
dfbf2748c7 docs: Reorganize README for better flow 2025-12-14 01:12:09 -08:00
Bas Nijholt
57b0ba5916 CSS for logo 2025-12-14 00:59:59 -08:00
Bas Nijholt
e668fb0faf Add logo 2025-12-14 00:58:58 -08:00
103 changed files with 12493 additions and 1471 deletions

.github/check_readme_commands.py (new executable file, 88 lines)

@@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""Check that all CLI commands are documented in the README."""
from __future__ import annotations

import re
import sys
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import typer

from compose_farm.cli import app


def get_all_commands(typer_app: typer.Typer, prefix: str = "cf") -> set[str]:
    """Extract all command names from a Typer app, including nested subcommands."""
    commands = set()
    # Get registered commands (skip hidden ones like aliases)
    for command in typer_app.registered_commands:
        if command.hidden:
            continue
        name = command.name
        if not name and command.callback:
            name = command.callback.__name__
        if name:
            commands.add(f"{prefix} {name}")
    # Get registered sub-apps (like 'config')
    for group in typer_app.registered_groups:
        sub_app = group.typer_instance
        sub_name = group.name
        if sub_app and sub_name:
            commands.add(f"{prefix} {sub_name}")
    # Don't recurse into subcommands - we only document the top-level subcommand
    return commands


def get_documented_commands(readme_path: Path) -> set[str]:
    """Extract commands documented in README from help output sections."""
    content = readme_path.read_text()
    # Match patterns like: <code>cf command --help</code>
    pattern = r"<code>(cf\s+[\w-]+)\s+--help</code>"
    matches = re.findall(pattern, content)
    return set(matches)


def main() -> int:
    """Check that all CLI commands are documented in the README."""
    readme_path = Path(__file__).parent.parent / "README.md"
    if not readme_path.exists():
        print(f"ERROR: README.md not found at {readme_path}")
        return 1
    cli_commands = get_all_commands(app)
    documented_commands = get_documented_commands(readme_path)
    # Also check for the main 'cf' help
    if "<code>cf --help</code>" in readme_path.read_text():
        documented_commands.add("cf")
        cli_commands.add("cf")
    missing = cli_commands - documented_commands
    extra = documented_commands - cli_commands
    if missing or extra:
        if missing:
            print("ERROR: Commands missing from README --help documentation:")
            for cmd in sorted(missing):
                print(f" - {cmd}")
        if extra:
            print("WARNING: Commands documented but not in CLI:")
            for cmd in sorted(extra):
                print(f" - {cmd}")
        return 1
    print(f"✓ All {len(cli_commands)} commands documented in README")
    return 0


if __name__ == "__main__":
    sys.exit(main())

CI workflow (file name not shown)

@@ -16,10 +16,10 @@ jobs:
        python-version: ["3.11", "3.12", "3.13"]
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
       - name: Set up Python ${{ matrix.python-version }}
         run: uv python install ${{ matrix.python-version }}
@@ -39,10 +39,10 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
       - name: Set up Python
         run: uv python install 3.12
@@ -50,11 +50,5 @@ jobs:
       - name: Install dependencies
         run: uv sync --all-extras --dev
-      - name: Run ruff check
-        run: uv run ruff check .
-      - name: Run ruff format check
-        run: uv run ruff format --check .
-      - name: Run mypy
-        run: uv run mypy src/compose_farm
+      - name: Run pre-commit (via prek)
+        uses: j178/prek-action@v1

.github/workflows/docker.yml (new file, 92 lines)

@@ -0,0 +1,92 @@
name: Build and Push Docker Image

on:
  workflow_run:
    workflows: ["Upload Python Package"]
    types: [completed]
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to build (leave empty for latest)'
        required: false

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    # Only run if PyPI upload succeeded (or manual dispatch)
    if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout
        uses: actions/checkout@v6
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract version
        id: version
        run: |
          if [ "${{ github.event_name }}" = "workflow_run" ]; then
            # Get version from the tag that triggered the release
            VERSION="${{ github.event.workflow_run.head_branch }}"
            # Strip 'v' prefix if present
            VERSION="${VERSION#v}"
          elif [ -n "${{ github.event.inputs.version }}" ]; then
            VERSION="${{ github.event.inputs.version }}"
          else
            VERSION=""
          fi
          echo "version=$VERSION" >> $GITHUB_OUTPUT
      - name: Wait for PyPI
        if: steps.version.outputs.version != ''
        run: |
          VERSION="${{ steps.version.outputs.version }}"
          echo "Waiting for compose-farm==$VERSION on PyPI..."
          for i in {1..30}; do
            if curl -sf "https://pypi.org/pypi/compose-farm/$VERSION/json" > /dev/null; then
              echo "✓ Version $VERSION available on PyPI"
              exit 0
            fi
            echo "Attempt $i: not yet available, waiting 10s..."
            sleep 10
          done
          echo "✗ Timeout waiting for PyPI"
          exit 1
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=semver,pattern={{version}},value=v${{ steps.version.outputs.version }}
            type=semver,pattern={{major}}.{{minor}},value=v${{ steps.version.outputs.version }}
            type=semver,pattern={{major}},value=v${{ steps.version.outputs.version }}
            type=raw,value=latest
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            VERSION=${{ steps.version.outputs.version }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

PyPI publish workflow (file name not shown)

@@ -13,9 +13,9 @@ jobs:
    permissions:
      id-token: write
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
       - name: Build
         run: uv build
       - name: Publish package distributions to PyPI

README update workflow (file name not shown)

@@ -11,16 +11,16 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           persist-credentials: false
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
       - name: Install uv
-        uses: astral-sh/setup-uv@v6
+        uses: astral-sh/setup-uv@v7
       - name: Run markdown-code-runner
         env:

.gitignore (2 lines added)

@@ -42,3 +42,5 @@ htmlcov/
 compose-farm.yaml
 !examples/compose-farm.yaml
 coverage.xml
+.env
+homepage/

.pre-commit-config.yaml

@@ -1,4 +1,13 @@
 repos:
+  - repo: local
+    hooks:
+      - id: check-readme-commands
+        name: Check README documents all CLI commands
+        entry: uv run python .github/check_readme_commands.py
+        language: system
+        files: ^(README\.md|src/compose_farm/cli/.*)$
+        pass_filenames: false
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
     hooks:
@@ -10,7 +19,7 @@ repos:
       - id: debug-statements
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.4
+    rev: v0.14.9
     hooks:
       - id: ruff
         args: [--fix]

CLAUDE.md

@@ -10,19 +10,42 @@
```
compose_farm/
├── config.py # Pydantic models, YAML loading
├── ssh.py # asyncssh execution, streaming
└── cli.py # Typer commands
├── cli/ # CLI subpackage
│ ├── __init__.py # Imports modules to trigger command registration
│ ├── app.py # Shared Typer app instance, version callback
│ ├── common.py # Shared helpers, options, progress bar utilities
│ ├── config.py # Config subcommand (init, show, path, validate, edit)
│ ├── lifecycle.py # up, down, pull, restart, update, apply commands
│ ├── management.py # refresh, check, init-network, traefik-file commands
│ └── monitoring.py # logs, ps, stats commands
├── config.py # Pydantic models, YAML loading
├── compose.py # Compose file parsing (.env, ports, volumes, networks)
├── console.py # Shared Rich console instances
├── executor.py # SSH/local command execution, streaming output
├── operations.py # Business logic (up, migrate, discover, preflight checks)
├── state.py # Deployment state tracking (which service on which host)
├── logs.py # Image digest snapshots (dockerfarm-log.toml)
└── traefik.py # Traefik file-provider config generation from labels
```
## Web UI Icons
Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templates/partials/icons.html` by copying SVG paths from their site. The `action_btn`, `stat_card`, and `collapse` macros in `components.html` accept an optional `icon` parameter.
## Key Design Decisions
-1. **asyncssh over Paramiko/Fabric**: Native async support, built-in streaming
+1. **Hybrid SSH approach**: asyncssh for parallel streaming with prefixes; native `ssh -t` for raw mode (progress bars)
2. **Parallel by default**: Multiple services run concurrently via `asyncio.gather` (see the sketch below)
-3. **Streaming output**: Real-time stdout/stderr with `[service]` prefix
+3. **Streaming output**: Real-time stdout/stderr with `[service]` prefix using Rich
4. **SSH key auth only**: Uses ssh-agent, no password handling (YAGNI)
5. **NFS assumption**: Compose files at same path on all hosts
-6. **Local execution**: When host is `localhost`/`local`, skip SSH and run locally
+6. **Local IP auto-detection**: Skips SSH when target host matches local machine's IP
7. **State tracking**: Tracks where services are deployed for auto-migration
8. **Pre-flight checks**: Verifies NFS mounts and Docker networks exist before starting/migrating
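A minimal sketch of the parallelism in decision 2, using only the standard library (names here are illustrative, not the real executor API):

```python
import asyncio


async def run_service(service: str) -> str:
    """Stand-in for running `docker compose` for one service over SSH."""
    await asyncio.sleep(0.1)
    return f"[{service}] done"


async def run_all(services: list[str]) -> list[str]:
    # Parallel by default: one task per service, gathered concurrently
    return await asyncio.gather(*(run_service(s) for s in services))


print(asyncio.run(run_all(["plex", "jellyfin", "sonarr"])))
```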
## Code Style
- **Imports at top level**: Never add imports inside functions unless they are explicitly marked with `# noqa: PLC0415` and a comment explaining it speeds up CLI startup. Heavy modules like `pydantic`, `yaml`, and `rich.table` are lazily imported to keep `cf --help` fast.
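A sketch of that lazy-import pattern (the function is illustrative, not taken from the codebase):

```python
def show_table(rows: list[tuple[str, str]]) -> None:
    """Render service/host pairs as a Rich table."""
    # Heavy modules deferred so `cf --help` stays fast
    from rich.console import Console  # noqa: PLC0415 - speeds up CLI startup
    from rich.table import Table  # noqa: PLC0415 - speeds up CLI startup

    table = Table("service", "host")
    for service, host in rows:
        table.add_row(service, host)
    Console().print(table)
```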
## Communication Notes
@@ -34,14 +57,46 @@ compose_farm/
- **NEVER merge anything into main.** Always commit directly or use fast-forward/rebase.
- Never force push.
## Pull Requests
- Never include unchecked checklists (e.g., `- [ ] ...`) in PR descriptions. Either omit the checklist or use checked items.
- **NEVER run `gh pr merge`**. PRs are merged via the GitHub UI, not the CLI.
## Releases
Use `gh release create` to create releases. The tag is created automatically.
```bash
# Check current version
git tag --sort=-v:refname | head -1
# Create release (minor version bump: v0.21.1 -> v0.22.0)
gh release create v0.22.0 --title "v0.22.0" --notes "release notes here"
```
Versioning:
- **Patch** (v0.21.0 → v0.21.1): Bug fixes
- **Minor** (v0.21.1 → v0.22.0): New features, non-breaking changes
Write release notes manually describing what changed. Group by features and bug fixes.
## Commands Quick Reference
-| Command | Docker Compose Equivalent |
-|---------|--------------------------|
-| `up` | `docker compose up -d` |
-| `down` | `docker compose down` |
-| `pull` | `docker compose pull` |
-| `restart` | `down` + `up -d` |
CLI available as `cf` or `compose-farm`.
+| Command | Description |
+|---------|-------------|
+| `up` | Start services (`docker compose up -d`), auto-migrates if host changed |
+| `down` | Stop services (`docker compose down`). Use `--orphaned` to stop services removed from config |
+| `pull` | Pull latest images |
+| `restart` | `down` + `up -d` |
+| `update` | `pull` + `down` + `up -d` |
-| `logs` | `docker compose logs` |
-| `ps` | `docker compose ps` |
+| `apply` | Make reality match config: migrate services + stop orphans. Use `--dry-run` to preview |
+| `logs` | Show service logs |
+| `ps` | Show status of all services |
+| `stats` | Show overview (hosts, services, pending migrations; `--live` for container counts) |
+| `refresh` | Update state from reality: discover running services, capture image digests |
+| `check` | Validate config, traefik labels, mounts, networks; show host compatibility |
+| `init-network` | Create Docker network on hosts with consistent subnet/gateway |
+| `traefik-file` | Generate Traefik file-provider config from compose labels |
+| `config` | Manage config files (init, show, path, validate, edit) |

Dockerfile (new file, 20 lines)

@@ -0,0 +1,20 @@
# syntax=docker/dockerfile:1
# Build stage - install with uv
FROM ghcr.io/astral-sh/uv:python3.14-alpine AS builder
ARG VERSION
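# Note: ${VERSION:+==$VERSION} expands to ==$VERSION only when VERSION is set,
# so building without the build-arg installs the latest release from PyPI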
RUN uv tool install --compile-bytecode "compose-farm[web]${VERSION:+==$VERSION}"
# Runtime stage - minimal image without uv
FROM python:3.14-alpine
# Install only runtime requirements
RUN apk add --no-cache openssh-client
# Copy installed tool virtualenv and bin symlinks from builder
COPY --from=builder /root/.local/share/uv/tools/compose-farm /root/.local/share/uv/tools/compose-farm
COPY --from=builder /usr/local/bin/cf /usr/local/bin/compose-farm /usr/local/bin/
ENTRYPOINT ["cf"]
CMD ["--help"]
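For example (version number illustrative), the image could be built with `docker build --build-arg VERSION=0.22.0 -t compose-farm .`; omitting the build-arg pulls the newest PyPI release instead.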

LICENSE (new file, 21 lines)

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Bas Nijholt
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

PLAN.md (deleted, 35 lines)

@@ -1,35 +0,0 @@
# Compose Farm Traefik Multihost Ingress Plan
## Goal
Generate a Traefik file-provider fragment from existing docker-compose Traefik labels (no config duplication) so a single front-door Traefik on 192.168.1.66 with wildcard `*.lab.mydomain.org` can route to services running on other hosts. Keep the current simplicity (SSH + docker compose); no Swarm/K8s.
## Requirements
- Traefik stays on main host; keep current `dynamic.yml` and Docker provider for local containers.
- Add a watched directory provider (any path works) and load a generated fragment (e.g., `compose-farm.generated.yml`).
- No edits to compose files: reuse existing `traefik.*` labels as the single source of truth; Compose Farm only reads them.
- Generator infers routing from labels and reachability from `ports:` mappings; prefer host-published ports so Traefik can reach services across hosts. Upstreams point to `<host address>:<published host port>`; warn if no published port is found.
- Only minimal data in `compose-farm.yaml`: hosts map and service→host mapping (already present).
- No new orchestration/discovery layers; respect KISS/YAGNI/DRY.
## Non-Goals
- No Swarm/Kubernetes adoption.
- No global Docker provider across hosts.
- No health checks/service discovery layer.
## Current State (Dec 2025)
- Compose Farm: Typer CLI wrapping `docker compose` over SSH; config in `compose-farm.yaml`; parallel by default; snapshot/log tooling present.
- Traefik: single instance on 192.168.1.66, wildcard `*.lab.mydomain.org`, Docker provider for local services, file provider via `dynamic.yml` already in use.
## Proposed Implementation Steps
1) Add generator command: `compose-farm traefik-file --output <path>`.
2) Resolve per-service host from `compose-farm.yaml`; read compose file at `{compose_dir}/{service}/docker-compose.yml`.
3) Parse `traefik.*` labels to build routers/services/middlewares as in compose; map container port to published host port (from `ports:`) to form upstream URLs with host address (see the sketch after this list).
4) Emit file-provider YAML to the watched directory (recommended default: `/mnt/data/traefik/dynamic.d/compose-farm.generated.yml`, but user chooses via `--output`).
5) Warnings: if no published port is found, warn that cross-host reachability requires L3 reachability to container IPs.
6) Tests: label parsing, port mapping, YAML render; scenario with published port; scenario without published port.
7) Docs: update README/CLAUDE to describe directory provider flags and the generator workflow; note that compose files remain unchanged.
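A toy sketch of the label parsing in step 3 (simplified; the real generator must also handle services, middlewares, and published-port mapping):

```python
def parse_traefik_labels(labels: list[str]) -> dict:
    """Collect `traefik.http.routers.<name>.<field>=...` labels into router dicts."""
    routers: dict[str, dict[str, str]] = {}
    for label in labels:
        key, _, value = label.partition("=")
        parts = key.split(".")
        if parts[:3] == ["traefik", "http", "routers"]:
            name, field = parts[3], ".".join(parts[4:])
            routers.setdefault(name, {})[field] = value
    return {"http": {"routers": routers}}


print(parse_traefik_labels([
    "traefik.http.routers.plex.rule=Host(`plex.lab.mydomain.org`)",
    "traefik.http.routers.plex.entrypoints=websecure",
]))
```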
## Open Questions
- How to derive target host address: use `hosts.<name>.address` verbatim, or allow override per service? (Default: use host address.)
- Should we support multiple hosts/backends per service for LB/HA? (Start with single server.)
- Where to store generated file by default? (Default to user-specified `--output`; maybe fallback to `./compose-farm-traefik.yml`.)

README.md (956 lines changed): diff suppressed because it is too large


@@ -3,23 +3,28 @@
compose_dir: /opt/compose
+# Optional: Auto-regenerate Traefik file-provider config after up/down/restart/update
+traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
+traefik_service: traefik  # Skip services on same host (docker provider handles them)
hosts:
  # Full form with all options
-  nas01:
+  server-1:
    address: 192.168.1.10
    user: docker
    port: 22
  # Short form (just address, user defaults to current user)
-  nas02: 192.168.1.11
+  server-2: 192.168.1.11
+  # Local execution (no SSH)
+  local: localhost
services:
  # Map service names to hosts
-  # Compose file expected at: {compose_dir}/{service}/docker-compose.yml
-  plex: nas01
-  jellyfin: nas02
-  sonarr: nas01
-  radarr: nas02
+  # Compose file expected at: {compose_dir}/{service}/compose.yaml
+  traefik: server-1  # Traefik runs here
+  plex: server-2     # Services on other hosts get file-provider entries
+  jellyfin: server-2
+  sonarr: server-1
+  radarr: local

docker-compose.yml (new file, 52 lines)

@@ -0,0 +1,52 @@
services:
  cf:
    image: ghcr.io/basnijholt/compose-farm:latest
    volumes:
      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      # Compose directory (contains compose files AND compose-farm.yaml config)
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
      # SSH keys for passwordless auth (generated by `cf ssh setup`)
      # Choose ONE option below (use the same option for both cf and web services):
      # Option 1: Host path (default) - keys at ~/.ssh/compose-farm/id_ed25519
      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
      # Option 2: Named volume - managed by Docker, shared between services
      # - cf-ssh:/root/.ssh
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      # Config file path (state stored alongside it)
      - CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml

  web:
    image: ghcr.io/basnijholt/compose-farm:latest
    restart: unless-stopped
    command: web --host 0.0.0.0 --port 9000
    volumes:
      - ${SSH_AUTH_SOCK}:/ssh-agent:ro
      - ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
      # SSH keys - use the SAME option as cf service above
      # Option 1: Host path (default)
      - ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
      # Option 2: Named volume
      # - cf-ssh:/root/.ssh
    environment:
      - SSH_AUTH_SOCK=/ssh-agent
      - CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
      # Used to detect self-updates and run via SSH to survive container restart
      - CF_WEB_SERVICE=compose-farm
    labels:
      - traefik.enable=true
      - traefik.http.routers.compose-farm.rule=Host(`compose-farm.${DOMAIN}`)
      - traefik.http.routers.compose-farm.entrypoints=websecure
      - traefik.http.routers.compose-farm-local.rule=Host(`compose-farm.local`)
      - traefik.http.routers.compose-farm-local.entrypoints=web
      - traefik.http.services.compose-farm.loadbalancer.server.port=9000
    networks:
      - mynetwork

networks:
  mynetwork:
    external: true

volumes:
  cf-ssh:
    # Only used if Option 2 is selected above


@@ -0,0 +1,128 @@
# Future Improvements
Low-priority improvements identified during code review. These are not currently causing issues but could be addressed if they become pain points.
## 1. State Module Efficiency (LOW)
**Current:** Every state operation reads and writes the entire file.
```python
def set_service_host(config, service, host):
    state = load_state(config)   # Read file
    state[service] = host
    save_state(config, state)    # Write file
```
**Impact:** With 87 services, this is fine. With 1000+, it would be slow.
**Potential fix:** Add batch operations:
```python
def update_state(config, updates: dict[str, str | None]) -> None:
    """Batch update: set services to hosts, None means remove."""
    state = load_state(config)
    for service, host in updates.items():
        if host is None:
            state.pop(service, None)
        else:
            state[service] = host
    save_state(config, state)
```
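Call sites could then batch a whole migration into a single read-modify-write, e.g. `update_state(config, {"plex": "server-2", "old-svc": None})` (service and host names illustrative).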
**When to do:** Only if state operations become noticeably slow.
---
## 2. Remote-Aware Compose Path Resolution (LOW)
**Current:** `config.get_compose_path()` checks if files exist on the local filesystem:
```python
def get_compose_path(self, service: str) -> Path:
    for filename in ("compose.yaml", "compose.yml", ...):
        candidate = service_dir / filename
        if candidate.exists():  # Local check!
            return candidate
```
**Why this works:** NFS/shared storage means local = remote.
**Why it could break:** If running compose-farm from a machine without the NFS mount, it returns `compose.yaml` (the default) even if `docker-compose.yml` exists on the remote host.
**Potential fix:** Query the remote host for file existence, or accept this limitation and document it.
**When to do:** Only if users need to run compose-farm from non-NFS machines.
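A rough sketch of a remote-aware check over plain `ssh` (the helper name and config shape are assumptions, not the current API):

```python
import subprocess
from pathlib import Path

COMPOSE_NAMES = ("compose.yaml", "compose.yml", "docker-compose.yml", "docker-compose.yaml")


def remote_compose_path(host: str, service_dir: str) -> Path | None:
    """Return the first compose file that exists on the remote host, if any."""
    for filename in COMPOSE_NAMES:
        candidate = f"{service_dir}/{filename}"
        # `test -e` exits 0 iff the path exists on the remote side
        proc = subprocess.run(["ssh", host, "test", "-e", candidate], check=False)
        if proc.returncode == 0:
            return Path(candidate)
    return None
```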
---
## 3. Add Integration Tests for CLI Commands (MEDIUM)
**Current:** No integration tests for the actual CLI commands. Tests cover the underlying functions but not the Typer commands themselves.
**Potential fix:** Add integration tests using `CliRunner` from Typer:
```python
from typer.testing import CliRunner

from compose_farm.cli import app

runner = CliRunner()


def test_check_command_validates_config():
    result = runner.invoke(app, ["check", "--local"])
    assert result.exit_code == 0
```
**When to do:** When CLI behavior becomes complex enough to warrant dedicated testing.
---
## 4. Add Tests for operations.py (MEDIUM)
**Current:** Operations module has 30% coverage. Most logic is tested indirectly through test_sync.py.
**Potential fix:** Add dedicated tests (see the sketch below) for:
- `up_services()` with migration scenarios
- `preflight_check()`
- `check_host_compatibility()`
**When to do:** When adding new operations or modifying migration logic.
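One self-contained way to test this kind of logic is to inject the execution seam and assert on recorded calls. A generic sketch (the injectable `run` parameter is hypothetical, not the real `operations` signature):

```python
import asyncio
from collections.abc import Awaitable, Callable


async def up_services(services: list[str], run: Callable[[str, str], Awaitable[None]]) -> None:
    """Simplified stand-in for operations.up_services(): one `up -d` per service."""
    await asyncio.gather(*(run(svc, "up -d") for svc in services))


def test_up_services_runs_each_service() -> None:
    recorded: list[tuple[str, str]] = []

    async def fake_run(service: str, command: str) -> None:
        recorded.append((service, command))

    asyncio.run(up_services(["plex", "jellyfin"], fake_run))
    assert sorted(recorded) == [("jellyfin", "up -d"), ("plex", "up -d")]
```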
---
## 5. Consider Structured Logging (LOW)
**Current:** Operations print directly to console using Rich. This couples the operations module to the Rich library.
**Potential fix:** Use Python's logging module with a custom Rich handler:
```python
import logging
logger = logging.getLogger(__name__)
# In operations:
logger.info("Migrating %s from %s to %s", service, old_host, new_host)
# In cli.py - configure Rich handler:
from rich.logging import RichHandler
logging.basicConfig(handlers=[RichHandler()])
```
**Benefits:**
- Operations become testable without capturing stdout
- Logs can be redirected to files
- Log levels provide filtering
**When to do:** Only if console output coupling becomes a problem for testing or extensibility.
---
## Design Decisions to Keep
These patterns are working well and should be preserved:
1. **asyncio + asyncssh** - Solid async foundation
2. **Pydantic models** - Clean validation
3. **Rich for output** - Good UX
4. **Test structure** - Good coverage
5. **Module separation** - cli/operations/executor/compose pattern
6. **KISS principle** - Don't over-engineer

docs/reddit-post.md (new file, 79 lines)

@@ -0,0 +1,79 @@
# Title options
- Multi-host Docker Compose without Kubernetes or file changes
- I built a CLI to run Docker Compose across hosts. Zero changes to your files.
- I made a CLI to run Docker Compose across multiple hosts without Kubernetes or Swarm
---
I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread services across multiple hosts, but:
- **Kubernetes** felt like overkill. I don't need pods, ingress controllers, or 10x more YAML.
- **Docker Swarm** is basically in maintenance mode.
- Both require rewriting my compose files.
So I built **Compose Farm**, a simple CLI that runs `docker compose` commands over SSH. No agents, no cluster setup, no changes to your existing compose files.
## How it works
One YAML file maps services to hosts:
```yaml
compose_dir: /opt/stacks

hosts:
  nuc: 192.168.1.10
  hp: 192.168.1.11

services:
  plex: nuc
  jellyfin: hp
  sonarr: nuc
  radarr: nuc
```
Then just:
```bash
cf up plex # runs on nuc via SSH
cf apply       # makes what's running match the config on all hosts (like Terraform apply)
cf up --all # starts everything on their assigned hosts
cf logs -f plex # streams logs
cf ps # shows status across all hosts
```
## Auto-migration
Change a service's host in the config and run `cf up`. It stops the service on the old host and starts it on the new one. No manual SSH needed.
```yaml
# Before
plex: nuc
# After (just change this)
plex: hp
```
```bash
cf up plex # migrates automatically
```
## Requirements
- SSH key auth to your hosts
- Same paths on all hosts (I use NFS from my NAS)
- That's it. No agents, no daemons.
## What it doesn't do
- No high availability (if a host goes down, services don't auto-migrate)
- No overlay networking (containers on different hosts can't talk via Docker DNS)
- No health checks or automatic restarts
It's a convenience wrapper around `docker compose` + SSH. If you need failover or cross-host container networking, you probably do need Swarm or Kubernetes.
## Links
- GitHub: https://github.com/basnijholt/compose-farm
- Install: `uv tool install compose-farm` or `pip install compose-farm`
Happy to answer questions or take feedback!

docs/truenas-nested-nfs.md (new file, 169 lines)

@@ -0,0 +1,169 @@
# TrueNAS NFS: Accessing Child ZFS Datasets
When NFS-exporting a parent ZFS dataset on TrueNAS, child datasets appear as **empty directories** to NFS clients. This document explains the problem and provides a workaround.
## The Problem
TrueNAS structures storage as ZFS datasets. A common pattern is:
```
tank/data <- parent dataset (NFS exported)
tank/data/app1 <- child dataset
tank/data/app2 <- child dataset
```
When you create an NFS share for `tank/data`, clients mount it and see the `app1/` and `app2/` directories—but they're empty. This happens because each ZFS dataset is a separate filesystem, and NFS doesn't traverse into child filesystems by default.
## The Solution: `crossmnt`
The NFS `crossmnt` export option tells the server to allow clients to traverse into child filesystems. However, TrueNAS doesn't expose this option in the UI.
### Workaround Script
This Python script injects `crossmnt` into `/etc/exports`:
```python
#!/usr/bin/env python3
"""
Add crossmnt to TrueNAS NFS exports for child dataset visibility.

Usage: fix-nfs-crossmnt.py /mnt/pool/dataset

Setup:
1. scp fix-nfs-crossmnt.py root@truenas.local:/root/
2. chmod +x /root/fix-nfs-crossmnt.py
3. Test: /root/fix-nfs-crossmnt.py /mnt/pool/dataset
4. Add cron job: TrueNAS UI > System > Advanced > Cron Jobs
   Command: /root/fix-nfs-crossmnt.py /mnt/pool/dataset
   Schedule: */5 * * * *
"""
import re
import subprocess
import sys
from pathlib import Path

EXPORTS_FILE = Path("/etc/exports")


def main():
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} /mnt/pool/dataset", file=sys.stderr)
        return 1
    export_path = sys.argv[1]
    content = EXPORTS_FILE.read_text()
    if f'"{export_path}"' not in content:
        print(f"ERROR: {export_path} not found in {EXPORTS_FILE}", file=sys.stderr)
        return 1
    lines = content.splitlines()
    result = []
    in_block = False
    modified = False
    for line in lines:
        if f'"{export_path}"' in line:
            in_block = True
        elif line.startswith('"'):
            in_block = False
        if in_block and line[:1] in (" ", "\t") and "crossmnt" not in line:
            line = re.sub(r"\)(\\\s*)?$", r",crossmnt)\1", line)
            modified = True
        result.append(line)
    if not modified:
        return 0  # Already applied
    EXPORTS_FILE.write_text("\n".join(result) + "\n")
    subprocess.run(["exportfs", "-ra"], check=True)
    print(f"Added crossmnt to {export_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
```
## Setup Instructions
### 1. Copy the script to TrueNAS
```bash
scp fix-nfs-crossmnt.py root@truenas.local:/root/
ssh root@truenas.local chmod +x /root/fix-nfs-crossmnt.py
```
### 2. Test manually
```bash
ssh root@truenas.local
# Run the script
/root/fix-nfs-crossmnt.py /mnt/tank/data
# Verify crossmnt was added
cat /etc/exports
```
You should see `,crossmnt` added to the client options:
```
"/mnt/tank/data"\
192.168.1.10(sec=sys,rw,no_subtree_check,crossmnt)\
192.168.1.11(sec=sys,rw,no_subtree_check,crossmnt)
```
### 3. Verify on NFS client
```bash
# Before: empty directory
ls /mnt/data/app1/
# (nothing)
# After: actual contents visible
ls /mnt/data/app1/
# config.yaml data/ logs/
```
### 4. Make it persistent
TrueNAS regenerates `/etc/exports` when you modify NFS shares in the UI. To survive this, set up a cron job:
1. Go to **TrueNAS UI → System → Advanced → Cron Jobs → Add**
2. Configure:
- **Description:** Fix NFS crossmnt
- **Command:** `/root/fix-nfs-crossmnt.py /mnt/tank/data`
- **Run As User:** root
- **Schedule:** `*/5 * * * *` (every 5 minutes)
- **Enabled:** checked
3. Save
The script is idempotent—it only modifies the file if `crossmnt` is missing, and skips the write entirely if already applied.
## How It Works
1. Parses `/etc/exports` to find the specified export block
2. Adds `,crossmnt` before the closing `)` on each client line
3. Writes the file only if changes were made
4. Runs `exportfs -ra` to reload the NFS configuration
## Why Not Use SMB Instead?
SMB handles child datasets seamlessly, but:
- NFS is simpler for Linux-to-Linux with matching UIDs
- SMB requires more complex permission mapping for Docker volumes
- Many existing setups already use NFS
## Related Links
- [TrueNAS Forum: Add crossmnt option to NFS exports](https://forums.truenas.com/t/add-crossmnt-option-to-nfs-exports/10573)
- [exports(5) man page](https://man7.org/linux/man-pages/man5/exports.5.html) - see `crossmnt` option
## Tested On
- TrueNAS SCALE 24.10


@@ -0,0 +1,65 @@
# TrueNAS NFS: Disabling Root Squash
When running Docker containers on NFS-mounted storage, containers that run as root will fail to write files unless root squash is disabled. This document explains the problem and solution.
## The Problem
By default, NFS uses "root squash" which maps the root user (UID 0) on clients to `nobody` on the server. This is a security feature to prevent remote root users from having root access to the NFS server's files.
However, many Docker containers run as root internally. When these containers try to write to NFS-mounted volumes, the writes fail with "Permission denied" because the NFS server sees them as `nobody`, not `root`.
Example error in container logs:
```
System.UnauthorizedAccessException: Access to the path '/data' is denied.
Error: EACCES: permission denied, mkdir '/app/data'
```
## The Solution
In TrueNAS, configure the NFS share to map remote root to local root:
### TrueNAS SCALE UI
1. Go to **Shares → NFS**
2. Edit your share
3. Under **Advanced Options**:
- **Maproot User**: `root`
- **Maproot Group**: `wheel`
4. Save
### Result in /etc/exports
```
"/mnt/pool/data"\
192.168.1.25(sec=sys,rw,no_root_squash,no_subtree_check)\
192.168.1.26(sec=sys,rw,no_root_squash,no_subtree_check)
```
The `no_root_squash` option means remote root is treated as root on the server.
## Why `wheel`?
On FreeBSD/TrueNAS, the root user's primary group is `wheel` (GID 0), not `root` like on Linux. So `root:wheel` = `0:0`.
## Security Considerations
Disabling root squash means any machine that can mount the NFS share has full root access to those files. This is acceptable when:
- The NFS clients are on a trusted private network
- Only known hosts (by IP) are allowed to mount the share
- The data isn't security-critical
For home lab setups with Docker containers, this is typically fine.
## Alternative: Run Containers as Non-Root
If you prefer to keep root squash enabled, you can run containers as a non-root user:
1. **LinuxServer.io images**: Set `PUID=1000` and `PGID=1000` environment variables
2. **Other images**: Add `user: "1000:1000"` to the compose service
However, not all containers support running as non-root (they may need to bind to privileged ports, create system directories, etc.).
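Where non-root is supported, a single line like `user: "1000:1000"` in the service definition (as the uptime-kuma example later in this compare does) keeps container writes owned by a regular UID that root squash leaves untouched.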
## Tested On
- TrueNAS SCALE 24.10


@@ -1,42 +1,171 @@
# Compose Farm Examples
-This folder contains example Docker Compose services for testing Compose Farm locally.
+Real-world examples demonstrating compose-farm patterns for multi-host Docker deployments.
## Services
| Service | Type | Demonstrates |
|---------|------|--------------|
| [traefik](traefik/) | Infrastructure | Reverse proxy, Let's Encrypt, file-provider |
| [mealie](mealie/) | Single container | Traefik labels, resource limits, environment vars |
| [uptime-kuma](uptime-kuma/) | Single container | Docker socket, user mapping, custom DNS |
| [paperless-ngx](paperless-ngx/) | Multi-container | Redis + App stack (SQLite) |
| [autokuma](autokuma/) | Multi-host | Demonstrates `all` keyword (runs on every host) |
## Key Patterns
### External Network
All services connect to a shared external network for inter-service communication:
```yaml
networks:
  mynetwork:
    external: true
```
Create it on each host with consistent settings:
```bash
compose-farm init-network --network mynetwork --subnet 172.20.0.0/16
```
### Traefik Labels (Dual Routes)
Services expose two routes for different access patterns:
1. **HTTPS route** (`websecure` entrypoint): For your custom domain with Let's Encrypt TLS
2. **HTTP route** (`web` entrypoint): For `.local` domains on your LAN (no TLS needed)
This pattern allows accessing services via:
- `https://mealie.example.com` - from anywhere, with TLS
- `http://mealie.local` - from your local network, no TLS overhead
```yaml
labels:
  # HTTPS route for custom domain (e.g., myapp.example.com)
  - traefik.enable=true
  - traefik.http.routers.myapp.rule=Host(`myapp.${DOMAIN}`)
  - traefik.http.routers.myapp.entrypoints=websecure
  - traefik.http.services.myapp.loadbalancer.server.port=8080
  # HTTP route for .local domain (e.g., myapp.local)
  - traefik.http.routers.myapp-local.rule=Host(`myapp.local`)
  - traefik.http.routers.myapp-local.entrypoints=web
```
> **Note:** `.local` domains require local DNS (e.g., Pi-hole, Technitium) to resolve to your Traefik host.
### Environment Variables
Each service has a `.env` file for secrets and domain configuration.
Edit these files to set your domain and credentials:
```bash
# Example: set your domain
echo "DOMAIN=example.com" > mealie/.env
```
Variables like `${DOMAIN}` are substituted at runtime by Docker Compose.
### NFS Volume Mounts
All data is stored on shared NFS storage at `/mnt/data/`:
```yaml
volumes:
  - /mnt/data/myapp:/app/data
```
This allows services to migrate between hosts without data loss.
### Multi-Host Services
Services that need to run on every host (e.g., monitoring agents):
```yaml
# In compose-farm.yaml
services:
  autokuma: all  # Runs on every configured host
```
### Multi-Container Stacks
Database-backed apps with multiple services:
```yaml
services:
  redis:
    image: redis:7
  app:
    depends_on:
      - redis
```
> **NFS + PostgreSQL Warning:** PostgreSQL should NOT run on NFS storage due to
> fsync and file locking issues. Use SQLite (safe for single-writer on NFS) or
> keep PostgreSQL data on local volumes (non-migratable).
### AutoKuma Labels (Optional)
The autokuma example demonstrates compose-farm's **multi-host feature** - running the same service on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.
[AutoKuma](https://github.com/BigBoot/AutoKuma) automatically creates Uptime Kuma monitors from Docker labels:
```yaml
labels:
  - kuma.myapp.http.name=My App
  - kuma.myapp.http.url=https://myapp.${DOMAIN}
```
## Quick Start
```bash
cd examples
-# Check status of all services
+# 1. Create the shared network on all hosts
+compose-farm init-network
+# 2. Start Traefik first (the reverse proxy)
+compose-farm up traefik
+# 3. Start other services
+compose-farm up mealie uptime-kuma
+# 4. Check status
compose-farm ps
-# Pull images
-compose-farm pull --all
+# 5. Generate Traefik file-provider config for cross-host routing
+compose-farm traefik-file --all
-# Start hello-world (runs and exits)
-compose-farm up hello
+# 6. View logs
+compose-farm logs mealie
-# Start nginx (stays running)
-compose-farm up nginx
-# Check nginx is running
-curl localhost:8080
-# View logs
-compose-farm logs nginx
-# Stop nginx
-compose-farm down nginx
-# Update all (pull + restart)
-compose-farm update --all
+# 7. Stop everything
+compose-farm down --all
```
-## Services
+## Configuration
-- **hello**: Simple hello-world container (exits immediately)
-- **nginx**: Nginx web server on port 8080
+The `compose-farm.yaml` shows a multi-host setup:
-## Config
+- **primary** (192.168.1.10): Runs Traefik and heavy services
+- **secondary** (192.168.1.11): Runs lighter services
+- **autokuma**: Runs on ALL hosts to monitor local containers
-The `compose-farm.yaml` in this directory configures both services to run locally (no SSH).
+When Traefik runs on `primary` and a service runs on `secondary`, compose-farm
+automatically generates file-provider config so Traefik can route to it.
## Traefik File-Provider
When services run on different hosts than Traefik, use `traefik-file` to generate routing config:
```bash
# Generate config for all services
compose-farm traefik-file --all -o traefik/dynamic.d/compose-farm.yml
# Or configure auto-generation in compose-farm.yaml:
traefik_file: /opt/stacks/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik
```
With `traefik_file` configured, compose-farm automatically regenerates the config after `up`, `down`, `restart`, and `update` commands.

examples/autokuma/.env (new file, 4 lines)

@@ -0,0 +1,4 @@
# Copy to .env and fill in your values
DOMAIN=example.com
UPTIME_KUMA_USERNAME=admin
UPTIME_KUMA_PASSWORD=your-uptime-kuma-password


@@ -0,0 +1,31 @@
# AutoKuma - Automatic Uptime Kuma monitor creation from Docker labels
# Demonstrates: Multi-host service (runs on ALL hosts)
#
# This service monitors Docker containers on each host and automatically
# creates Uptime Kuma monitors based on container labels.
#
# In compose-farm.yaml, configure as:
# autokuma: all
#
# This runs the same container on every host, so each host's local
# Docker socket is monitored.
name: autokuma

services:
  autokuma:
    image: ghcr.io/bigboot/autokuma:latest
    container_name: autokuma
    restart: unless-stopped
    environment:
      # Connect to your Uptime Kuma instance
      AUTOKUMA__KUMA__URL: https://uptime.${DOMAIN}
      AUTOKUMA__KUMA__USERNAME: ${UPTIME_KUMA_USERNAME}
      AUTOKUMA__KUMA__PASSWORD: ${UPTIME_KUMA_PASSWORD}
      # Tag for auto-created monitors
      AUTOKUMA__TAG__NAME: autokuma
      AUTOKUMA__TAG__COLOR: "#10B981"
    volumes:
      # Access local Docker socket to discover containers
      - /var/run/docker.sock:/var/run/docker.sock:ro
    # Custom DNS for resolving internal domains
    dns:
      - 192.168.1.1  # Your local DNS server


@@ -0,0 +1,9 @@
deployed:
  autokuma:
    - primary
    - secondary
    - local
  mealie: secondary
  paperless-ngx: primary
  traefik: primary
  uptime-kuma: secondary


@@ -1,11 +1,40 @@
-# Example Compose Farm config for local testing
-# Run from the examples directory: cd examples && compose-farm ps
+# Example Compose Farm configuration
+# Demonstrates a multi-host setup with NFS shared storage
+#
+# To test locally: Update the host addresses and run from the examples directory
-compose_dir: .
+compose_dir: /opt/stacks/compose-farm/examples
+# Auto-regenerate Traefik file-provider config after up/down/restart/update
+traefik_file: /opt/stacks/compose-farm/examples/traefik/dynamic.d/compose-farm.yml
+traefik_service: traefik  # Skip Traefik's host in file-provider (docker provider handles it)
hosts:
+  # Primary server - runs Traefik and most services
+  # Full form with all options
+  primary:
+    address: 192.168.1.10
+    user: deploy
+    port: 22
+  # Secondary server - runs some services for load distribution
+  # Short form (user defaults to current user, port defaults to 22)
+  secondary: 192.168.1.11
+  # Local execution (no SSH) - for testing or when running on the host itself
  local: localhost
services:
-  hello: local
-  nginx: local
+  # Infrastructure (runs on primary where Traefik is)
+  traefik: primary
+  # Multi-host services (runs on ALL hosts)
+  # AutoKuma monitors Docker containers on each host
+  autokuma: all
+  # Primary server services
+  paperless-ngx: primary
+  # Secondary server services (distributed for performance)
+  mealie: secondary
+  uptime-kuma: secondary


@@ -1,4 +0,0 @@
services:
  hello:
    image: hello-world
    container_name: sdc-hello

examples/mealie/.env (new file, 2 lines)

@@ -0,0 +1,2 @@
# Copy to .env and fill in your values
DOMAIN=example.com


@@ -0,0 +1,47 @@
# Mealie - Recipe manager
# Simple single-container service with Traefik labels
#
# Demonstrates:
# - HTTPS route: mealie.${DOMAIN} (e.g., mealie.example.com) with Let's Encrypt
# - HTTP route: mealie.local for LAN access without TLS
# - External network, resource limits, environment variables
name: mealie

services:
  mealie:
    image: ghcr.io/mealie-recipes/mealie:latest
    container_name: mealie
    restart: unless-stopped
    networks:
      - mynetwork
    ports:
      - "9925:9000"
    deploy:
      resources:
        limits:
          memory: 1000M
    volumes:
      - /mnt/data/mealie:/app/data
    environment:
      ALLOW_SIGNUP: "false"
      PUID: 1000
      PGID: 1000
      TZ: America/Los_Angeles
      MAX_WORKERS: 1
      WEB_CONCURRENCY: 1
      BASE_URL: https://mealie.${DOMAIN}
    labels:
      # HTTPS route: mealie.example.com (requires DOMAIN in .env)
      - traefik.enable=true
      - traefik.http.routers.mealie.rule=Host(`mealie.${DOMAIN}`)
      - traefik.http.routers.mealie.entrypoints=websecure
      - traefik.http.services.mealie.loadbalancer.server.port=9000
      # HTTP route: mealie.local (for LAN access, no TLS)
      - traefik.http.routers.mealie-local.rule=Host(`mealie.local`)
      - traefik.http.routers.mealie-local.entrypoints=web
      # AutoKuma: automatically create Uptime Kuma monitor
      - kuma.mealie.http.name=Mealie
      - kuma.mealie.http.url=https://mealie.${DOMAIN}

networks:
  mynetwork:
    external: true


@@ -1,6 +0,0 @@
services:
  nginx:
    image: nginx:alpine
    container_name: sdc-nginx
    ports:
      - "8080:80"


@@ -0,0 +1,3 @@
# Copy to .env and fill in your values
DOMAIN=example.com
PAPERLESS_SECRET_KEY=change-me-to-a-random-string


@@ -0,0 +1,60 @@
# Paperless-ngx - Document management system
#
# Demonstrates:
# - HTTPS route: paperless.${DOMAIN} (e.g., paperless.example.com) with Let's Encrypt
# - HTTP route: paperless.local for LAN access without TLS
# - Multi-container stack (Redis + App with SQLite)
#
# NOTE: This example uses SQLite (the default) instead of PostgreSQL.
# PostgreSQL should NOT be used with NFS storage due to fsync/locking issues.
# If you need PostgreSQL, use local volumes for the database.
name: paperless-ngx

services:
  redis:
    image: redis:8
    container_name: paperless-redis
    restart: unless-stopped
    networks:
      - mynetwork
    volumes:
      - /mnt/data/paperless/redis:/data

  paperless:
    image: ghcr.io/paperless-ngx/paperless-ngx:latest
    container_name: paperless
    restart: unless-stopped
    depends_on:
      - redis
    networks:
      - mynetwork
    ports:
      - "8000:8000"
    volumes:
      # SQLite database stored here (safe on NFS for single-writer)
      - /mnt/data/paperless/data:/usr/src/paperless/data
      - /mnt/data/paperless/media:/usr/src/paperless/media
      - /mnt/data/paperless/export:/usr/src/paperless/export
      - /mnt/data/paperless/consume:/usr/src/paperless/consume
    environment:
      PAPERLESS_REDIS: redis://redis:6379
      PAPERLESS_URL: https://paperless.${DOMAIN}
      PAPERLESS_SECRET_KEY: ${PAPERLESS_SECRET_KEY}
      USERMAP_UID: 1000
      USERMAP_GID: 1000
    labels:
      # HTTPS route: paperless.example.com (requires DOMAIN in .env)
      - traefik.enable=true
      - traefik.http.routers.paperless.rule=Host(`paperless.${DOMAIN}`)
      - traefik.http.routers.paperless.entrypoints=websecure
      - traefik.http.services.paperless.loadbalancer.server.port=8000
      - traefik.docker.network=mynetwork
      # HTTP route: paperless.local (for LAN access, no TLS)
      - traefik.http.routers.paperless-local.rule=Host(`paperless.local`)
      - traefik.http.routers.paperless-local.entrypoints=web
      # AutoKuma: automatically create Uptime Kuma monitor
      - kuma.paperless.http.name=Paperless
      - kuma.paperless.http.url=https://paperless.${DOMAIN}

networks:
  mynetwork:
    external: true

examples/traefik/.env (new file, 5 lines)

@@ -0,0 +1,5 @@
# Copy to .env and fill in your values
DOMAIN=example.com
ACME_EMAIL=you@example.com
CF_API_EMAIL=you@example.com
CF_API_KEY=your-cloudflare-api-key


@@ -0,0 +1,58 @@
# Traefik reverse proxy with Let's Encrypt and file-provider support
# This is the foundation service - other services route through it
#
# Entrypoints:
# - web (port 80): HTTP for .local domains (no TLS needed on LAN)
# - websecure (port 443): HTTPS with Let's Encrypt for custom domains
name: traefik

services:
  traefik:
    image: traefik:v3.2
    container_name: traefik
    command:
      - --api.dashboard=true
      - --providers.docker=true
      - --providers.docker.exposedbydefault=false
      - --providers.docker.network=mynetwork
      # File provider for routing to services on other hosts
      - --providers.file.directory=/dynamic.d
      - --providers.file.watch=true
      # HTTP entrypoint for .local domains (LAN access, no TLS)
      - --entrypoints.web.address=:80
      # HTTPS entrypoint for custom domains (with Let's Encrypt TLS)
      - --entrypoints.websecure.address=:443
      - --entrypoints.websecure.asDefault=true
      - --entrypoints.websecure.http.tls.certresolver=letsencrypt
      # Let's Encrypt DNS challenge (using Cloudflare as example)
      - --certificatesresolvers.letsencrypt.acme.email=${ACME_EMAIL}
      - --certificatesresolvers.letsencrypt.acme.storage=/letsencrypt/acme.json
      - --certificatesresolvers.letsencrypt.acme.dnschallenge.provider=cloudflare
      - --certificatesresolvers.letsencrypt.acme.dnschallenge.resolvers=1.1.1.1:53
    environment:
      # Cloudflare API token for DNS challenge
      CF_API_EMAIL: ${CF_API_EMAIL}
      CF_API_KEY: ${CF_API_KEY}
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
      - "8080:8080"  # Dashboard
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /mnt/data/traefik/letsencrypt:/letsencrypt
      - ./dynamic.d:/dynamic.d:ro
    networks:
      - mynetwork
    labels:
      - traefik.enable=true
      # Dashboard accessible at traefik.yourdomain.com
      - traefik.http.routers.traefik.rule=Host(`traefik.${DOMAIN}`)
      - traefik.http.routers.traefik.entrypoints=websecure
      - traefik.http.routers.traefik.service=api@internal
      # AutoKuma: automatically create Uptime Kuma monitor
      - kuma.traefik.http.name=Traefik
      - kuma.traefik.http.url=https://traefik.${DOMAIN}

networks:
  mynetwork:
    external: true


@@ -0,0 +1,40 @@
# Auto-generated by compose-farm
# https://github.com/basnijholt/compose-farm
#
# This file routes traffic to services running on hosts other than Traefik's host.
# Services on Traefik's host use the Docker provider directly.
#
# Regenerate with: compose-farm traefik-file --all -o <this-file>
# Or configure traefik_file in compose-farm.yaml for automatic updates.
http:
  routers:
    mealie:
      rule: Host(`mealie.example.com`)
      entrypoints:
        - websecure
      service: mealie
    mealie-local:
      rule: Host(`mealie.local`)
      entrypoints:
        - web
      service: mealie
    uptime:
      rule: Host(`uptime.example.com`)
      entrypoints:
        - websecure
      service: uptime
    uptime-local:
      rule: Host(`uptime.local`)
      entrypoints:
        - web
      service: uptime
  services:
    mealie:
      loadbalancer:
        servers:
          - url: http://192.168.1.11:9925
    uptime:
      loadbalancer:
        servers:
          - url: http://192.168.1.11:3001


@@ -0,0 +1,2 @@
# Copy to .env and fill in your values
DOMAIN=example.com


@@ -0,0 +1,43 @@
# Uptime Kuma - Monitoring dashboard
#
# Demonstrates:
# - HTTPS route: uptime.${DOMAIN} (e.g., uptime.example.com) with Let's Encrypt
# - HTTP route: uptime.local for LAN access without TLS
# - Docker socket access, user mapping for NFS, custom DNS
name: uptime-kuma

services:
  uptime-kuma:
    image: louislam/uptime-kuma:2
    container_name: uptime-kuma
    restart: unless-stopped
    # Run as non-root user (important for NFS volumes)
    user: "1000:1000"
    networks:
      - mynetwork
    ports:
      - "3001:3001"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /mnt/data/uptime-kuma:/app/data
    environment:
      PUID: 1000
      PGID: 1000
    # Custom DNS for internal domain resolution
    dns:
      - 192.168.1.1  # Your local DNS server
    labels:
      # HTTPS route: uptime.example.com (requires DOMAIN in .env)
      - traefik.enable=true
      - traefik.http.routers.uptime.rule=Host(`uptime.${DOMAIN}`)
      - traefik.http.routers.uptime.entrypoints=websecure
      - traefik.http.services.uptime.loadbalancer.server.port=3001
      # HTTP route: uptime.local (for LAN access, no TLS)
      - traefik.http.routers.uptime-local.rule=Host(`uptime.local`)
      - traefik.http.routers.uptime-local.entrypoints=web
      # AutoKuma: automatically create Uptime Kuma monitor
      - kuma.uptime.http.name=Uptime Kuma
      - kuma.uptime.http.url=https://uptime.${DOMAIN}

networks:
  mynetwork:
    external: true

hatch_build.py (new file, 170 lines)

@@ -0,0 +1,170 @@
"""Hatch build hook to vendor CDN assets for offline use.
During wheel builds, this hook:
1. Parses base.html to find elements with data-vendor attributes
2. Downloads each CDN asset to a temporary vendor directory
3. Rewrites base.html to use local /static/vendor/ paths
4. Fetches and bundles license information
5. Includes everything in the wheel via force_include
The source base.html keeps CDN links for development; only the
distributed wheel has vendored assets.
"""
from __future__ import annotations
import re
import shutil
import subprocess
import tempfile
from pathlib import Path
from typing import Any
from urllib.request import Request, urlopen
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
# Matches elements with data-vendor attribute: extracts URL and target filename
# Example: <script src="https://..." data-vendor="htmx.js">
# Captures: (1) src/href, (2) URL, (3) attributes between, (4) vendor filename
VENDOR_PATTERN = re.compile(r'(src|href)="(https://[^"]+)"([^>]*?)data-vendor="([^"]+)"')
# License URLs for each package (GitHub raw URLs)
LICENSE_URLS: dict[str, tuple[str, str]] = {
"htmx": ("MIT", "https://raw.githubusercontent.com/bigskysoftware/htmx/master/LICENSE"),
"xterm": ("MIT", "https://raw.githubusercontent.com/xtermjs/xterm.js/master/LICENSE"),
"daisyui": ("MIT", "https://raw.githubusercontent.com/saadeghi/daisyui/master/LICENSE"),
"tailwindcss": (
"MIT",
"https://raw.githubusercontent.com/tailwindlabs/tailwindcss/master/LICENSE",
),
}
def _download(url: str) -> bytes:
"""Download a URL, trying urllib first then curl as fallback."""
# Try urllib first
try:
req = Request( # noqa: S310
url, headers={"User-Agent": "Mozilla/5.0 (compatible; compose-farm build)"}
)
with urlopen(req, timeout=30) as resp: # noqa: S310
return resp.read() # type: ignore[no-any-return]
except Exception: # noqa: S110
pass # Fall through to curl
# Fallback to curl (handles SSL proxies better)
result = subprocess.run(
["curl", "-fsSL", "--max-time", "30", url], # noqa: S607
capture_output=True,
check=True,
)
return bytes(result.stdout)
def _generate_licenses_file(temp_dir: Path) -> None:
"""Download and combine license files into LICENSES.txt."""
lines = [
"# Vendored Dependencies - License Information",
"",
"This file contains license information for JavaScript/CSS libraries",
"bundled with compose-farm for offline use.",
"",
"=" * 70,
"",
]
for pkg_name, (license_type, license_url) in LICENSE_URLS.items():
lines.append(f"## {pkg_name} ({license_type})")
lines.append(f"Source: {license_url}")
lines.append("")
lines.append(_download(license_url).decode("utf-8"))
lines.append("")
lines.append("=" * 70)
lines.append("")
(temp_dir / "LICENSES.txt").write_text("\n".join(lines))
class VendorAssetsHook(BuildHookInterface): # type: ignore[misc]
"""Hatch build hook that vendors CDN assets into the wheel."""
PLUGIN_NAME = "vendor-assets"
def initialize(
self,
_version: str,
build_data: dict[str, Any],
) -> None:
"""Download CDN assets and prepare them for inclusion in the wheel."""
# Only run for wheel builds
if self.target_name != "wheel":
return
# Paths
src_dir = Path(self.root) / "src" / "compose_farm"
base_html_path = src_dir / "web" / "templates" / "base.html"
if not base_html_path.exists():
return
# Create temp directory for vendored assets
temp_dir = Path(tempfile.mkdtemp(prefix="compose_farm_vendor_"))
vendor_dir = temp_dir / "vendor"
vendor_dir.mkdir()
# Read and parse base.html
html_content = base_html_path.read_text()
url_to_filename: dict[str, str] = {}
# Find all elements with data-vendor attribute and download them
for match in VENDOR_PATTERN.finditer(html_content):
url = match.group(2)
filename = match.group(4)
if url in url_to_filename:
continue
url_to_filename[url] = filename
content = _download(url)
(vendor_dir / filename).write_bytes(content)
if not url_to_filename:
return
# Generate LICENSES.txt
_generate_licenses_file(vendor_dir)
# Rewrite HTML to use local paths (remove data-vendor, update URL)
def replace_vendor_tag(match: re.Match[str]) -> str:
attr = match.group(1) # src or href
url = match.group(2)
between = match.group(3) # attributes between URL and data-vendor
filename = match.group(4)
if url in url_to_filename:
return f'{attr}="/static/vendor/{filename}"{between}'
return match.group(0)
modified_html = VENDOR_PATTERN.sub(replace_vendor_tag, html_content)
# Write modified base.html to temp
templates_dir = temp_dir / "templates"
templates_dir.mkdir()
(templates_dir / "base.html").write_text(modified_html)
# Add to force_include to override files in the wheel
force_include = build_data.setdefault("force_include", {})
force_include[str(vendor_dir)] = "compose_farm/web/static/vendor"
force_include[str(templates_dir / "base.html")] = "compose_farm/web/templates/base.html"
# Store temp_dir path for cleanup
self._temp_dir = temp_dir
def finalize(
self,
_version: str,
_build_data: dict[str, Any],
_artifact_path: str,
) -> None:
"""Clean up temporary directory after build."""
if hasattr(self, "_temp_dir") and self._temp_dir.exists():
shutil.rmtree(self._temp_dir, ignore_errors=True)
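A quick illustration of what `VENDOR_PATTERN` captures, on a made-up tag:

```python
import re

VENDOR_PATTERN = re.compile(r'(src|href)="(https://[^"]+)"([^>]*?)data-vendor="([^"]+)"')

tag = '<script src="https://unpkg.com/htmx.org" defer data-vendor="htmx.js"></script>'
m = VENDOR_PATTERN.search(tag)
assert m is not None
print(m.group(1), m.group(2), m.group(4))  # -> src https://unpkg.com/htmx.org htmx.js
```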


@@ -3,10 +3,43 @@ name = "compose-farm"
dynamic = ["version"]
description = "Compose Farm - run docker compose commands across multiple hosts"
readme = "README.md"
license = "MIT"
license-files = ["LICENSE"]
authors = [
    { name = "Bas Nijholt", email = "bas@nijho.lt" }
]
maintainers = [
    { name = "Bas Nijholt", email = "bas@nijho.lt" }
]
requires-python = ">=3.11"
keywords = [
    "docker",
    "docker-compose",
    "ssh",
    "devops",
    "deployment",
    "container",
    "orchestration",
    "multi-host",
    "homelab",
    "self-hosted",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "Intended Audience :: System Administrators",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: System :: Systems Administration",
    "Topic :: Utilities",
    "Typing :: Typed",
]
dependencies = [
    "typer>=0.9.0",
    "pydantic>=2.0.0",
@@ -15,6 +48,20 @@ dependencies = [
    "rich>=13.0.0",
]
[project.optional-dependencies]
web = [
    "fastapi[standard]>=0.109.0",
    "jinja2>=3.1.0",
    "websockets>=12.0",
]
[project.urls]
Homepage = "https://github.com/basnijholt/compose-farm"
Repository = "https://github.com/basnijholt/compose-farm"
Documentation = "https://github.com/basnijholt/compose-farm#readme"
Issues = "https://github.com/basnijholt/compose-farm/issues"
Changelog = "https://github.com/basnijholt/compose-farm/releases"
[project.scripts]
compose-farm = "compose_farm.cli:app"
cf = "compose_farm.cli:app"
@@ -32,6 +79,9 @@ version-file = "src/compose_farm/_version.py"
[tool.hatch.build.targets.wheel]
packages = ["src/compose_farm"]
[tool.hatch.build.hooks.custom]
# Vendors CDN assets (JS/CSS) into the wheel for offline use
[tool.ruff]
target-version = "py311"
line-length = 100
@@ -61,7 +111,7 @@ ignore = [
]
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101", "PLR2004", "S108", "D102", "D103"] # relaxed docstrings + asserts in tests
"tests/*" = ["S101", "PLR2004", "S108", "D102", "D103", "PLC0415", "ARG001", "ARG002", "TC003"] # relaxed for tests
[tool.ruff.lint.mccabe]
max-complexity = 18
@@ -79,6 +129,10 @@ ignore_missing_imports = true
module = "tests.*"
disallow_untyped_decorators = false
[[tool.mypy.overrides]]
module = "compose_farm.web.*"
disallow_untyped_decorators = false
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
@@ -113,4 +167,11 @@ dev = [
"ruff>=0.14.8",
"types-pyyaml>=6.0.12.20250915",
"markdown-code-runner>=0.7.0",
# Web deps for type checking (these ship with inline types)
"fastapi>=0.109.0",
"uvicorn[standard]>=0.27.0",
"jinja2>=3.1.0",
"websockets>=12.0",
# For FastAPI TestClient
"httpx>=0.28.0",
]


@@ -1,468 +0,0 @@
"""CLI interface using Typer."""
from __future__ import annotations
import asyncio
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, TypeVar
import typer
import yaml
from rich.console import Console
from . import __version__
from .config import Config, load_config
from .logs import snapshot_services
from .ssh import (
CommandResult,
check_service_running,
run_compose,
run_compose_on_host,
run_on_services,
run_sequential_on_services,
)
from .state import get_service_host, load_state, remove_service, save_state, set_service_host
from .traefik import generate_traefik_config
if TYPE_CHECKING:
from collections.abc import Coroutine
T = TypeVar("T")
console = Console(highlight=False)
err_console = Console(stderr=True, highlight=False)
def _load_config_or_exit(config_path: Path | None) -> Config:
"""Load config or exit with a friendly error message."""
try:
return load_config(config_path)
except FileNotFoundError as e:
err_console.print(f"[red]✗[/] {e}")
raise typer.Exit(1) from e
def _maybe_regenerate_traefik(cfg: Config) -> None:
"""Regenerate traefik config if traefik_file is configured."""
if cfg.traefik_file is None:
return
try:
dynamic, warnings = generate_traefik_config(cfg, list(cfg.services.keys()))
cfg.traefik_file.parent.mkdir(parents=True, exist_ok=True)
cfg.traefik_file.write_text(yaml.safe_dump(dynamic, sort_keys=False))
console.print() # Ensure we're on a new line after streaming output
console.print(f"[green]✓[/] Traefik config updated: {cfg.traefik_file}")
for warning in warnings:
err_console.print(f"[yellow]![/] {warning}")
except (FileNotFoundError, ValueError) as exc:
err_console.print(f"[yellow]![/] Failed to update traefik config: {exc}")
def _version_callback(value: bool) -> None:
"""Print version and exit."""
if value:
typer.echo(f"compose-farm {__version__}")
raise typer.Exit
app = typer.Typer(
name="compose-farm",
help="Compose Farm - run docker compose commands across multiple hosts",
no_args_is_help=True,
context_settings={"help_option_names": ["-h", "--help"]},
)
@app.callback()
def main(
version: Annotated[
bool,
typer.Option(
"--version",
"-v",
help="Show version and exit",
callback=_version_callback,
is_eager=True,
),
] = False,
) -> None:
"""Compose Farm - run docker compose commands across multiple hosts."""
def _get_services(
services: list[str],
all_services: bool,
config_path: Path | None,
) -> tuple[list[str], Config]:
"""Resolve service list and load config."""
config = _load_config_or_exit(config_path)
if all_services:
return list(config.services.keys()), config
if not services:
err_console.print("[red]✗[/] Specify services or use --all")
raise typer.Exit(1)
return list(services), config
def _run_async(coro: Coroutine[None, None, T]) -> T:
"""Run async coroutine."""
return asyncio.run(coro)
def _report_results(results: list[CommandResult]) -> None:
"""Report command results and exit with appropriate code."""
failed = [r for r in results if not r.success]
if failed:
for r in failed:
err_console.print(
f"[cyan]\\[{r.service}][/] [red]Failed with exit code {r.exit_code}[/]"
)
raise typer.Exit(1)
ServicesArg = Annotated[
list[str] | None,
typer.Argument(help="Services to operate on"),
]
AllOption = Annotated[
bool,
typer.Option("--all", "-a", help="Run on all services"),
]
ConfigOption = Annotated[
Path | None,
typer.Option("--config", "-c", help="Path to config file"),
]
LogPathOption = Annotated[
Path | None,
typer.Option("--log-path", "-l", help="Path to Dockerfarm TOML log"),
]
async def _up_with_migration(
cfg: Config,
services: list[str],
) -> list[CommandResult]:
"""Start services with automatic migration if host changed."""
results: list[CommandResult] = []
for service in services:
target_host = cfg.services[service]
current_host = get_service_host(cfg, service)
# If service is deployed elsewhere, migrate it
if current_host and current_host != target_host:
if current_host in cfg.hosts:
console.print(
f"[cyan]\\[{service}][/] Migrating from "
f"[magenta]{current_host}[/] → [magenta]{target_host}[/]..."
)
down_result = await run_compose_on_host(cfg, service, current_host, "down")
if not down_result.success:
results.append(down_result)
continue
else:
err_console.print(
f"[cyan]\\[{service}][/] [yellow]![/] was on "
f"[magenta]{current_host}[/] (not in config), skipping down"
)
# Start on target host
up_result = await run_compose(cfg, service, "up -d")
results.append(up_result)
# Update state on success
if up_result.success:
set_service_host(cfg, service, target_host)
return results
@app.command(rich_help_panel="Lifecycle")
def up(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Start services (docker compose up -d). Auto-migrates if host changed."""
svc_list, cfg = _get_services(services or [], all_services, config)
results = _run_async(_up_with_migration(cfg, svc_list))
_maybe_regenerate_traefik(cfg)
_report_results(results)
@app.command(rich_help_panel="Lifecycle")
def down(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Stop services (docker compose down)."""
svc_list, cfg = _get_services(services or [], all_services, config)
results = _run_async(run_on_services(cfg, svc_list, "down"))
# Remove from state on success
for result in results:
if result.success:
remove_service(cfg, result.service)
_maybe_regenerate_traefik(cfg)
_report_results(results)
@app.command(rich_help_panel="Lifecycle")
def pull(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Pull latest images (docker compose pull)."""
svc_list, cfg = _get_services(services or [], all_services, config)
results = _run_async(run_on_services(cfg, svc_list, "pull"))
_report_results(results)
@app.command(rich_help_panel="Lifecycle")
def restart(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Restart services (down + up)."""
svc_list, cfg = _get_services(services or [], all_services, config)
results = _run_async(run_sequential_on_services(cfg, svc_list, ["down", "up -d"]))
_maybe_regenerate_traefik(cfg)
_report_results(results)
@app.command(rich_help_panel="Lifecycle")
def update(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Update services (pull + down + up)."""
svc_list, cfg = _get_services(services or [], all_services, config)
results = _run_async(run_sequential_on_services(cfg, svc_list, ["pull", "down", "up -d"]))
_maybe_regenerate_traefik(cfg)
_report_results(results)
@app.command(rich_help_panel="Monitoring")
def logs(
services: ServicesArg = None,
all_services: AllOption = False,
follow: Annotated[bool, typer.Option("--follow", "-f", help="Follow logs")] = False,
tail: Annotated[int, typer.Option("--tail", "-n", help="Number of lines")] = 100,
config: ConfigOption = None,
) -> None:
"""Show service logs."""
svc_list, cfg = _get_services(services or [], all_services, config)
cmd = f"logs --tail {tail}"
if follow:
cmd += " -f"
results = _run_async(run_on_services(cfg, svc_list, cmd))
_report_results(results)
@app.command(rich_help_panel="Monitoring")
def ps(
config: ConfigOption = None,
) -> None:
"""Show status of all services."""
cfg = _load_config_or_exit(config)
results = _run_async(run_on_services(cfg, list(cfg.services.keys()), "ps"))
_report_results(results)
@app.command("traefik-file", rich_help_panel="Configuration")
def traefik_file(
services: ServicesArg = None,
all_services: AllOption = False,
output: Annotated[
Path | None,
typer.Option(
"--output",
"-o",
help="Write Traefik file-provider YAML to this path (stdout if omitted)",
),
] = None,
config: ConfigOption = None,
) -> None:
"""Generate a Traefik file-provider fragment from compose Traefik labels."""
svc_list, cfg = _get_services(services or [], all_services, config)
try:
dynamic, warnings = generate_traefik_config(cfg, svc_list)
except (FileNotFoundError, ValueError) as exc:
err_console.print(f"[red]✗[/] {exc}")
raise typer.Exit(1) from exc
rendered = yaml.safe_dump(dynamic, sort_keys=False)
if output:
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(rendered)
console.print(f"[green]✓[/] Traefik config written to {output}")
else:
console.print(rendered)
for warning in warnings:
err_console.print(f"[yellow]![/] {warning}")
async def _discover_running_services(cfg: Config) -> dict[str, str]:
"""Discover which services are running on which hosts.
Returns a dict mapping service names to host names for running services.
"""
discovered: dict[str, str] = {}
for service, assigned_host in cfg.services.items():
# Check assigned host first (most common case)
if await check_service_running(cfg, service, assigned_host):
discovered[service] = assigned_host
continue
# Check other hosts in case service was migrated but state is stale
for host_name in cfg.hosts:
if host_name == assigned_host:
continue
if await check_service_running(cfg, service, host_name):
discovered[service] = host_name
break
return discovered
def _report_sync_changes(
added: list[str],
removed: list[str],
changed: list[tuple[str, str, str]],
discovered: dict[str, str],
current_state: dict[str, str],
) -> None:
"""Report sync changes to the user."""
if added:
console.print(f"\nNew services found ({len(added)}):")
for service in sorted(added):
console.print(f" [green]+[/] [cyan]{service}[/] on [magenta]{discovered[service]}[/]")
if changed:
console.print(f"\nServices on different hosts ({len(changed)}):")
for service, old_host, new_host in sorted(changed):
console.print(
f" [yellow]~[/] [cyan]{service}[/]: "
f"[magenta]{old_host}[/] → [magenta]{new_host}[/]"
)
if removed:
console.print(f"\nServices no longer running ({len(removed)}):")
for service in sorted(removed):
console.print(
f" [red]-[/] [cyan]{service}[/] (was on [magenta]{current_state[service]}[/])"
)
@app.command(rich_help_panel="Configuration")
def sync(
config: ConfigOption = None,
log_path: LogPathOption = None,
dry_run: Annotated[
bool,
typer.Option("--dry-run", "-n", help="Show what would be synced without writing"),
] = False,
) -> None:
"""Sync local state with running services.
Discovers which services are running on which hosts, updates the state
file, and captures image digests. Combines service discovery with
image snapshot into a single command.
"""
cfg = _load_config_or_exit(config)
current_state = load_state(cfg)
console.print("Discovering running services...")
discovered = _run_async(_discover_running_services(cfg))
# Calculate changes
added = [s for s in discovered if s not in current_state]
removed = [s for s in current_state if s not in discovered]
changed = [
(s, current_state[s], discovered[s])
for s in discovered
if s in current_state and current_state[s] != discovered[s]
]
# Report state changes
state_changed = bool(added or removed or changed)
if state_changed:
_report_sync_changes(added, removed, changed, discovered, current_state)
else:
console.print("[green]✓[/] State is already in sync.")
if dry_run:
console.print("\n[dim](dry-run: no changes made)[/]")
return
# Update state file
if state_changed:
save_state(cfg, discovered)
console.print(f"\n[green]✓[/] State updated: {len(discovered)} services tracked.")
# Capture image digests for running services
if discovered:
console.print("\nCapturing image digests...")
try:
path = _run_async(snapshot_services(cfg, list(discovered.keys()), log_path=log_path))
console.print(f"[green]✓[/] Digests written to {path}")
except RuntimeError as exc:
err_console.print(f"[yellow]![/] {exc}")
@app.command(rich_help_panel="Configuration")
def check(
config: ConfigOption = None,
) -> None:
"""Check for compose directories not in config (and vice versa)."""
cfg = _load_config_or_exit(config)
configured = set(cfg.services.keys())
on_disk = cfg.discover_compose_dirs()
missing_from_config = sorted(on_disk - configured)
missing_from_disk = sorted(configured - on_disk)
if missing_from_config:
console.print(f"\n[yellow]Not in config[/] ({len(missing_from_config)}):")
for name in missing_from_config:
console.print(f" [yellow]+[/] [cyan]{name}[/]")
if missing_from_disk:
console.print(f"\n[red]No compose file found[/] ({len(missing_from_disk)}):")
for name in missing_from_disk:
console.print(f" [red]-[/] [cyan]{name}[/]")
if not missing_from_config and not missing_from_disk:
console.print("[green]✓[/] All compose directories are in config.")
elif missing_from_config:
console.print(f"\n[dim]To add missing services, append to {cfg.config_path}:[/]")
for name in missing_from_config:
console.print(f"[dim] {name}: docker-debian[/]")
# Check traefik labels have matching ports
try:
_, traefik_warnings = generate_traefik_config(
cfg, list(cfg.services.keys()), check_all=True
)
if traefik_warnings:
console.print(f"\n[yellow]Traefik issues[/] ({len(traefik_warnings)}):")
for warning in traefik_warnings:
console.print(f" [yellow]![/] {warning}")
elif not missing_from_config and not missing_from_disk:
console.print("[green]✓[/] All traefik services have published ports.")
except (FileNotFoundError, ValueError):
pass # Skip traefik check if config can't be loaded
if __name__ == "__main__":
app()


@@ -0,0 +1,21 @@
"""CLI interface using Typer."""
from __future__ import annotations
# Import command modules to trigger registration via @app.command() decorators
from compose_farm.cli import (
config, # noqa: F401
lifecycle, # noqa: F401
management, # noqa: F401
monitoring, # noqa: F401
ssh, # noqa: F401
web, # noqa: F401
)
# Import the shared app instance
from compose_farm.cli.app import app
__all__ = ["app"]
if __name__ == "__main__":
app()
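# Illustrative sketch (hypothetical file, not part of the package): a command
# module registers itself purely by being imported above; it only needs the
# shared app instance and the decorator, with no explicit registration call.
#
#   # compose_farm/cli/hello.py (invented for illustration)
#   from compose_farm.cli.app import app
#
#   @app.command()
#   def hello() -> None:
#       """Say hello."""
#       print("hello from compose-farm")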


@@ -0,0 +1,42 @@
"""Shared Typer app instance."""
from __future__ import annotations
from typing import Annotated
import typer
from compose_farm import __version__
__all__ = ["app"]
def _version_callback(value: bool) -> None:
"""Print version and exit."""
if value:
typer.echo(f"compose-farm {__version__}")
raise typer.Exit
app = typer.Typer(
name="compose-farm",
help="Compose Farm - run docker compose commands across multiple hosts",
no_args_is_help=True,
context_settings={"help_option_names": ["-h", "--help"]},
)
@app.callback()
def main(
version: Annotated[
bool,
typer.Option(
"--version",
"-v",
help="Show version and exit",
callback=_version_callback,
is_eager=True,
),
] = False,
) -> None:
"""Compose Farm - run docker compose commands across multiple hosts."""


@@ -0,0 +1,354 @@
"""Shared CLI helpers, options, and utilities."""
from __future__ import annotations
import asyncio
import contextlib
from pathlib import Path
from typing import TYPE_CHECKING, Annotated, TypeVar
import typer
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Progress,
SpinnerColumn,
TaskID,
TextColumn,
TimeElapsedColumn,
)
from compose_farm.console import (
MSG_HOST_NOT_FOUND,
MSG_SERVICE_NOT_FOUND,
console,
print_error,
print_hint,
print_success,
print_warning,
)
if TYPE_CHECKING:
from collections.abc import Callable, Coroutine, Generator
from compose_farm.config import Config
from compose_farm.executor import CommandResult
_T = TypeVar("_T")
_R = TypeVar("_R")
# --- Shared CLI Options ---
ServicesArg = Annotated[
list[str] | None,
typer.Argument(help="Services to operate on"),
]
AllOption = Annotated[
bool,
typer.Option("--all", "-a", help="Run on all services"),
]
ConfigOption = Annotated[
Path | None,
typer.Option("--config", "-c", help="Path to config file"),
]
LogPathOption = Annotated[
Path | None,
typer.Option("--log-path", "-l", help="Path to Dockerfarm TOML log"),
]
HostOption = Annotated[
str | None,
typer.Option("--host", "-H", help="Filter to services on this host"),
]
# --- Constants (internal) ---
_MISSING_PATH_PREVIEW_LIMIT = 2
_STATS_PREVIEW_LIMIT = 3 # Max number of pending migrations to show by name
def format_host(host: str | list[str]) -> str:
"""Format a host value for display."""
if isinstance(host, list):
return ", ".join(host)
return host
@contextlib.contextmanager
def progress_bar(
label: str, total: int, *, initial_description: str = "[dim]connecting...[/]"
) -> Generator[tuple[Progress, TaskID], None, None]:
"""Create a standardized progress bar with consistent styling.
Yields (progress, task_id). Use progress.update(task_id, advance=1, description=...)
to advance.
"""
with Progress(
SpinnerColumn(),
TextColumn(f"[bold blue]{label}[/]"),
BarColumn(),
MofNCompleteColumn(),
TextColumn(""),
TimeElapsedColumn(),
TextColumn(""),
TextColumn("[progress.description]{task.description}"),
console=console,
transient=True,
) as progress:
task_id = progress.add_task(initial_description, total=total)
yield progress, task_id
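# Illustrative usage sketch (invented service names): the caller owns the
# loop and advances the bar one item at a time, exactly as the docstring
# above describes.
def _example_progress_usage() -> None:
    items = ["plex", "grafana", "traefik"]
    with progress_bar("Processing", total=len(items)) as (progress, task_id):
        for item in items:
            progress.update(task_id, advance=1, description=f"[cyan]{item}[/]")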
def run_parallel_with_progress(
label: str,
items: list[_T],
async_fn: Callable[[_T], Coroutine[None, None, _R]],
) -> list[_R]:
"""Run async tasks in parallel with a progress bar.
Args:
label: Progress bar label (e.g., "Discovering", "Querying hosts")
items: List of items to process
async_fn: Async function called for each item; it must return a tuple
whose first element is used as the progress description
Returns:
List of results from async_fn in completion order.
"""
async def gather() -> list[_R]:
with progress_bar(label, len(items)) as (progress, task_id):
tasks = [asyncio.create_task(async_fn(item)) for item in items]
results: list[_R] = []
for coro in asyncio.as_completed(tasks):
result = await coro
results.append(result)
progress.update(task_id, advance=1, description=f"[cyan]{result[0]}[/]") # type: ignore[index]
return results
return asyncio.run(gather())
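# Illustrative sketch (invented coroutine and host names): async_fn must
# return a tuple whose first element labels the bar, which is why the
# helper above indexes result[0].
def _example_parallel_usage() -> None:
    async def probe_host(host: str) -> tuple[str, bool]:
        await asyncio.sleep(0.1)  # stand-in for real work (SSH, docker, ...)
        return host, True

    statuses = run_parallel_with_progress("Probing", ["nas", "pi"], probe_host)
    print(dict(statuses))  # e.g. {"nas": True, "pi": True}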
def load_config_or_exit(config_path: Path | None) -> Config:
"""Load config or exit with a friendly error message."""
# Lazy import: pydantic adds ~50ms to startup, only load when actually needed
from compose_farm.config import load_config # noqa: PLC0415
try:
return load_config(config_path)
except FileNotFoundError as e:
print_error(str(e))
raise typer.Exit(1) from e
def get_services(
services: list[str],
all_services: bool,
config_path: Path | None,
*,
host: str | None = None,
default_all: bool = False,
) -> tuple[list[str], Config]:
"""Resolve service list and load config.
Handles three mutually exclusive selection methods:
- Explicit service names
- --all flag
- --host filter
Args:
services: Explicit service names
all_services: Whether --all was specified
config_path: Path to config file
host: Filter to services on this host
default_all: If True, default to all services when nothing specified (for ps)
Supports "." as shorthand for the current directory name.
"""
validate_service_selection(services, all_services, host)
config = load_config_or_exit(config_path)
if host is not None:
validate_host(config, host)
svc_list = [s for s in config.services if host in config.get_hosts(s)]
if not svc_list:
print_warning(f"No services configured for host [magenta]{host}[/]")
raise typer.Exit(0)
return svc_list, config
if all_services:
return list(config.services.keys()), config
if not services:
if default_all:
return list(config.services.keys()), config
print_error("Specify services or use [bold]--all[/] / [bold]--host[/]")
raise typer.Exit(1)
# Resolve "." to current directory name
resolved = [Path.cwd().name if svc == "." else svc for svc in services]
# Validate all services exist in config
validate_services(
config, resolved, hint="Add the service to compose-farm.yaml or use [bold]--all[/]"
)
return resolved, config
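# Illustrative sketch (invented values): how the "." shorthand resolves, so
# running `cf up .` inside /opt/compose/plex targets the "plex" service.
def _example_dot_resolution() -> None:
    services = [".", "grafana"]
    cwd_name = "plex"  # stand-in for Path.cwd().name
    resolved = [cwd_name if svc == "." else svc for svc in services]
    assert resolved == ["plex", "grafana"]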
def run_async(coro: Coroutine[None, None, _T]) -> _T:
"""Run async coroutine."""
try:
return asyncio.run(coro)
except KeyboardInterrupt:
console.print("\n[yellow]Interrupted[/]")
raise typer.Exit(130) from None # Standard exit code for SIGINT
def report_results(results: list[CommandResult]) -> None:
"""Report command results and exit with appropriate code."""
succeeded = [r for r in results if r.success]
failed = [r for r in results if not r.success]
# Always print summary when there are multiple results
if len(results) > 1:
console.print() # Blank line before summary
if failed:
for r in failed:
print_error(f"[cyan]{r.service}[/] failed with exit code {r.exit_code}")
console.print()
console.print(
f"[green]✓[/] {len(succeeded)}/{len(results)} services succeeded, "
f"[red]✗[/] {len(failed)} failed"
)
else:
print_success(f"All {len(results)} services succeeded")
elif failed:
# Single service failed
r = failed[0]
print_error(f"[cyan]{r.service}[/] failed with exit code {r.exit_code}")
if failed:
raise typer.Exit(1)
def maybe_regenerate_traefik(
cfg: Config,
results: list[CommandResult] | None = None,
) -> None:
"""Regenerate traefik config if traefik_file is configured.
If results are provided, skips regeneration if all services failed.
"""
if cfg.traefik_file is None:
return
# Skip if all services failed
if results and not any(r.success for r in results):
return
# Lazy import: traefik/yaml adds startup time, only load when traefik_file is configured
from compose_farm.traefik import ( # noqa: PLC0415
generate_traefik_config,
render_traefik_config,
)
try:
dynamic, warnings = generate_traefik_config(cfg, list(cfg.services.keys()))
new_content = render_traefik_config(dynamic)
# Check if content changed
old_content = ""
if cfg.traefik_file.exists():
old_content = cfg.traefik_file.read_text()
if new_content != old_content:
cfg.traefik_file.parent.mkdir(parents=True, exist_ok=True)
cfg.traefik_file.write_text(new_content)
console.print() # Ensure we're on a new line after streaming output
print_success(f"Traefik config updated: {cfg.traefik_file}")
for warning in warnings:
print_warning(warning)
except (FileNotFoundError, ValueError) as exc:
print_warning(f"Failed to update traefik config: {exc}")
def validate_services(cfg: Config, services: list[str], *, hint: str | None = None) -> None:
"""Validate that all services exist in config. Exits with error if any not found."""
invalid = [s for s in services if s not in cfg.services]
if invalid:
for svc in invalid:
print_error(MSG_SERVICE_NOT_FOUND.format(name=svc))
if hint:
print_hint(hint)
raise typer.Exit(1)
def validate_host(cfg: Config, host: str) -> None:
"""Validate that a host exists in config. Exits with error if not found."""
if host not in cfg.hosts:
print_error(MSG_HOST_NOT_FOUND.format(name=host))
raise typer.Exit(1)
def validate_hosts(cfg: Config, hosts: list[str]) -> None:
"""Validate that all hosts exist in config. Exits with error if any not found."""
invalid = [h for h in hosts if h not in cfg.hosts]
if invalid:
for h in invalid:
print_error(MSG_HOST_NOT_FOUND.format(name=h))
raise typer.Exit(1)
def validate_host_for_service(cfg: Config, service: str, host: str) -> None:
"""Validate that a host is valid for a service."""
validate_host(cfg, host)
allowed_hosts = cfg.get_hosts(service)
if host not in allowed_hosts:
print_error(
f"Service [cyan]{service}[/] is not configured for host [magenta]{host}[/] "
f"(configured: {', '.join(allowed_hosts)})"
)
raise typer.Exit(1)
def validate_service_selection(
services: list[str] | None,
all_services: bool,
host: str | None,
) -> None:
"""Validate that only one service selection method is used.
The three selection methods (explicit services, --all, --host) are mutually
exclusive. This ensures consistent behavior across all commands.
"""
methods = sum([bool(services), all_services, host is not None])
if methods > 1:
print_error("Use only one of: service names, [bold]--all[/], or [bold]--host[/]")
raise typer.Exit(1)
def run_host_operation(
cfg: Config,
svc_list: list[str],
host: str,
command: str,
action_verb: str,
state_callback: Callable[[Config, str, str], None],
) -> None:
"""Run an operation on a specific host for multiple services."""
from compose_farm.executor import run_compose_on_host # noqa: PLC0415
results: list[CommandResult] = []
for service in svc_list:
validate_host_for_service(cfg, service, host)
console.print(f"[cyan]\\[{service}][/] {action_verb} on [magenta]{host}[/]...")
result = run_async(run_compose_on_host(cfg, service, host, command, raw=True))
print() # Newline after raw output
results.append(result)
if result.success:
state_callback(cfg, service, host)
maybe_regenerate_traefik(cfg, results)
report_results(results)


@@ -0,0 +1,313 @@
"""Configuration management commands for compose-farm."""
from __future__ import annotations
import os
import platform
import shlex
import shutil
import subprocess
from importlib import resources
from pathlib import Path
from typing import Annotated
import typer
from compose_farm.cli.app import app
from compose_farm.console import MSG_CONFIG_NOT_FOUND, console, print_error, print_success
from compose_farm.paths import config_search_paths, default_config_path, find_config_path
config_app = typer.Typer(
name="config",
help="Manage compose-farm configuration files.",
no_args_is_help=True,
)
# --- CLI Options (same pattern as cli.py) ---
_PathOption = Annotated[
Path | None,
typer.Option("--path", "-p", help="Path to config file. Uses auto-detection if not specified."),
]
_ForceOption = Annotated[
bool,
typer.Option("--force", "-f", help="Overwrite existing config without confirmation."),
]
_RawOption = Annotated[
bool,
typer.Option("--raw", "-r", help="Output raw file contents (for copy-paste)."),
]
def _get_editor() -> str:
"""Get the user's preferred editor.
Checks $EDITOR, then $VISUAL, then falls back to platform defaults.
"""
for env_var in ("EDITOR", "VISUAL"):
editor = os.environ.get(env_var)
if editor:
return editor
if platform.system() == "Windows":
return "notepad"
# Try common editors on Unix-like systems
for editor in ("nano", "vim", "vi"):
if shutil.which(editor):
return editor
return "vi"
def _generate_template() -> str:
"""Generate a config template with documented schema."""
try:
template_file = resources.files("compose_farm") / "example-config.yaml"
return template_file.read_text(encoding="utf-8")
except FileNotFoundError as e:
print_error("Example config template is missing from the package")
console.print("Reinstall compose-farm or report this issue.")
raise typer.Exit(1) from e
def _get_config_file(path: Path | None) -> Path | None:
"""Resolve config path, or auto-detect from standard locations."""
if path:
return path.expanduser().resolve()
config_path = find_config_path()
return config_path.resolve() if config_path else None
def _report_no_config_found() -> None:
"""Report that no config file was found in search paths."""
console.print("[yellow]No config file found.[/yellow]")
console.print("\nSearched locations:")
for p in config_search_paths():
status = "[green]exists[/green]" if p.exists() else "[dim]not found[/dim]"
console.print(f" - {p} ({status})")
console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
def _report_config_path_not_exists(config_file: Path) -> None:
"""Report that an explicit config path doesn't exist."""
console.print("[yellow]Config file not found.[/yellow]")
console.print(f"\nProvided path does not exist: [cyan]{config_file}[/cyan]")
console.print("\nRun [bold cyan]cf config init[/bold cyan] to create one.")
@config_app.command("init")
def config_init(
path: _PathOption = None,
force: _ForceOption = False,
) -> None:
"""Create a new config file with documented example.
The generated config file serves as a template showing all available
options with explanatory comments.
"""
target_path = (path.expanduser().resolve() if path else None) or default_config_path()
if target_path.exists() and not force:
console.print(
f"[bold yellow]Config file already exists at:[/bold yellow] [cyan]{target_path}[/cyan]",
)
if not typer.confirm("Overwrite existing config file?"):
console.print("[dim]Aborted.[/dim]")
raise typer.Exit(0)
# Create parent directories
target_path.parent.mkdir(parents=True, exist_ok=True)
# Generate and write template
template_content = _generate_template()
target_path.write_text(template_content, encoding="utf-8")
print_success(f"Config file created at: {target_path}")
console.print("\n[dim]Edit the file to customize your settings:[/dim]")
console.print(" [cyan]cf config edit[/cyan]")
@config_app.command("edit")
def config_edit(
path: _PathOption = None,
) -> None:
"""Open the config file in your default editor.
The editor is determined by: $EDITOR > $VISUAL > platform default.
"""
config_file = _get_config_file(path)
if config_file is None:
_report_no_config_found()
raise typer.Exit(1)
if not config_file.exists():
_report_config_path_not_exists(config_file)
raise typer.Exit(1)
editor = _get_editor()
console.print(f"[dim]Opening {config_file} with {editor}...[/dim]")
try:
editor_cmd = shlex.split(editor, posix=os.name != "nt")
except ValueError as e:
print_error("Invalid editor command. Check [bold]$EDITOR[/]/[bold]$VISUAL[/]")
raise typer.Exit(1) from e
if not editor_cmd:
print_error("Editor command is empty")
raise typer.Exit(1)
try:
subprocess.run([*editor_cmd, str(config_file)], check=True)
except FileNotFoundError:
print_error(f"Editor [cyan]{editor_cmd[0]}[/] not found")
console.print("Set [bold]$EDITOR[/] environment variable to your preferred editor.")
raise typer.Exit(1) from None
except subprocess.CalledProcessError as e:
print_error(f"Editor exited with error code {e.returncode}")
raise typer.Exit(e.returncode) from None
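# Illustrative note (invented values): shlex.split is what keeps multi-word
# $EDITOR settings working, e.g. EDITOR='code --wait'.
def _example_editor_split() -> None:
    assert shlex.split("code --wait") == ["code", "--wait"]
    assert shlex.split("nano") == ["nano"]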
@config_app.command("show")
def config_show(
path: _PathOption = None,
raw: _RawOption = False,
) -> None:
"""Display the config file location and contents."""
config_file = _get_config_file(path)
if config_file is None:
_report_no_config_found()
raise typer.Exit(0)
if not config_file.exists():
_report_config_path_not_exists(config_file)
raise typer.Exit(1)
content = config_file.read_text(encoding="utf-8")
if raw:
print(content, end="")
return
from rich.syntax import Syntax # noqa: PLC0415
console.print(f"[bold green]Config file:[/bold green] [cyan]{config_file}[/cyan]")
console.print()
syntax = Syntax(content, "yaml", theme="monokai", line_numbers=True, word_wrap=True)
console.print(syntax)
console.print()
console.print("[dim]Tip: Use -r for copy-paste friendly output[/dim]")
@config_app.command("path")
def config_path(
path: _PathOption = None,
) -> None:
"""Print the config file path (useful for scripting)."""
config_file = _get_config_file(path)
if config_file is None:
_report_no_config_found()
raise typer.Exit(1)
# Just print the path for easy piping
print(config_file)
@config_app.command("validate")
def config_validate(
path: _PathOption = None,
) -> None:
"""Validate the config file syntax and schema."""
config_file = _get_config_file(path)
if config_file is None:
print_error(MSG_CONFIG_NOT_FOUND)
raise typer.Exit(1)
# Lazy import: pydantic adds ~50ms to startup, only load when actually needed
from compose_farm.config import load_config # noqa: PLC0415
try:
cfg = load_config(config_file)
except FileNotFoundError as e:
print_error(str(e))
raise typer.Exit(1) from e
except Exception as e:
print_error(f"Invalid config: {e}")
raise typer.Exit(1) from e
print_success(f"Valid config: {config_file}")
console.print(f" Hosts: {len(cfg.hosts)}")
console.print(f" Services: {len(cfg.services)}")
@config_app.command("symlink")
def config_symlink(
target: Annotated[
Path | None,
typer.Argument(help="Config file to link to. Defaults to ./compose-farm.yaml"),
] = None,
force: _ForceOption = False,
) -> None:
"""Create a symlink from the default config location to a config file.
This makes a local config file discoverable globally without copying.
Always uses absolute paths to avoid broken symlinks.
Examples:
cf config symlink # Link to ./compose-farm.yaml
cf config symlink /opt/compose/config.yaml # Link to specific file
"""
# Default to compose-farm.yaml in current directory
target_path = (target or Path("compose-farm.yaml")).expanduser().resolve()
if not target_path.exists():
print_error(f"Target config file not found: {target_path}")
raise typer.Exit(1)
if not target_path.is_file():
print_error(f"Target is not a file: {target_path}")
raise typer.Exit(1)
symlink_path = default_config_path()
# Check if symlink location already exists
if symlink_path.exists() or symlink_path.is_symlink():
if symlink_path.is_symlink():
current_target = symlink_path.resolve() if symlink_path.exists() else None
if current_target == target_path:
print_success(f"Symlink already points to: {target_path}")
return
# Update existing symlink
if not force:
existing = symlink_path.readlink()
console.print(f"[yellow]Symlink exists:[/] {symlink_path} -> {existing}")
if not typer.confirm(f"Update to point to {target_path}?"):
console.print("[dim]Aborted.[/dim]")
raise typer.Exit(0)
symlink_path.unlink()
else:
# Regular file exists
print_error(f"A regular file exists at: {symlink_path}")
console.print(" Back it up or remove it first, then retry.")
raise typer.Exit(1)
# Create parent directories
symlink_path.parent.mkdir(parents=True, exist_ok=True)
# Create symlink with absolute path
symlink_path.symlink_to(target_path)
print_success("Created symlink:")
console.print(f" {symlink_path}")
console.print(f" -> {target_path}")
# Register config subcommand on the shared app
app.add_typer(config_app, name="config", rich_help_panel="Configuration")


@@ -0,0 +1,278 @@
"""Lifecycle commands: up, down, pull, restart, update, apply."""
from __future__ import annotations
from typing import TYPE_CHECKING, Annotated
import typer
if TYPE_CHECKING:
from compose_farm.config import Config
from compose_farm.cli.app import app
from compose_farm.cli.common import (
AllOption,
ConfigOption,
HostOption,
ServicesArg,
format_host,
get_services,
load_config_or_exit,
maybe_regenerate_traefik,
report_results,
run_async,
)
from compose_farm.console import MSG_DRY_RUN, console, print_error, print_success
from compose_farm.executor import run_on_services, run_sequential_on_services
from compose_farm.operations import stop_orphaned_services, up_services
from compose_farm.state import (
get_orphaned_services,
get_service_host,
get_services_needing_migration,
get_services_not_in_state,
remove_service,
)
@app.command(rich_help_panel="Lifecycle")
def up(
services: ServicesArg = None,
all_services: AllOption = False,
host: HostOption = None,
config: ConfigOption = None,
) -> None:
"""Start services (docker compose up -d). Auto-migrates if host changed."""
svc_list, cfg = get_services(services or [], all_services, config, host=host)
results = run_async(up_services(cfg, svc_list, raw=True))
maybe_regenerate_traefik(cfg, results)
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def down(
services: ServicesArg = None,
all_services: AllOption = False,
orphaned: Annotated[
bool,
typer.Option(
"--orphaned", help="Stop orphaned services (in state but removed from config)"
),
] = False,
host: HostOption = None,
config: ConfigOption = None,
) -> None:
"""Stop services (docker compose down)."""
# Handle --orphaned flag (mutually exclusive with other selection methods)
if orphaned:
if services or all_services or host:
print_error(
"Cannot combine [bold]--orphaned[/] with services, [bold]--all[/], or [bold]--host[/]"
)
raise typer.Exit(1)
cfg = load_config_or_exit(config)
orphaned_services = get_orphaned_services(cfg)
if not orphaned_services:
print_success("No orphaned services to stop")
return
console.print(
f"[yellow]Stopping {len(orphaned_services)} orphaned service(s):[/] "
f"{', '.join(orphaned_services.keys())}"
)
results = run_async(stop_orphaned_services(cfg))
report_results(results)
return
svc_list, cfg = get_services(services or [], all_services, config, host=host)
raw = len(svc_list) == 1
results = run_async(run_on_services(cfg, svc_list, "down", raw=raw))
# Remove from state on success
# For multi-host services, result.service is "svc@host", extract base name
removed_services: set[str] = set()
for result in results:
if result.success:
base_service = result.service.split("@")[0]
if base_service not in removed_services:
remove_service(cfg, base_service)
removed_services.add(base_service)
maybe_regenerate_traefik(cfg, results)
report_results(results)
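# Illustrative sketch (invented result names): multi-host results are named
# "svc@host" per the comment above, so de-duplicating on the base name stops
# each service's state from being removed twice.
def _example_base_name() -> None:
    result_names = ["plex@nas", "plex@backup", "grafana"]
    bases = {name.split("@")[0] for name in result_names}
    assert sorted(bases) == ["grafana", "plex"]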
@app.command(rich_help_panel="Lifecycle")
def pull(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Pull latest images (docker compose pull)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(run_on_services(cfg, svc_list, "pull", raw=raw))
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def restart(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Restart services (down + up)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(run_sequential_on_services(cfg, svc_list, ["down", "up -d"], raw=raw))
maybe_regenerate_traefik(cfg, results)
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def update(
services: ServicesArg = None,
all_services: AllOption = False,
config: ConfigOption = None,
) -> None:
"""Update services (pull + build + down + up)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(
run_sequential_on_services(
cfg, svc_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
)
)
maybe_regenerate_traefik(cfg, results)
report_results(results)
def _report_pending_migrations(cfg: Config, migrations: list[str]) -> None:
"""Report services that need migration."""
console.print(f"[cyan]Services to migrate ({len(migrations)}):[/]")
for svc in migrations:
current = get_service_host(cfg, svc)
target = cfg.get_hosts(svc)[0]
console.print(f" [cyan]{svc}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
def _report_pending_orphans(orphaned: dict[str, str | list[str]]) -> None:
"""Report orphaned services that will be stopped."""
console.print(f"[yellow]Orphaned services to stop ({len(orphaned)}):[/]")
for svc, hosts in orphaned.items():
console.print(f" [cyan]{svc}[/] on [magenta]{format_host(hosts)}[/]")
def _report_pending_starts(cfg: Config, missing: list[str]) -> None:
"""Report services that will be started."""
console.print(f"[green]Services to start ({len(missing)}):[/]")
for svc in missing:
target = format_host(cfg.get_hosts(svc))
console.print(f" [cyan]{svc}[/] on [magenta]{target}[/]")
def _report_pending_refresh(cfg: Config, to_refresh: list[str]) -> None:
"""Report services that will be refreshed."""
console.print(f"[blue]Services to refresh ({len(to_refresh)}):[/]")
for svc in to_refresh:
target = format_host(cfg.get_hosts(svc))
console.print(f" [cyan]{svc}[/] on [magenta]{target}[/]")
@app.command(rich_help_panel="Lifecycle")
def apply(
dry_run: Annotated[
bool,
typer.Option("--dry-run", "-n", help="Show what would change without executing"),
] = False,
no_orphans: Annotated[
bool,
typer.Option("--no-orphans", help="Only migrate, don't stop orphaned services"),
] = False,
full: Annotated[
bool,
typer.Option("--full", "-f", help="Also run up on all services to apply config changes"),
] = False,
config: ConfigOption = None,
) -> None:
"""Make reality match config (start, migrate, stop as needed).
This is the "reconcile" command that ensures running services match your
config file. It will:
1. Stop orphaned services (in state but removed from config)
2. Migrate services on wrong host (host in state ≠ host in config)
3. Start missing services (in config but not in state)
Use --dry-run to preview changes before applying.
Use --no-orphans to only migrate/start without stopping orphaned services.
Use --full to also run 'up' on all services (picks up compose/env changes).
"""
cfg = load_config_or_exit(config)
orphaned = get_orphaned_services(cfg)
migrations = get_services_needing_migration(cfg)
missing = get_services_not_in_state(cfg)
# For --full: refresh all services not already being started/migrated
handled = set(migrations) | set(missing)
to_refresh = [svc for svc in cfg.services if svc not in handled] if full else []
has_orphans = bool(orphaned) and not no_orphans
has_migrations = bool(migrations)
has_missing = bool(missing)
has_refresh = bool(to_refresh)
if not has_orphans and not has_migrations and not has_missing and not has_refresh:
print_success("Nothing to apply - reality matches config")
return
# Report what will be done
if has_orphans:
_report_pending_orphans(orphaned)
if has_migrations:
_report_pending_migrations(cfg, migrations)
if has_missing:
_report_pending_starts(cfg, missing)
if has_refresh:
_report_pending_refresh(cfg, to_refresh)
if dry_run:
console.print(f"\n{MSG_DRY_RUN}")
return
# Execute changes
console.print()
all_results = []
# 1. Stop orphaned services first
if has_orphans:
console.print("[yellow]Stopping orphaned services...[/]")
all_results.extend(run_async(stop_orphaned_services(cfg)))
# 2. Migrate services on wrong host
if has_migrations:
console.print("[cyan]Migrating services...[/]")
migrate_results = run_async(up_services(cfg, migrations, raw=True))
all_results.extend(migrate_results)
maybe_regenerate_traefik(cfg, migrate_results)
# 3. Start missing services (reuse up_services which handles state updates)
if has_missing:
console.print("[green]Starting missing services...[/]")
start_results = run_async(up_services(cfg, missing, raw=True))
all_results.extend(start_results)
maybe_regenerate_traefik(cfg, start_results)
# 4. Refresh remaining services (--full: run up to apply config changes)
if has_refresh:
console.print("[blue]Refreshing services...[/]")
refresh_results = run_async(up_services(cfg, to_refresh, raw=True))
all_results.extend(refresh_results)
maybe_regenerate_traefik(cfg, refresh_results)
report_results(all_results)
# Alias: cf a = cf apply
app.command("a", hidden=True)(apply)


@@ -0,0 +1,564 @@
"""Management commands: sync, check, init-network, traefik-file."""
from __future__ import annotations
import asyncio
from datetime import UTC, datetime
from pathlib import Path # noqa: TC003
from typing import TYPE_CHECKING, Annotated
import typer
from compose_farm.cli.app import app
from compose_farm.cli.common import (
_MISSING_PATH_PREVIEW_LIMIT,
AllOption,
ConfigOption,
LogPathOption,
ServicesArg,
format_host,
get_services,
load_config_or_exit,
run_async,
run_parallel_with_progress,
validate_hosts,
validate_services,
)
if TYPE_CHECKING:
from compose_farm.config import Config
from compose_farm.console import (
MSG_DRY_RUN,
console,
print_error,
print_success,
print_warning,
)
from compose_farm.executor import (
CommandResult,
is_local,
run_command,
)
from compose_farm.logs import (
DEFAULT_LOG_PATH,
SnapshotEntry,
collect_service_entries,
isoformat,
load_existing_entries,
merge_entries,
write_toml,
)
from compose_farm.operations import (
check_host_compatibility,
check_service_requirements,
discover_service_host,
)
from compose_farm.state import get_orphaned_services, load_state, save_state
from compose_farm.traefik import generate_traefik_config, render_traefik_config
# --- Sync helpers ---
def _discover_services(cfg: Config) -> dict[str, str | list[str]]:
"""Discover running services with a progress bar."""
results = run_parallel_with_progress(
"Discovering",
list(cfg.services),
lambda s: discover_service_host(cfg, s),
)
return {svc: host for svc, host in results if host is not None}
def _snapshot_services(
cfg: Config,
services: list[str],
log_path: Path | None,
) -> Path:
"""Capture image digests with a progress bar."""
effective_log_path = log_path or DEFAULT_LOG_PATH
now_dt = datetime.now(UTC)
now_iso = isoformat(now_dt)
async def collect_service(service: str) -> tuple[str, list[SnapshotEntry]]:
try:
return service, await collect_service_entries(cfg, service, now=now_dt)
except RuntimeError:
return service, []
results = run_parallel_with_progress(
"Capturing",
services,
collect_service,
)
snapshot_entries = [entry for _, entries in results for entry in entries]
if not snapshot_entries:
msg = "No image digests were captured"
raise RuntimeError(msg)
existing_entries = load_existing_entries(effective_log_path)
merged_entries = merge_entries(existing_entries, snapshot_entries, now_iso=now_iso)
meta = {"generated_at": now_iso, "compose_dir": str(cfg.compose_dir)}
write_toml(effective_log_path, meta=meta, entries=merged_entries)
return effective_log_path
def _report_sync_changes(
added: list[str],
removed: list[str],
changed: list[tuple[str, str | list[str], str | list[str]]],
discovered: dict[str, str | list[str]],
current_state: dict[str, str | list[str]],
) -> None:
"""Report sync changes to the user."""
if added:
console.print(f"\nNew services found ({len(added)}):")
for service in sorted(added):
host_str = format_host(discovered[service])
console.print(f" [green]+[/] [cyan]{service}[/] on [magenta]{host_str}[/]")
if changed:
console.print(f"\nServices on different hosts ({len(changed)}):")
for service, old_host, new_host in sorted(changed):
old_str = format_host(old_host)
new_str = format_host(new_host)
console.print(
f" [yellow]~[/] [cyan]{service}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
)
if removed:
console.print(f"\nServices no longer running ({len(removed)}):")
for service in sorted(removed):
host_str = format_host(current_state[service])
console.print(f" [red]-[/] [cyan]{service}[/] (was on [magenta]{host_str}[/])")
# --- Check helpers ---
def _check_ssh_connectivity(cfg: Config) -> list[str]:
"""Check SSH connectivity to all hosts. Returns list of unreachable hosts."""
# Filter out local hosts - no SSH needed
remote_hosts = [h for h in cfg.hosts if not is_local(cfg.hosts[h])]
if not remote_hosts:
return []
console.print() # Spacing before progress bar
async def check_host(host_name: str) -> tuple[str, bool]:
host = cfg.hosts[host_name]
try:
result = await asyncio.wait_for(
run_command(host, "echo ok", host_name, stream=False),
timeout=5.0,
)
return host_name, result.success
except TimeoutError:
return host_name, False
results = run_parallel_with_progress(
"Checking SSH connectivity",
remote_hosts,
check_host,
)
return [host for host, success in results if not success]
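# Illustrative sketch (invented coroutine): the same asyncio.wait_for pattern
# used above, where a slow probe times out instead of hanging the check.
def _example_timeout() -> None:
    async def probe() -> str:
        await asyncio.sleep(10)
        return "ok"

    async def main() -> str | None:
        try:
            return await asyncio.wait_for(probe(), timeout=0.1)
        except TimeoutError:
            return None

    assert asyncio.run(main()) is None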
def _check_service_requirements(
cfg: Config,
services: list[str],
) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
"""Check mounts, networks, and devices for all services with a progress bar.
Returns (mount_errors, network_errors, device_errors) where each is a list of
(service, host, missing_item) tuples.
"""
async def check_service(
service: str,
) -> tuple[
str,
list[tuple[str, str, str]],
list[tuple[str, str, str]],
list[tuple[str, str, str]],
]:
"""Check requirements for a single service on all its hosts."""
host_names = cfg.get_hosts(service)
mount_errors: list[tuple[str, str, str]] = []
network_errors: list[tuple[str, str, str]] = []
device_errors: list[tuple[str, str, str]] = []
for host_name in host_names:
missing_paths, missing_nets, missing_devs = await check_service_requirements(
cfg, service, host_name
)
mount_errors.extend((service, host_name, p) for p in missing_paths)
network_errors.extend((service, host_name, n) for n in missing_nets)
device_errors.extend((service, host_name, d) for d in missing_devs)
return service, mount_errors, network_errors, device_errors
results = run_parallel_with_progress(
"Checking requirements",
services,
check_service,
)
all_mount_errors: list[tuple[str, str, str]] = []
all_network_errors: list[tuple[str, str, str]] = []
all_device_errors: list[tuple[str, str, str]] = []
for _, mount_errs, net_errs, dev_errs in results:
all_mount_errors.extend(mount_errs)
all_network_errors.extend(net_errs)
all_device_errors.extend(dev_errs)
return all_mount_errors, all_network_errors, all_device_errors
def _report_config_status(cfg: Config) -> bool:
"""Check and report config vs disk status. Returns True if errors found."""
configured = set(cfg.services.keys())
on_disk = cfg.discover_compose_dirs()
unmanaged = sorted(on_disk - configured)
missing_from_disk = sorted(configured - on_disk)
if unmanaged:
console.print(f"\n[yellow]Unmanaged[/] (on disk but not in config, {len(unmanaged)}):")
for name in unmanaged:
console.print(f" [yellow]+[/] [cyan]{name}[/]")
if missing_from_disk:
console.print(f"\n[red]In config but no compose file[/] ({len(missing_from_disk)}):")
for name in missing_from_disk:
console.print(f" [red]-[/] [cyan]{name}[/]")
if not unmanaged and not missing_from_disk:
print_success("Config matches disk")
return bool(missing_from_disk)
def _report_orphaned_services(cfg: Config) -> bool:
"""Check for services in state but not in config. Returns True if orphans found."""
orphaned = get_orphaned_services(cfg)
if orphaned:
console.print("\n[yellow]Orphaned services[/] (in state but not in config):")
console.print(
"[dim]Run [bold]cf apply[/bold] to stop them, or [bold]cf down --orphaned[/bold] for just orphans.[/]"
)
for name, hosts in sorted(orphaned.items()):
console.print(f" [yellow]![/] [cyan]{name}[/] on [magenta]{format_host(hosts)}[/]")
return True
return False
def _report_traefik_status(cfg: Config, services: list[str]) -> None:
"""Check and report traefik label status."""
try:
_, warnings = generate_traefik_config(cfg, services, check_all=True)
except (FileNotFoundError, ValueError):
return
if warnings:
console.print(f"\n[yellow]Traefik issues[/] ({len(warnings)}):")
for warning in warnings:
print_warning(warning)
else:
print_success("Traefik labels valid")
def _report_requirement_errors(errors: list[tuple[str, str, str]], category: str) -> None:
"""Report requirement errors (mounts, networks, devices) grouped by service."""
by_service: dict[str, list[tuple[str, str]]] = {}
for svc, host, item in errors:
by_service.setdefault(svc, []).append((host, item))
console.print(f"[red]Missing {category}[/] ({len(errors)}):")
for svc, items in sorted(by_service.items()):
host = items[0][0]
missing = [i for _, i in items]
console.print(f" [cyan]{svc}[/] on [magenta]{host}[/]:")
for item in missing:
console.print(f" [red]✗[/] {item}")
def _report_ssh_status(unreachable_hosts: list[str]) -> bool:
"""Report SSH connectivity status. Returns True if there are errors."""
if unreachable_hosts:
console.print(f"[red]Unreachable hosts[/] ({len(unreachable_hosts)}):")
for host in sorted(unreachable_hosts):
print_error(f"[magenta]{host}[/]")
return True
print_success("All hosts reachable")
return False
def _report_host_compatibility(
compat: dict[str, tuple[int, int, list[str]]],
assigned_hosts: list[str],
) -> None:
"""Report host compatibility for a service."""
for host_name, (found, total, missing) in sorted(compat.items()):
is_assigned = host_name in assigned_hosts
marker = " [dim](assigned)[/]" if is_assigned else ""
if found == total:
console.print(f" [green]✓[/] [magenta]{host_name}[/] {found}/{total}{marker}")
else:
preview = ", ".join(missing[:_MISSING_PATH_PREVIEW_LIMIT])
if len(missing) > _MISSING_PATH_PREVIEW_LIMIT:
preview += f", +{len(missing) - _MISSING_PATH_PREVIEW_LIMIT} more"
console.print(
f" [red]✗[/] [magenta]{host_name}[/] {found}/{total} "
f"[dim](missing: {preview})[/]{marker}"
)
def _run_remote_checks(cfg: Config, svc_list: list[str], *, show_host_compat: bool) -> bool:
"""Run SSH-based checks for mounts, networks, and host compatibility.
Returns True if any errors were found.
"""
has_errors = False
# Check SSH connectivity first
if _report_ssh_status(_check_ssh_connectivity(cfg)):
has_errors = True
console.print() # Spacing before mounts/networks check
# Check mounts, networks, and devices
mount_errors, network_errors, device_errors = _check_service_requirements(cfg, svc_list)
if mount_errors:
_report_requirement_errors(mount_errors, "mounts")
has_errors = True
if network_errors:
_report_requirement_errors(network_errors, "networks")
has_errors = True
if device_errors:
_report_requirement_errors(device_errors, "devices")
has_errors = True
if not mount_errors and not network_errors and not device_errors:
print_success("All mounts, networks, and devices exist")
if show_host_compat:
for service in svc_list:
console.print(f"\n[bold]Host compatibility for[/] [cyan]{service}[/]:")
compat = run_async(check_host_compatibility(cfg, service))
assigned_hosts = cfg.get_hosts(service)
_report_host_compatibility(compat, assigned_hosts)
return has_errors
# Default network settings for cross-host Docker networking
_DEFAULT_NETWORK_NAME = "mynetwork"
_DEFAULT_NETWORK_SUBNET = "172.20.0.0/16"
_DEFAULT_NETWORK_GATEWAY = "172.20.0.1"
@app.command("traefik-file", rich_help_panel="Configuration")
def traefik_file(
services: ServicesArg = None,
all_services: AllOption = False,
output: Annotated[
Path | None,
typer.Option(
"--output",
"-o",
help="Write Traefik file-provider YAML to this path (stdout if omitted)",
),
] = None,
config: ConfigOption = None,
) -> None:
"""Generate a Traefik file-provider fragment from compose Traefik labels."""
svc_list, cfg = get_services(services or [], all_services, config)
try:
dynamic, warnings = generate_traefik_config(cfg, svc_list)
except (FileNotFoundError, ValueError) as exc:
print_error(str(exc))
raise typer.Exit(1) from exc
rendered = render_traefik_config(dynamic)
if output:
output.parent.mkdir(parents=True, exist_ok=True)
output.write_text(rendered)
print_success(f"Traefik config written to {output}")
else:
console.print(rendered)
for warning in warnings:
print_warning(warning)
@app.command(rich_help_panel="Configuration")
def refresh(
config: ConfigOption = None,
log_path: LogPathOption = None,
dry_run: Annotated[
bool,
typer.Option("--dry-run", "-n", help="Show what would change without writing"),
] = False,
) -> None:
"""Update local state from running services.
Discovers which services are running on which hosts, updates the state
file, and captures image digests. This is a read operation - it updates
your local state to match reality, not the other way around.
Use 'cf apply' to make reality match your config (stop orphans, migrate).
"""
cfg = load_config_or_exit(config)
current_state = load_state(cfg)
discovered = _discover_services(cfg)
# Calculate changes
added = [s for s in discovered if s not in current_state]
removed = [s for s in current_state if s not in discovered]
changed = [
(s, current_state[s], discovered[s])
for s in discovered
if s in current_state and current_state[s] != discovered[s]
]
# Report state changes
state_changed = bool(added or removed or changed)
if state_changed:
_report_sync_changes(added, removed, changed, discovered, current_state)
else:
print_success("State is already in sync.")
if dry_run:
console.print(f"\n{MSG_DRY_RUN}")
return
# Update state file
if state_changed:
save_state(cfg, discovered)
print_success(f"State updated: {len(discovered)} services tracked.")
# Capture image digests for running services
if discovered:
try:
path = _snapshot_services(cfg, list(discovered.keys()), log_path)
print_success(f"Digests written to {path}")
except RuntimeError as exc:
print_warning(str(exc))
@app.command(rich_help_panel="Configuration")
def check(
services: ServicesArg = None,
local: Annotated[
bool,
typer.Option("--local", help="Skip SSH-based checks (faster)"),
] = False,
config: ConfigOption = None,
) -> None:
"""Validate configuration, traefik labels, mounts, and networks.
Without arguments: validates all services against configured hosts.
With service arguments: validates specific services and shows host compatibility.
Use --local to skip SSH-based checks for faster validation.
"""
cfg = load_config_or_exit(config)
# Determine which services to check and whether to show host compatibility
if services:
svc_list = list(services)
validate_services(cfg, svc_list)
show_host_compat = True
else:
svc_list = list(cfg.services.keys())
show_host_compat = False
# Run checks
has_errors = _report_config_status(cfg)
_report_traefik_status(cfg, svc_list)
if not local and _run_remote_checks(cfg, svc_list, show_host_compat=show_host_compat):
has_errors = True
# Check for orphaned services (in state but removed from config)
if _report_orphaned_services(cfg):
has_errors = True
if has_errors:
raise typer.Exit(1)
@app.command("init-network", rich_help_panel="Configuration")
def init_network(
hosts: Annotated[
list[str] | None,
typer.Argument(help="Hosts to create network on (default: all)"),
] = None,
network: Annotated[
str,
typer.Option("--network", "-n", help="Network name"),
] = _DEFAULT_NETWORK_NAME,
subnet: Annotated[
str,
typer.Option("--subnet", "-s", help="Network subnet"),
] = _DEFAULT_NETWORK_SUBNET,
gateway: Annotated[
str,
typer.Option("--gateway", "-g", help="Network gateway"),
] = _DEFAULT_NETWORK_GATEWAY,
config: ConfigOption = None,
) -> None:
"""Create Docker network on hosts with consistent settings.
Creates an external Docker network that services can use for cross-host
communication. Uses the same subnet/gateway on all hosts to ensure
consistent networking.
"""
cfg = load_config_or_exit(config)
target_hosts = list(hosts) if hosts else list(cfg.hosts.keys())
validate_hosts(cfg, target_hosts)
async def create_network_on_host(host_name: str) -> CommandResult:
host = cfg.hosts[host_name]
# Check if network already exists
check_cmd = f"docker network inspect '{network}' >/dev/null 2>&1"
check_result = await run_command(host, check_cmd, host_name, stream=False)
if check_result.success:
console.print(f"[cyan]\\[{host_name}][/] Network '{network}' already exists")
return CommandResult(service=host_name, exit_code=0, success=True)
# Create the network
create_cmd = (
f"docker network create "
f"--driver bridge "
f"--subnet '{subnet}' "
f"--gateway '{gateway}' "
f"'{network}'"
)
result = await run_command(host, create_cmd, host_name, stream=False)
if result.success:
console.print(f"[cyan]\\[{host_name}][/] [green]✓[/] Created network '{network}'")
else:
print_error(
f"[cyan]\\[{host_name}][/] Failed to create network: {result.stderr.strip()}"
)
return result
async def run_all() -> list[CommandResult]:
return await asyncio.gather(*[create_network_on_host(h) for h in target_hosts])
results = run_async(run_all())
failed = [r for r in results if not r.success]
if failed:
raise typer.Exit(1)
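# Illustrative note: with the defaults above, each host effectively runs
#   docker network inspect 'mynetwork' >/dev/null 2>&1 ||
#     docker network create --driver bridge --subnet '172.20.0.0/16' \
#       --gateway '172.20.0.1' 'mynetwork'
# so re-running the command on a host that already has the network is a no-op.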


@@ -0,0 +1,190 @@
"""Monitoring commands: logs, ps, stats."""
from __future__ import annotations
import contextlib
from typing import TYPE_CHECKING, Annotated
import typer
from rich.table import Table
from compose_farm.cli.app import app
from compose_farm.cli.common import (
_STATS_PREVIEW_LIMIT,
AllOption,
ConfigOption,
HostOption,
ServicesArg,
get_services,
load_config_or_exit,
report_results,
run_async,
run_parallel_with_progress,
)
from compose_farm.console import console
from compose_farm.executor import run_command, run_on_services
from compose_farm.state import get_services_needing_migration, group_services_by_host, load_state
if TYPE_CHECKING:
from compose_farm.config import Config
def _get_container_counts(cfg: Config) -> dict[str, int]:
"""Get container counts from all hosts with a progress bar."""
async def get_count(host_name: str) -> tuple[str, int]:
host = cfg.hosts[host_name]
result = await run_command(host, "docker ps -q | wc -l", host_name, stream=False)
count = 0
if result.success:
with contextlib.suppress(ValueError):
count = int(result.stdout.strip())
return host_name, count
results = run_parallel_with_progress(
"Querying hosts",
list(cfg.hosts.keys()),
get_count,
)
return dict(results)
def _build_host_table(
cfg: Config,
services_by_host: dict[str, list[str]],
running_by_host: dict[str, list[str]],
container_counts: dict[str, int],
*,
show_containers: bool,
) -> Table:
"""Build the hosts table."""
table = Table(title="Hosts", show_header=True, header_style="bold cyan")
table.add_column("Host", style="magenta")
table.add_column("Address")
table.add_column("Configured", justify="right")
table.add_column("Running", justify="right")
if show_containers:
table.add_column("Containers", justify="right")
for host_name in sorted(cfg.hosts.keys()):
host = cfg.hosts[host_name]
configured = len(services_by_host[host_name])
running = len(running_by_host[host_name])
row = [
host_name,
host.address,
str(configured),
str(running) if running > 0 else "[dim]0[/]",
]
if show_containers:
count = container_counts.get(host_name, 0)
row.append(str(count) if count > 0 else "[dim]0[/]")
table.add_row(*row)
return table
def _build_summary_table(
cfg: Config, state: dict[str, str | list[str]], pending: list[str]
) -> Table:
"""Build the summary table."""
on_disk = cfg.discover_compose_dirs()
table = Table(title="Summary", show_header=False)
table.add_column("Label", style="dim")
table.add_column("Value", style="bold")
table.add_row("Total hosts", str(len(cfg.hosts)))
table.add_row("Services (configured)", str(len(cfg.services)))
table.add_row("Services (tracked)", str(len(state)))
table.add_row("Compose files on disk", str(len(on_disk)))
if pending:
preview = ", ".join(pending[:_STATS_PREVIEW_LIMIT])
suffix = "..." if len(pending) > _STATS_PREVIEW_LIMIT else ""
table.add_row("Pending migrations", f"[yellow]{len(pending)}[/] ({preview}{suffix})")
else:
table.add_row("Pending migrations", "[green]0[/]")
return table
# --- Command functions ---
@app.command(rich_help_panel="Monitoring")
def logs(
services: ServicesArg = None,
all_services: AllOption = False,
host: HostOption = None,
follow: Annotated[bool, typer.Option("--follow", "-f", help="Follow logs")] = False,
tail: Annotated[
int | None,
typer.Option("--tail", "-n", help="Number of lines (default: 20 for --all, 100 otherwise)"),
] = None,
config: ConfigOption = None,
) -> None:
"""Show service logs."""
svc_list, cfg = get_services(services or [], all_services, config, host=host)
# Default to fewer lines when showing multiple services
many_services = all_services or host is not None or len(svc_list) > 1
effective_tail = tail if tail is not None else (20 if many_services else 100)
cmd = f"logs --tail {effective_tail}"
if follow:
cmd += " -f"
results = run_async(run_on_services(cfg, svc_list, cmd))
report_results(results)
@app.command(rich_help_panel="Monitoring")
def ps(
services: ServicesArg = None,
all_services: AllOption = False,
host: HostOption = None,
config: ConfigOption = None,
) -> None:
"""Show status of services.
Without arguments: shows all services (same as --all).
With service names: shows only those services.
With --host: shows services on that host.
"""
svc_list, cfg = get_services(services or [], all_services, config, host=host, default_all=True)
results = run_async(run_on_services(cfg, svc_list, "ps"))
report_results(results)
@app.command(rich_help_panel="Monitoring")
def stats(
live: Annotated[
bool,
typer.Option("--live", "-l", help="Query Docker for live container stats"),
] = False,
config: ConfigOption = None,
) -> None:
"""Show overview statistics for hosts and services.
Without --live: Shows config/state info (hosts, services, pending migrations).
With --live: Also queries Docker on each host for container counts.
"""
cfg = load_config_or_exit(config)
state = load_state(cfg)
pending = get_services_needing_migration(cfg)
all_hosts = list(cfg.hosts.keys())
services_by_host = group_services_by_host(cfg.services, cfg.hosts, all_hosts)
running_by_host = group_services_by_host(state, cfg.hosts, all_hosts)
container_counts: dict[str, int] = {}
if live:
container_counts = _get_container_counts(cfg)
host_table = _build_host_table(
cfg, services_by_host, running_by_host, container_counts, show_containers=live
)
console.print(host_table)
console.print()
console.print(_build_summary_table(cfg, state, pending))

src/compose_farm/cli/ssh.py

@@ -0,0 +1,282 @@
"""SSH key management commands for compose-farm."""
from __future__ import annotations
import asyncio
import subprocess
from typing import TYPE_CHECKING, Annotated
import typer
from compose_farm.cli.app import app
from compose_farm.cli.common import ConfigOption, load_config_or_exit, run_parallel_with_progress
from compose_farm.console import console, err_console
from compose_farm.executor import run_command
if TYPE_CHECKING:
from compose_farm.config import Host
from compose_farm.ssh_keys import (
SSH_KEY_PATH,
SSH_PUBKEY_PATH,
get_pubkey_content,
get_ssh_env,
key_exists,
)
_DEFAULT_SSH_PORT = 22
_PUBKEY_DISPLAY_THRESHOLD = 60
ssh_app = typer.Typer(
name="ssh",
help="Manage SSH keys for passwordless authentication.",
no_args_is_help=True,
)
_ForceOption = Annotated[
bool,
typer.Option("--force", "-f", help="Regenerate key even if it exists."),
]
def _generate_key(*, force: bool = False) -> bool:
"""Generate an ED25519 SSH key with no passphrase.
Returns True if key was generated, False if skipped.
"""
if key_exists() and not force:
console.print(f"[yellow]![/] SSH key already exists: {SSH_KEY_PATH}")
console.print("[dim]Use --force to regenerate[/]")
return False
# Create .ssh directory if it doesn't exist
SSH_KEY_PATH.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
# Remove existing key if forcing regeneration
if force:
SSH_KEY_PATH.unlink(missing_ok=True)
SSH_PUBKEY_PATH.unlink(missing_ok=True)
console.print(f"[dim]Generating SSH key at {SSH_KEY_PATH}...[/]")
try:
subprocess.run(
[ # noqa: S607
"ssh-keygen",
"-t",
"ed25519",
"-N",
"", # No passphrase
"-f",
str(SSH_KEY_PATH),
"-C",
"compose-farm",
],
check=True,
capture_output=True,
)
except subprocess.CalledProcessError as e:
err_console.print(f"[red]Failed to generate SSH key:[/] {e.stderr.decode()}")
return False
except FileNotFoundError:
err_console.print("[red]ssh-keygen not found. Is OpenSSH installed?[/]")
return False
# Set correct permissions
SSH_KEY_PATH.chmod(0o600)
SSH_PUBKEY_PATH.chmod(0o644)
console.print(f"[green]Generated SSH key:[/] {SSH_KEY_PATH}")
return True
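# Illustrative note: the subprocess call above is equivalent to running
#   ssh-keygen -t ed25519 -N "" -f ~/.ssh/compose-farm/id_ed25519 -C compose-farm
# assuming SSH_KEY_PATH resolves to its documented default location.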
def _copy_key_to_host(host_name: str, address: str, user: str, port: int) -> bool:
"""Copy public key to a host's authorized_keys.
Uses ssh-copy-id which handles agent vs password fallback automatically.
Returns True on success, False on failure.
"""
target = f"{user}@{address}"
console.print(f"[dim]Copying key to {host_name} ({target})...[/]")
cmd = ["ssh-copy-id"]
# Disable strict host key checking (consistent with executor.py)
cmd.extend(["-o", "StrictHostKeyChecking=no"])
cmd.extend(["-o", "UserKnownHostsFile=/dev/null"])
if port != _DEFAULT_SSH_PORT:
cmd.extend(["-p", str(port)])
cmd.extend(["-i", str(SSH_PUBKEY_PATH), target])
try:
# Don't capture output so user can see password prompt
result = subprocess.run(cmd, check=False, env=get_ssh_env())
if result.returncode == 0:
console.print(f"[green]Key copied to {host_name}[/]")
return True
err_console.print(f"[red]Failed to copy key to {host_name}[/]")
return False
except FileNotFoundError:
err_console.print("[red]ssh-copy-id not found. Is OpenSSH installed?[/]")
return False
@ssh_app.command("keygen")
def ssh_keygen(
force: _ForceOption = False,
) -> None:
"""Generate SSH key (does not distribute to hosts).
Creates an ED25519 key at ~/.ssh/compose-farm/id_ed25519 with no passphrase.
Use 'cf ssh setup' to also distribute the key to all configured hosts.
"""
success = _generate_key(force=force)
if not success and not key_exists():
raise typer.Exit(1)
@ssh_app.command("setup")
def ssh_setup(
config: ConfigOption = None,
force: _ForceOption = False,
) -> None:
"""Generate SSH key and distribute to all configured hosts.
Creates an ED25519 key at ~/.ssh/compose-farm/id_ed25519 (no passphrase)
and copies the public key to authorized_keys on each host.
For each host, tries SSH agent first. If agent is unavailable,
prompts for password.
"""
cfg = load_config_or_exit(config)
# Skip localhost hosts
remote_hosts = {
name: host
for name, host in cfg.hosts.items()
if host.address.lower() not in ("localhost", "127.0.0.1")
}
if not remote_hosts:
console.print("[yellow]No remote hosts configured.[/]")
raise typer.Exit(0)
# Generate key if needed
if not key_exists() or force:
if not _generate_key(force=force):
raise typer.Exit(1)
else:
console.print(f"[dim]Using existing key: {SSH_KEY_PATH}[/]")
console.print()
console.print(f"[bold]Distributing key to {len(remote_hosts)} host(s)...[/]")
console.print()
# Copy key to each host
succeeded = 0
failed = 0
for host_name, host in remote_hosts.items():
if _copy_key_to_host(host_name, host.address, host.user, host.port):
succeeded += 1
else:
failed += 1
console.print()
if failed == 0:
console.print(
f"[green]Setup complete.[/] {succeeded}/{len(remote_hosts)} hosts configured."
)
else:
console.print(
f"[yellow]Setup partially complete.[/] {succeeded}/{len(remote_hosts)} hosts configured, "
f"[red]{failed} failed[/]."
)
raise typer.Exit(1)
@ssh_app.command("status")
def ssh_status(
config: ConfigOption = None,
) -> None:
"""Show SSH key status and host connectivity."""
from rich.table import Table # noqa: PLC0415
cfg = load_config_or_exit(config)
# Key status
console.print("[bold]SSH Key Status[/]")
console.print()
if key_exists():
console.print(f" [green]Key exists:[/] {SSH_KEY_PATH}")
pubkey = get_pubkey_content()
if pubkey:
# Show truncated public key
if len(pubkey) > _PUBKEY_DISPLAY_THRESHOLD:
console.print(f" [dim]Public key:[/] {pubkey[:30]}...{pubkey[-20:]}")
else:
console.print(f" [dim]Public key:[/] {pubkey}")
else:
console.print(f" [yellow]No key found:[/] {SSH_KEY_PATH}")
console.print(" [dim]Run 'cf ssh setup' to generate and distribute a key[/]")
console.print()
console.print("[bold]Host Connectivity[/]")
console.print()
# Skip localhost hosts
remote_hosts = {
name: host
for name, host in cfg.hosts.items()
if host.address.lower() not in ("localhost", "127.0.0.1")
}
if not remote_hosts:
console.print(" [dim]No remote hosts configured[/]")
return
async def check_host(item: tuple[str, Host]) -> tuple[str, str, str]:
"""Check connectivity to a single host."""
host_name, host = item
target = f"{host.user}@{host.address}"
if host.port != _DEFAULT_SSH_PORT:
target += f":{host.port}"
try:
result = await asyncio.wait_for(
run_command(host, "echo ok", host_name, stream=False),
timeout=5.0,
)
status = "[green]OK[/]" if result.success else "[red]Auth failed[/]"
except TimeoutError:
status = "[red]Timeout (5s)[/]"
except Exception as e:
status = f"[red]Error: {e}[/]"
return host_name, target, status
# Check connectivity in parallel with progress bar
results = run_parallel_with_progress(
"Checking hosts",
list(remote_hosts.items()),
check_host,
)
# Build table from results
table = Table(show_header=True, header_style="bold")
table.add_column("Host")
table.add_column("Address")
table.add_column("Status")
# Sort by host name for consistent order
for host_name, target, status in sorted(results, key=lambda r: r[0]):
table.add_row(host_name, target, status)
console.print(table)
# Register ssh subcommand on the shared app
app.add_typer(ssh_app, name="ssh", rich_help_panel="Configuration")

@@ -0,0 +1,48 @@
"""Web server command."""
from __future__ import annotations
from typing import Annotated
import typer
from compose_farm.cli.app import app
from compose_farm.console import console
@app.command(rich_help_panel="Server")
def web(
host: Annotated[
str,
typer.Option("--host", "-H", help="Host to bind to"),
] = "0.0.0.0", # noqa: S104
port: Annotated[
int,
typer.Option("--port", "-p", help="Port to listen on"),
] = 8000,
reload: Annotated[
bool,
typer.Option("--reload", "-r", help="Enable auto-reload for development"),
] = False,
) -> None:
"""Start the web UI server."""
try:
import uvicorn # noqa: PLC0415
except ImportError:
console.print(
"[red]Error:[/] Web dependencies not installed. "
"Install with: [cyan]pip install compose-farm[web][/]"
)
raise typer.Exit(1) from None
console.print(f"[green]Starting Compose Farm Web UI[/] at http://{host}:{port}")
console.print("[dim]Press Ctrl+C to stop[/]")
uvicorn.run(
"compose_farm.web:create_app",
factory=True,
host=host,
port=port,
reload=reload,
log_level="info",
)

src/compose_farm/compose.py
@@ -0,0 +1,350 @@
"""Compose file parsing utilities.
Handles .env loading, variable interpolation, port/volume/network extraction.
"""
from __future__ import annotations
import os
import re
import stat
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Any
import yaml
if TYPE_CHECKING:
from .config import Config
# Port parsing constants
_SINGLE_PART = 1
_PUBLISHED_TARGET_PARTS = 2
_HOST_PUBLISHED_PARTS = 3
_MIN_VOLUME_PARTS = 2
_VAR_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-(.*?))?\}")
@dataclass(frozen=True)
class PortMapping:
"""Port mapping for a compose service."""
target: int
published: int | None
def _load_env(compose_path: Path) -> dict[str, str]:
"""Load environment variables for compose interpolation.
Reads from .env file in the same directory as compose file,
then overlays current environment variables.
"""
env: dict[str, str] = {}
env_path = compose_path.parent / ".env"
if env_path.exists():
for line in env_path.read_text().splitlines():
stripped = line.strip()
if not stripped or stripped.startswith("#") or "=" not in stripped:
continue
key, value = stripped.split("=", 1)
key = key.strip()
value = value.strip()
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
value = value[1:-1]
env[key] = value
env.update({k: v for k, v in os.environ.items() if isinstance(v, str)})
return env
def _interpolate(value: str, env: dict[str, str]) -> str:
"""Perform ${VAR} and ${VAR:-default} interpolation."""
def replace(match: re.Match[str]) -> str:
var = match.group(1)
default = match.group(2)
resolved = env.get(var)
if resolved:
return resolved
return default or ""
return _VAR_PATTERN.sub(replace, value)
def _parse_ports(raw: Any, env: dict[str, str]) -> list[PortMapping]: # noqa: PLR0912
"""Parse port specifications from compose file.
Handles string formats like "8080", "8080:80", "0.0.0.0:8080:80",
and dict formats with target/published keys.
"""
if raw is None:
return []
mappings: list[PortMapping] = []
items = raw if isinstance(raw, list) else [raw]
for item in items:
if isinstance(item, str):
interpolated = _interpolate(item, env)
port_spec, _, _ = interpolated.partition("/")
parts = port_spec.split(":")
published: int | None = None
target: int | None = None
if len(parts) == _SINGLE_PART and parts[0].isdigit():
target = int(parts[0])
elif (
len(parts) == _PUBLISHED_TARGET_PARTS and parts[0].isdigit() and parts[1].isdigit()
):
published = int(parts[0])
target = int(parts[1])
elif (
len(parts) == _HOST_PUBLISHED_PARTS and parts[-2].isdigit() and parts[-1].isdigit()
):
published = int(parts[-2])
target = int(parts[-1])
if target is not None:
mappings.append(PortMapping(target=target, published=published))
elif isinstance(item, dict):
target_raw = item.get("target")
if isinstance(target_raw, str):
target_raw = _interpolate(target_raw, env)
if target_raw is None:
continue
try:
target_val = int(str(target_raw))
except (TypeError, ValueError):
continue
published_raw = item.get("published")
if isinstance(published_raw, str):
published_raw = _interpolate(published_raw, env)
published_val: int | None
try:
published_val = int(str(published_raw)) if published_raw is not None else None
except (TypeError, ValueError):
published_val = None
mappings.append(PortMapping(target=target_val, published=published_val))
return mappings
def _resolve_host_path(host_path: str, compose_dir: Path) -> str | None:
"""Resolve a host path from volume mount, returning None for named volumes."""
if host_path.startswith("/"):
return host_path
if host_path.startswith(("./", "../")):
return str((compose_dir / host_path).resolve())
return None # Named volume
def _is_socket(path: str) -> bool:
"""Check if a path is a socket (e.g., SSH agent socket)."""
try:
return stat.S_ISSOCK(Path(path).stat().st_mode)
except (FileNotFoundError, PermissionError, OSError):
return False
def _parse_volume_item(
item: str | dict[str, Any],
env: dict[str, str],
compose_dir: Path,
) -> str | None:
"""Parse a single volume item and return host path if it's a bind mount.
Skips socket paths (e.g., SSH_AUTH_SOCK) since they're machine-local
and shouldn't be validated on remote hosts.
"""
host_path: str | None = None
if isinstance(item, str):
interpolated = _interpolate(item, env)
parts = interpolated.split(":")
if len(parts) >= _MIN_VOLUME_PARTS:
host_path = _resolve_host_path(parts[0], compose_dir)
elif isinstance(item, dict) and item.get("type") == "bind":
source = item.get("source")
if source:
interpolated = _interpolate(str(source), env)
host_path = _resolve_host_path(interpolated, compose_dir)
# Skip sockets - they're machine-local (e.g., SSH agent)
if host_path and _is_socket(host_path):
return None
return host_path
def parse_host_volumes(config: Config, service: str) -> list[str]:
"""Extract host bind mount paths from a service's compose file.
Returns a list of absolute host paths used as volume mounts.
Skips named volumes and resolves relative paths.
"""
compose_path = config.get_compose_path(service)
if not compose_path.exists():
return []
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
return []
paths: list[str] = []
compose_dir = compose_path.parent
for definition in raw_services.values():
if not isinstance(definition, dict):
continue
volumes = definition.get("volumes")
if not volumes:
continue
items = volumes if isinstance(volumes, list) else [volumes]
for item in items:
host_path = _parse_volume_item(item, env, compose_dir)
if host_path:
paths.append(host_path)
# Return unique paths, preserving order
seen: set[str] = set()
unique: list[str] = []
for p in paths:
if p not in seen:
seen.add(p)
unique.append(p)
return unique
def parse_devices(config: Config, service: str) -> list[str]:
"""Extract host device paths from a service's compose file.
Returns a list of host device paths (e.g., /dev/dri, /dev/dri/renderD128).
"""
compose_path = config.get_compose_path(service)
if not compose_path.exists():
return []
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
return []
devices: list[str] = []
for definition in raw_services.values():
if not isinstance(definition, dict):
continue
device_list = definition.get("devices")
if not device_list or not isinstance(device_list, list):
continue
for item in device_list:
if not isinstance(item, str):
continue
interpolated = _interpolate(item, env)
# Format: host_path:container_path[:options]
parts = interpolated.split(":")
if parts:
host_path = parts[0]
if host_path.startswith("/dev/"):
devices.append(host_path)
# Return unique devices, preserving order
seen: set[str] = set()
unique: list[str] = []
for d in devices:
if d not in seen:
seen.add(d)
unique.append(d)
return unique
def parse_external_networks(config: Config, service: str) -> list[str]:
"""Extract external network names from a service's compose file.
Returns a list of network names marked as external: true.
"""
compose_path = config.get_compose_path(service)
if not compose_path.exists():
return []
compose_data = yaml.safe_load(compose_path.read_text()) or {}
networks = compose_data.get("networks", {})
if not isinstance(networks, dict):
return []
external_networks: list[str] = []
for name, definition in networks.items():
if isinstance(definition, dict) and definition.get("external") is True:
external_networks.append(name)
return external_networks
def load_compose_services(
config: Config,
stack: str,
) -> tuple[dict[str, Any], dict[str, str], str]:
"""Load services from a compose file with environment interpolation.
Returns (services_dict, env_dict, host_address).
"""
compose_path = config.get_compose_path(stack)
if not compose_path.exists():
message = f"[{stack}] Compose file not found: {compose_path}"
raise FileNotFoundError(message)
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
return {}, env, config.get_host(stack).address
return raw_services, env, config.get_host(stack).address
def normalize_labels(raw: Any, env: dict[str, str]) -> dict[str, str]:
"""Normalize labels from list or dict format, with interpolation."""
if raw is None:
return {}
if isinstance(raw, dict):
return {
_interpolate(str(k), env): _interpolate(str(v), env)
for k, v in raw.items()
if k is not None
}
if isinstance(raw, list):
labels: dict[str, str] = {}
for item in raw:
if not isinstance(item, str) or "=" not in item:
continue
key_raw, value_raw = item.split("=", 1)
key = _interpolate(key_raw.strip(), env)
value = _interpolate(value_raw.strip(), env)
labels[key] = value
return labels
return {}
def get_ports_for_service(
definition: dict[str, Any],
all_services: dict[str, Any],
env: dict[str, str],
) -> list[PortMapping]:
"""Get ports for a service, following network_mode: service:X if present."""
network_mode = definition.get("network_mode", "")
if isinstance(network_mode, str) and network_mode.startswith("service:"):
# Service uses another service's network - get ports from that service
ref_service = network_mode[len("service:") :]
if ref_service in all_services:
ref_def = all_services[ref_service]
if isinstance(ref_def, dict):
return _parse_ports(ref_def.get("ports"), env)
return _parse_ports(definition.get("ports"), env)

@@ -8,6 +8,8 @@ from pathlib import Path
import yaml
from pydantic import BaseModel, Field, model_validator
+from .paths import config_search_paths, find_config_path
class Host(BaseModel):
"""SSH host configuration."""
@@ -22,7 +24,7 @@ class Config(BaseModel):
compose_dir: Path = Path("/opt/compose")
hosts: dict[str, Host]
-services: dict[str, str] # service_name -> host_name
+services: dict[str, str | list[str]] # service_name -> host_name or list of hosts
traefik_file: Path | None = None # Auto-regenerate traefik config after up/down
traefik_service: str | None = None # Service name for Traefik (skip its host in file-provider)
config_path: Path = Path() # Set by load_config()
@@ -32,20 +34,60 @@ class Config(BaseModel):
return self.config_path.parent / "compose-farm-state.yaml"
@model_validator(mode="after")
-def validate_service_hosts(self) -> Config:
-"""Ensure all services reference valid hosts."""
-for service, host_name in self.services.items():
-if host_name not in self.hosts:
-msg = f"Service '{service}' references unknown host '{host_name}'"
-raise ValueError(msg)
+def validate_hosts_and_services(self) -> Config:
+"""Validate host names and service configurations."""
+# "all" is reserved keyword, cannot be used as host name
+if "all" in self.hosts:
+msg = "'all' is a reserved keyword and cannot be used as a host name"
+raise ValueError(msg)
+for service, host_value in self.services.items():
+# Validate list configurations
+if isinstance(host_value, list):
+if not host_value:
+msg = f"Service '{service}' has empty host list"
+raise ValueError(msg)
+if len(host_value) != len(set(host_value)):
+msg = f"Service '{service}' has duplicate hosts in list"
+raise ValueError(msg)
+# Validate all referenced hosts exist
+host_names = self.get_hosts(service)
+for host_name in host_names:
+if host_name not in self.hosts:
+msg = f"Service '{service}' references unknown host '{host_name}'"
+raise ValueError(msg)
return self
-def get_host(self, service: str) -> Host:
-"""Get host config for a service."""
+def get_hosts(self, service: str) -> list[str]:
+"""Get list of host names for a service.
+Supports:
+- Single host: "truenas-debian" -> ["truenas-debian"]
+- All hosts: "all" -> list of all configured hosts
+- Explicit list: ["host1", "host2"] -> ["host1", "host2"]
+"""
if service not in self.services:
msg = f"Unknown service: {service}"
raise ValueError(msg)
-return self.hosts[self.services[service]]
+host_value = self.services[service]
+if isinstance(host_value, list):
+return host_value
+if host_value == "all":
+return list(self.hosts.keys())
+return [host_value]
+def is_multi_host(self, service: str) -> bool:
+"""Check if a service runs on multiple hosts."""
+return len(self.get_hosts(service)) > 1
+def get_host(self, service: str) -> Host:
+"""Get host config for a service (first host if multi-host)."""
+if service not in self.services:
+msg = f"Unknown service: {service}"
+raise ValueError(msg)
+host_names = self.get_hosts(service)
+return self.hosts[host_names[0]]
def get_compose_path(self, service: str) -> Path:
"""Get compose file path for a service.
@@ -102,26 +144,22 @@ def load_config(path: Path | None = None) -> Config:
"""Load configuration from YAML file.
Search order:
-1. Explicit path if provided
-2. ./compose-farm.yaml
-3. ~/.config/compose-farm/compose-farm.yaml
+1. Explicit path if provided via --config
+2. CF_CONFIG environment variable
+3. ./compose-farm.yaml
+4. $XDG_CONFIG_HOME/compose-farm/compose-farm.yaml (defaults to ~/.config)
"""
-search_paths = [
-Path("compose-farm.yaml"),
-Path.home() / ".config" / "compose-farm" / "compose-farm.yaml",
-]
-if path:
-config_path = path
-else:
-config_path = None
-for p in search_paths:
-if p.exists():
-config_path = p
-break
+config_path = path or find_config_path()
if config_path is None or not config_path.exists():
-msg = f"Config file not found. Searched: {', '.join(str(p) for p in search_paths)}"
+msg = f"Config file not found. Searched: {', '.join(str(p) for p in config_search_paths())}"
raise FileNotFoundError(msg)
+if config_path.is_dir():
+msg = (
+f"Config path is a directory, not a file: {config_path}\n"
+"This often happens when Docker creates an empty directory for a missing mount."
+)
+raise FileNotFoundError(msg)
with config_path.open() as f:

@@ -0,0 +1,38 @@
"""Shared console instances for consistent output styling."""
from rich.console import Console
console = Console(highlight=False)
err_console = Console(stderr=True, highlight=False)
# --- Message Constants ---
# Standardized message templates for consistent user-facing output
MSG_SERVICE_NOT_FOUND = "Service [cyan]{name}[/] not found in config"
MSG_HOST_NOT_FOUND = "Host [magenta]{name}[/] not found in config"
MSG_CONFIG_NOT_FOUND = "Config file not found"
MSG_DRY_RUN = "[dim](dry-run: no changes made)[/]"
# --- Message Helper Functions ---
def print_error(msg: str) -> None:
"""Print error message with ✗ prefix to stderr."""
err_console.print(f"[red]✗[/] {msg}")
def print_success(msg: str) -> None:
"""Print success message with ✓ prefix to stdout."""
console.print(f"[green]✓[/] {msg}")
def print_warning(msg: str) -> None:
"""Print warning message with ! prefix to stderr."""
err_console.print(f"[yellow]![/] {msg}")
def print_hint(msg: str) -> None:
"""Print hint message in dim style to stdout."""
console.print(f"[dim]Hint: {msg}[/]")

@@ -0,0 +1,89 @@
# Compose Farm configuration
# Documentation: https://github.com/basnijholt/compose-farm
#
# This file configures compose-farm to manage Docker Compose services
# across multiple hosts via SSH.
#
# Place this file at:
# - ./compose-farm.yaml (current directory)
# - ~/.config/compose-farm/compose-farm.yaml
# - Or specify with: cf --config /path/to/config.yaml
# - Or set CF_CONFIG environment variable
# ------------------------------------------------------------------------------
# compose_dir: Directory containing service subdirectories with compose files
# ------------------------------------------------------------------------------
# Each subdirectory should contain a compose.yaml (or docker-compose.yml).
# This path must be the same on all hosts (NFS mount recommended).
#
compose_dir: /opt/compose
# ------------------------------------------------------------------------------
# hosts: SSH connection details for each host
# ------------------------------------------------------------------------------
# Simple form:
# hostname: ip-or-fqdn
#
# Full form:
# hostname:
# address: ip-or-fqdn
# user: ssh-username # default: current user
# port: 22 # default: 22
#
# Note: "all" is a reserved keyword and cannot be used as a host name.
#
hosts:
# Example: simple form (uses current user, port 22)
server1: 192.168.1.10
# Example: full form with explicit user
server2:
address: 192.168.1.20
user: admin
# Example: full form with custom port
server3:
address: 192.168.1.30
user: root
port: 2222
# ------------------------------------------------------------------------------
# services: Map service names to their target host(s)
# ------------------------------------------------------------------------------
# Each service name must match a subdirectory in compose_dir.
#
# Single host:
# service-name: hostname
#
# Multiple hosts (explicit list):
# service-name: [host1, host2]
#
# All hosts:
# service-name: all
#
services:
# Example: service runs on a single host
nginx: server1
postgres: server2
# Example: service runs on multiple specific hosts
# prometheus: [server1, server2]
# Example: service runs on ALL hosts (e.g., monitoring agents)
# node-exporter: all
# ------------------------------------------------------------------------------
# traefik_file: (optional) Auto-generate Traefik file-provider config
# ------------------------------------------------------------------------------
# When set, compose-farm automatically regenerates this file after
# up/down/restart/update commands. Traefik watches this file for changes.
#
# traefik_file: /opt/compose/traefik/dynamic.d/compose-farm.yml
# ------------------------------------------------------------------------------
# traefik_service: (optional) Service name running Traefik
# ------------------------------------------------------------------------------
# When generating traefik_file, services on the same host as Traefik are
# skipped (they're handled by Traefik's Docker provider directly).
#
# traefik_service: traefik

@@ -0,0 +1,520 @@
"""Command execution via SSH or locally."""
from __future__ import annotations
import asyncio
import socket
import subprocess
from dataclasses import dataclass
from functools import lru_cache
from typing import TYPE_CHECKING, Any
from rich.markup import escape
from .console import console, err_console
from .ssh_keys import get_key_path, get_ssh_auth_sock, get_ssh_env
if TYPE_CHECKING:
from collections.abc import Callable
from .config import Config, Host
LOCAL_ADDRESSES = frozenset({"local", "localhost", "127.0.0.1", "::1"})
_DEFAULT_SSH_PORT = 22
def build_ssh_command(host: Host, command: str, *, tty: bool = False) -> list[str]:
"""Build SSH command args for executing a command on a remote host.
Args:
host: Host configuration with address, port, user
command: Command to run on the remote host
tty: Whether to allocate a TTY (for interactive/progress bar commands)
Returns:
List of command args suitable for subprocess
"""
ssh_args = [
"ssh",
"-o",
"StrictHostKeyChecking=no",
"-o",
"UserKnownHostsFile=/dev/null",
"-o",
"LogLevel=ERROR",
]
if tty:
ssh_args.insert(1, "-tt") # Force TTY allocation
key_path = get_key_path()
if key_path:
ssh_args.extend(["-i", str(key_path)])
if host.port != _DEFAULT_SSH_PORT:
ssh_args.extend(["-p", str(host.port)])
ssh_args.append(f"{host.user}@{host.address}")
ssh_args.append(command)
return ssh_args
@lru_cache(maxsize=1)
def _get_local_ips() -> frozenset[str]:
"""Get all IP addresses of the current machine."""
ips: set[str] = set()
try:
hostname = socket.gethostname()
# Get all addresses for hostname
for info in socket.getaddrinfo(hostname, None):
addr = info[4][0]
if isinstance(addr, str):
ips.add(addr)
# Also try getting the default outbound IP
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
s.connect(("8.8.8.8", 80))
ips.add(s.getsockname()[0])
except OSError:
pass
return frozenset(ips)
@dataclass
class CommandResult:
"""Result of a command execution."""
service: str
exit_code: int
success: bool
stdout: str = ""
stderr: str = ""
# SSH returns 255 when connection is closed unexpectedly (e.g., Ctrl+C)
_SSH_CONNECTION_CLOSED = 255
@property
def interrupted(self) -> bool:
"""Check if command was killed by SIGINT (Ctrl+C)."""
# Negative exit codes indicate signal termination; -2 = SIGINT
return self.exit_code < 0 or self.exit_code == self._SSH_CONNECTION_CLOSED
def is_local(host: Host) -> bool:
"""Check if host should run locally (no SSH)."""
addr = host.address.lower()
if addr in LOCAL_ADDRESSES:
return True
# Check if address matches any of this machine's IPs
return addr in _get_local_ips()
def ssh_connect_kwargs(host: Host) -> dict[str, Any]:
"""Get kwargs for asyncssh.connect() from a Host config."""
kwargs: dict[str, Any] = {
"host": host.address,
"port": host.port,
"username": host.user,
"known_hosts": None,
}
# Add SSH agent path (auto-detect forwarded agent if needed)
agent_path = get_ssh_auth_sock()
if agent_path:
kwargs["agent_path"] = agent_path
# Add key file fallback for when SSH agent is unavailable
key_path = get_key_path()
if key_path:
kwargs["client_keys"] = [str(key_path)]
return kwargs
async def _run_local_command(
command: str,
service: str,
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run a command locally with streaming output."""
try:
if raw:
# Run with inherited stdout/stderr for proper \r handling
proc = await asyncio.create_subprocess_shell(
command,
stdout=None, # Inherit
stderr=None, # Inherit
)
await proc.wait()
return CommandResult(
service=service,
exit_code=proc.returncode or 0,
success=proc.returncode == 0,
)
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
if stream and proc.stdout and proc.stderr:
async def read_stream(
reader: asyncio.StreamReader,
prefix: str,
*,
is_stderr: bool = False,
) -> None:
out = err_console if is_stderr else console
while True:
line = await reader.readline()
if not line:
break
text = line.decode()
if text.strip(): # Skip empty lines
out.print(f"[cyan]\\[{prefix}][/] {escape(text)}", end="")
await asyncio.gather(
read_stream(proc.stdout, service),
read_stream(proc.stderr, service, is_stderr=True),
)
stdout_data = b""
stderr_data = b""
if not stream:
stdout_data, stderr_data = await proc.communicate()
else:
await proc.wait()
return CommandResult(
service=service,
exit_code=proc.returncode or 0,
success=proc.returncode == 0,
stdout=stdout_data.decode() if stdout_data else "",
stderr=stderr_data.decode() if stderr_data else "",
)
except OSError as e:
err_console.print(f"[cyan]\\[{service}][/] [red]Local error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
async def _run_ssh_command(
host: Host,
command: str,
service: str,
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run a command on a remote host via SSH with streaming output."""
if raw:
# Use native ssh with TTY for proper progress bar rendering
ssh_args = build_ssh_command(host, command, tty=True)
# Run in thread to avoid blocking the event loop
# Use get_ssh_env() to auto-detect SSH agent socket
result = await asyncio.to_thread(subprocess.run, ssh_args, check=False, env=get_ssh_env())
return CommandResult(
service=service,
exit_code=result.returncode,
success=result.returncode == 0,
)
import asyncssh # noqa: PLC0415 - lazy import for faster CLI startup
proc: asyncssh.SSHClientProcess[Any]
try:
async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn: # noqa: SIM117
async with conn.create_process(command) as proc:
if stream:
async def read_stream(
reader: Any,
prefix: str,
*,
is_stderr: bool = False,
) -> None:
out = err_console if is_stderr else console
async for line in reader:
if line.strip(): # Skip empty lines
out.print(f"[cyan]\\[{prefix}][/] {escape(line)}", end="")
await asyncio.gather(
read_stream(proc.stdout, service),
read_stream(proc.stderr, service, is_stderr=True),
)
stdout_data = ""
stderr_data = ""
if not stream:
stdout_data = await proc.stdout.read()
stderr_data = await proc.stderr.read()
await proc.wait()
return CommandResult(
service=service,
exit_code=proc.exit_status or 0,
success=proc.exit_status == 0,
stdout=stdout_data,
stderr=stderr_data,
)
except (OSError, asyncssh.Error) as e:
err_console.print(f"[cyan]\\[{service}][/] [red]SSH error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
async def run_command(
host: Host,
command: str,
service: str,
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run a command on a host (locally or via SSH).
Args:
host: Host configuration
command: Command to run
service: Service name (used as prefix in output)
stream: Whether to stream output (default True)
raw: Whether to use raw mode with TTY (default False)
"""
if is_local(host):
return await _run_local_command(command, service, stream=stream, raw=raw)
return await _run_ssh_command(host, command, service, stream=stream, raw=raw)
async def run_compose(
config: Config,
service: str,
compose_cmd: str,
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run a docker compose command for a service."""
host = config.get_host(service)
compose_path = config.get_compose_path(service)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream, raw=raw)
async def run_compose_on_host(
config: Config,
service: str,
host_name: str,
compose_cmd: str,
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run a docker compose command for a service on a specific host.
Used for migration - running 'down' on the old host before 'up' on new host.
"""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream, raw=raw)
async def run_on_services(
config: Config,
services: list[str],
compose_cmd: str,
*,
stream: bool = True,
raw: bool = False,
) -> list[CommandResult]:
"""Run a docker compose command on multiple services in parallel.
For multi-host services, runs on all configured hosts.
Note: raw=True only makes sense for single-service operations.
"""
return await run_sequential_on_services(config, services, [compose_cmd], stream=stream, raw=raw)
async def _run_sequential_commands(
config: Config,
service: str,
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
) -> CommandResult:
"""Run multiple compose commands sequentially for a service."""
for cmd in commands:
result = await run_compose(config, service, cmd, stream=stream, raw=raw)
if not result.success:
return result
return CommandResult(service=service, exit_code=0, success=True)
async def _run_sequential_commands_multi_host(
config: Config,
service: str,
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
) -> list[CommandResult]:
"""Run multiple compose commands sequentially for a multi-host service.
Commands are run sequentially, but each command runs on all hosts in parallel.
"""
host_names = config.get_hosts(service)
compose_path = config.get_compose_path(service)
final_results: list[CommandResult] = []
for cmd in commands:
command = f"docker compose -f {compose_path} {cmd}"
tasks = []
for host_name in host_names:
host = config.hosts[host_name]
label = f"{service}@{host_name}" if len(host_names) > 1 else service
tasks.append(run_command(host, command, label, stream=stream, raw=raw))
results = await asyncio.gather(*tasks)
final_results = list(results)
# Check if any failed
if any(not r.success for r in results):
return final_results
return final_results
async def run_sequential_on_services(
config: Config,
services: list[str],
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
) -> list[CommandResult]:
"""Run sequential commands on multiple services in parallel.
For multi-host services, runs on all configured hosts.
Note: raw=True only makes sense for single-service operations.
"""
# Separate multi-host and single-host services for type-safe gathering
multi_host_tasks = []
single_host_tasks = []
for service in services:
if config.is_multi_host(service):
multi_host_tasks.append(
_run_sequential_commands_multi_host(
config, service, commands, stream=stream, raw=raw
)
)
else:
single_host_tasks.append(
_run_sequential_commands(config, service, commands, stream=stream, raw=raw)
)
# Gather results separately to maintain type safety
flat_results: list[CommandResult] = []
if multi_host_tasks:
multi_results = await asyncio.gather(*multi_host_tasks)
for result_list in multi_results:
flat_results.extend(result_list)
if single_host_tasks:
single_results = await asyncio.gather(*single_host_tasks)
flat_results.extend(single_results)
return flat_results
async def check_service_running(
config: Config,
service: str,
host_name: str,
) -> bool:
"""Check if a service has running containers on a specific host."""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
# Use ps --status running to check for running containers
command = f"docker compose -f {compose_path} ps --status running -q"
result = await run_command(host, command, service, stream=False)
# If command succeeded and has output, containers are running
return result.success and bool(result.stdout.strip())
async def _batch_check_existence(
config: Config,
host_name: str,
items: list[str],
cmd_template: Callable[[str], str],
context: str,
) -> dict[str, bool]:
"""Check existence of multiple items on a host using a command template."""
if not items:
return {}
host = config.hosts[host_name]
checks = []
for item in items:
escaped = item.replace("'", "'\\''")
checks.append(cmd_template(escaped))
command = "; ".join(checks)
result = await run_command(host, command, context, stream=False)
exists: dict[str, bool] = dict.fromkeys(items, False)
for raw_line in result.stdout.splitlines():
line = raw_line.strip()
if line.startswith("Y:"):
exists[line[2:]] = True
elif line.startswith("N:"):
exists[line[2:]] = False
return exists
async def check_paths_exist(
config: Config,
host_name: str,
paths: list[str],
) -> dict[str, bool]:
"""Check if multiple paths exist on a specific host.
Returns a dict mapping path -> exists.
Handles permission denied as "exists" (path is there, just not accessible).
"""
# Only report missing if stat says "No such file", otherwise assume exists
# (handles permission denied correctly - path exists, just not accessible)
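# Illustrative generated check for path /opt/compose:
#   stat '/opt/compose' 2>&1 | grep -q 'No such file' \
#     && echo 'N:/opt/compose' || echo 'Y:/opt/compose'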
return await _batch_check_existence(
config,
host_name,
paths,
lambda esc: f"stat '{esc}' 2>&1 | grep -q 'No such file' && echo 'N:{esc}' || echo 'Y:{esc}'",
"mount-check",
)
async def check_networks_exist(
config: Config,
host_name: str,
networks: list[str],
) -> dict[str, bool]:
"""Check if Docker networks exist on a specific host.
Returns a dict mapping network_name -> exists.
"""
return await _batch_check_existence(
config,
host_name,
networks,
lambda esc: (
f"docker network inspect '{esc}' >/dev/null 2>&1 && echo 'Y:{esc}' || echo 'N:{esc}'"
),
"network-check",
)

@@ -6,20 +6,21 @@ import json
import tomllib
from dataclasses import dataclass
from datetime import UTC, datetime
-from pathlib import Path
from typing import TYPE_CHECKING, Any
-from .ssh import run_compose
+from .executor import run_compose
+from .paths import xdg_config_home
if TYPE_CHECKING:
from collections.abc import Awaitable, Callable, Iterable
+from pathlib import Path
from .config import Config
-from .ssh import CommandResult
+from .executor import CommandResult
-DEFAULT_LOG_PATH = Path.home() / ".config" / "compose-farm" / "dockerfarm-log.toml"
-DIGEST_HEX_LENGTH = 64
+DEFAULT_LOG_PATH = xdg_config_home() / "compose-farm" / "dockerfarm-log.toml"
+_DIGEST_HEX_LENGTH = 64
@dataclass(frozen=True)
@@ -46,7 +47,8 @@ class SnapshotEntry:
}
-def _isoformat(dt: datetime) -> str:
+def isoformat(dt: datetime) -> str:
+"""Format a datetime as an ISO 8601 string with Z suffix for UTC."""
return dt.astimezone(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
@@ -95,13 +97,13 @@ def _extract_image_fields(record: dict[str, Any]) -> tuple[str, str]:
or ""
)
-if digest and not digest.startswith("sha256:") and len(digest) == DIGEST_HEX_LENGTH:
+if digest and not digest.startswith("sha256:") and len(digest) == _DIGEST_HEX_LENGTH:
digest = f"sha256:{digest}"
return image, digest
-async def _collect_service_entries(
+async def collect_service_entries(
config: Config,
service: str,
*,
@@ -116,7 +118,8 @@ async def _collect_service_entries(
raise RuntimeError(error)
records = _parse_images_output(result.stdout)
-host_name = config.services[service]
+# Use first host for snapshots (multi-host services use same images on all hosts)
+host_name = config.get_hosts(service)[0]
compose_path = config.get_compose_path(service)
entries: list[SnapshotEntry] = []
@@ -137,19 +140,21 @@ async def _collect_service_entries(
return entries
-def _load_existing_entries(log_path: Path) -> list[dict[str, str]]:
+def load_existing_entries(log_path: Path) -> list[dict[str, str]]:
+"""Load existing snapshot entries from a TOML log file."""
if not log_path.exists():
return []
data = tomllib.loads(log_path.read_text())
return list(data.get("entries", []))
-def _merge_entries(
+def merge_entries(
existing: Iterable[dict[str, str]],
new_entries: Iterable[SnapshotEntry],
*,
now_iso: str,
) -> list[dict[str, str]]:
"""Merge new snapshot entries with existing ones, preserving first_seen timestamps."""
merged: dict[tuple[str, str, str], dict[str, str]] = {
(e["service"], e["host"], e["digest"]): dict(e) for e in existing
}
@@ -162,7 +167,8 @@ def _merge_entries(
return list(merged.values())
-def _write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str, str]]) -> None:
+def write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str, str]]) -> None:
+"""Write snapshot entries to a TOML log file."""
lines: list[str] = ["[meta]"]
lines.extend(f'{key} = "{_escape(meta[key])}"' for key in sorted(meta))
@@ -187,45 +193,3 @@ def _write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str,
content = "\n".join(lines).rstrip() + "\n"
log_path.parent.mkdir(parents=True, exist_ok=True)
log_path.write_text(content)
-async def snapshot_services(
-config: Config,
-services: list[str],
-*,
-log_path: Path | None = None,
-now: datetime | None = None,
-run_compose_fn: Callable[..., Awaitable[CommandResult]] = run_compose,
-) -> Path:
-"""Capture current image digests for services and write them to a TOML log.
-- Preserves the earliest `first_seen` per (service, host, digest)
-- Updates `last_seen` for digests observed in this snapshot
-- Leaves untouched digests that were not part of this run (history is kept)
-"""
-if not services:
-error = "No services specified for snapshot"
-raise RuntimeError(error)
-log_path = log_path or DEFAULT_LOG_PATH
-now_dt = now or datetime.now(UTC)
-now_iso = _isoformat(now_dt)
-existing_entries = _load_existing_entries(log_path)
-snapshot_entries: list[SnapshotEntry] = []
-for service in services:
-snapshot_entries.extend(
-await _collect_service_entries(
-config, service, now=now_dt, run_compose_fn=run_compose_fn
-)
-)
-if not snapshot_entries:
-error = "No image digests were captured"
-raise RuntimeError(error)
-merged_entries = _merge_entries(existing_entries, snapshot_entries, now_iso=now_iso)
-meta = {"generated_at": now_iso, "compose_dir": str(config.compose_dir)}
-_write_toml(log_path, meta=meta, entries=merged_entries)
-return log_path

@@ -0,0 +1,434 @@
"""High-level operations for compose-farm.
Contains the business logic for up, down, sync, check, and migration operations.
CLI commands are thin wrappers around these functions.
"""
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING, NamedTuple
from .compose import parse_devices, parse_external_networks, parse_host_volumes
from .console import console, err_console, print_error, print_success, print_warning
from .executor import (
CommandResult,
check_networks_exist,
check_paths_exist,
check_service_running,
run_command,
run_compose,
run_compose_on_host,
)
from .state import (
get_orphaned_services,
get_service_host,
remove_service,
set_multi_host_service,
set_service_host,
)
if TYPE_CHECKING:
from .config import Config
class OperationInterruptedError(Exception):
"""Raised when a command is interrupted by Ctrl+C."""
class PreflightResult(NamedTuple):
"""Result of pre-flight checks for a service on a host."""
missing_paths: list[str]
missing_networks: list[str]
missing_devices: list[str]
@property
def ok(self) -> bool:
"""Return True if all checks passed."""
return not (self.missing_paths or self.missing_networks or self.missing_devices)
async def _run_compose_step(
cfg: Config,
service: str,
command: str,
*,
raw: bool,
host: str | None = None,
) -> CommandResult:
"""Run a compose command, handle raw output newline, and check for interrupts."""
if host:
result = await run_compose_on_host(cfg, service, host, command, raw=raw)
else:
result = await run_compose(cfg, service, command, raw=raw)
if raw:
print() # Ensure newline after raw output
if result.interrupted:
raise OperationInterruptedError
return result
def get_service_paths(cfg: Config, service: str) -> list[str]:
"""Get all required paths for a service (compose_dir + volumes)."""
paths = [str(cfg.compose_dir)]
paths.extend(parse_host_volumes(cfg, service))
return paths
async def discover_service_host(cfg: Config, service: str) -> tuple[str, str | list[str] | None]:
"""Discover where a service is running.
For multi-host services, checks all assigned hosts in parallel.
For single-host, checks assigned host first, then others.
Returns (service_name, host_or_hosts_or_none).
"""
assigned_hosts = cfg.get_hosts(service)
if cfg.is_multi_host(service):
# Check all assigned hosts in parallel
checks = await asyncio.gather(
*[check_service_running(cfg, service, h) for h in assigned_hosts]
)
running = [h for h, is_running in zip(assigned_hosts, checks, strict=True) if is_running]
return service, running if running else None
# Single-host: check assigned host first, then others
if await check_service_running(cfg, service, assigned_hosts[0]):
return service, assigned_hosts[0]
for host in cfg.hosts:
if host != assigned_hosts[0] and await check_service_running(cfg, service, host):
return service, host
return service, None
async def check_service_requirements(
cfg: Config,
service: str,
host_name: str,
) -> PreflightResult:
"""Check if a service can run on a specific host.
Verifies that all required paths (volumes), networks, and devices exist.
"""
# Check mount paths
paths = get_service_paths(cfg, service)
path_exists = await check_paths_exist(cfg, host_name, paths)
missing_paths = [p for p, found in path_exists.items() if not found]
# Check external networks
networks = parse_external_networks(cfg, service)
missing_networks: list[str] = []
if networks:
net_exists = await check_networks_exist(cfg, host_name, networks)
missing_networks = [n for n, found in net_exists.items() if not found]
# Check devices
devices = parse_devices(cfg, service)
missing_devices: list[str] = []
if devices:
dev_exists = await check_paths_exist(cfg, host_name, devices)
missing_devices = [d for d, found in dev_exists.items() if not found]
return PreflightResult(missing_paths, missing_networks, missing_devices)
async def _cleanup_and_rollback(
cfg: Config,
service: str,
target_host: str,
current_host: str,
prefix: str,
*,
was_running: bool,
raw: bool = False,
) -> None:
"""Clean up failed start and attempt rollback to old host if it was running."""
print_warning(f"{prefix} Cleaning up failed start on [magenta]{target_host}[/]")
await run_compose(cfg, service, "down", raw=raw)
if not was_running:
err_console.print(
f"{prefix} [dim]Service was not running on [magenta]{current_host}[/], skipping rollback[/]"
)
return
print_warning(f"{prefix} Rolling back to [magenta]{current_host}[/]...")
rollback_result = await run_compose_on_host(cfg, service, current_host, "up -d", raw=raw)
if rollback_result.success:
print_success(f"{prefix} Rollback succeeded on [magenta]{current_host}[/]")
else:
print_error(f"{prefix} Rollback failed - service is down")
def _report_preflight_failures(
service: str,
target_host: str,
preflight: PreflightResult,
) -> None:
"""Report pre-flight check failures."""
print_error(f"[cyan]\\[{service}][/] Cannot start on [magenta]{target_host}[/]:")
for path in preflight.missing_paths:
print_error(f" missing path: {path}")
for net in preflight.missing_networks:
print_error(f" missing network: {net}")
if preflight.missing_networks:
err_console.print(f" [dim]Hint: cf init-network {target_host}[/]")
for dev in preflight.missing_devices:
print_error(f" missing device: {dev}")
async def _up_multi_host_service(
cfg: Config,
service: str,
prefix: str,
*,
raw: bool = False,
) -> list[CommandResult]:
"""Start a multi-host service on all configured hosts."""
host_names = cfg.get_hosts(service)
results: list[CommandResult] = []
compose_path = cfg.get_compose_path(service)
command = f"docker compose -f {compose_path} up -d"
# Pre-flight checks on all hosts
for host_name in host_names:
preflight = await check_service_requirements(cfg, service, host_name)
if not preflight.ok:
_report_preflight_failures(service, host_name, preflight)
results.append(
CommandResult(service=f"{service}@{host_name}", exit_code=1, success=False)
)
return results
# Start on all hosts
hosts_str = ", ".join(f"[magenta]{h}[/]" for h in host_names)
console.print(f"{prefix} Starting on {hosts_str}...")
succeeded_hosts: list[str] = []
for host_name in host_names:
host = cfg.hosts[host_name]
label = f"{service}@{host_name}"
result = await run_command(host, command, label, stream=not raw, raw=raw)
if raw:
print() # Ensure newline after raw output
results.append(result)
if result.success:
succeeded_hosts.append(host_name)
# Update state with hosts that succeeded (partial success is tracked)
if succeeded_hosts:
set_multi_host_service(cfg, service, succeeded_hosts)
return results
async def _migrate_service(
cfg: Config,
service: str,
current_host: str,
target_host: str,
prefix: str,
*,
raw: bool = False,
) -> CommandResult | None:
"""Migrate a service from current_host to target_host.
Pre-pulls/builds images on target, then stops service on current host.
Returns failure result if migration prep fails, None on success.
"""
console.print(
f"{prefix} Migrating from [magenta]{current_host}[/] → [magenta]{target_host}[/]..."
)
# Prepare images on target host before stopping old service to minimize downtime.
# Pull handles image-based services; build handles Dockerfile-based services.
# --ignore-buildable makes pull skip images that have build: defined.
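# Illustrative sequence when migrating "app" from host-a to host-b:
#   on host-b: docker compose -f /opt/compose/app/compose.yaml pull --ignore-buildable
#   on host-b: docker compose -f /opt/compose/app/compose.yaml build
#   on host-a: docker compose -f /opt/compose/app/compose.yaml down
#   (the caller then runs "up -d" on host-b)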
for cmd, label in [("pull --ignore-buildable", "Pull"), ("build", "Build")]:
result = await _run_compose_step(cfg, service, cmd, raw=raw)
if not result.success:
print_error(
f"{prefix} {label} failed on [magenta]{target_host}[/], "
"leaving service on current host"
)
return result
# Stop on current host
down_result = await _run_compose_step(cfg, service, "down", raw=raw, host=current_host)
return down_result if not down_result.success else None
async def _up_single_service(
cfg: Config,
service: str,
prefix: str,
*,
raw: bool,
) -> CommandResult:
"""Start a single-host service with migration support."""
target_host = cfg.get_hosts(service)[0]
current_host = get_service_host(cfg, service)
# Pre-flight check: verify paths, networks, and devices exist on target
preflight = await check_service_requirements(cfg, service, target_host)
if not preflight.ok:
_report_preflight_failures(service, target_host, preflight)
return CommandResult(service=service, exit_code=1, success=False)
# If service is deployed elsewhere, migrate it
did_migration = False
was_running = False
if current_host and current_host != target_host:
if current_host in cfg.hosts:
was_running = await check_service_running(cfg, service, current_host)
failure = await _migrate_service(
cfg, service, current_host, target_host, prefix, raw=raw
)
if failure:
return failure
did_migration = True
else:
print_warning(
f"{prefix} was on [magenta]{current_host}[/] (not in config), skipping down"
)
# Start on target host
console.print(f"{prefix} Starting on [magenta]{target_host}[/]...")
up_result = await _run_compose_step(cfg, service, "up -d", raw=raw)
# Update state on success, or rollback on failure
if up_result.success:
set_service_host(cfg, service, target_host)
elif did_migration and current_host:
await _cleanup_and_rollback(
cfg,
service,
target_host,
current_host,
prefix,
was_running=was_running,
raw=raw,
)
return up_result
async def up_services(
cfg: Config,
services: list[str],
*,
raw: bool = False,
) -> list[CommandResult]:
"""Start services with automatic migration if host changed."""
results: list[CommandResult] = []
total = len(services)
try:
for idx, service in enumerate(services, 1):
prefix = f"[dim][{idx}/{total}][/] [cyan]\\[{service}][/]"
if cfg.is_multi_host(service):
results.extend(await _up_multi_host_service(cfg, service, prefix, raw=raw))
else:
results.append(await _up_single_service(cfg, service, prefix, raw=raw))
except OperationInterruptedError:
raise KeyboardInterrupt from None
return results
async def check_host_compatibility(
cfg: Config,
service: str,
) -> dict[str, tuple[int, int, list[str]]]:
"""Check which hosts can run a service based on paths, networks, and devices.
Returns dict of host_name -> (found_count, total_count, missing_items).
"""
# Get total requirements count
paths = get_service_paths(cfg, service)
networks = parse_external_networks(cfg, service)
devices = parse_devices(cfg, service)
total = len(paths) + len(networks) + len(devices)
results: dict[str, tuple[int, int, list[str]]] = {}
for host_name in cfg.hosts:
preflight = await check_service_requirements(cfg, service, host_name)
all_missing = (
preflight.missing_paths + preflight.missing_networks + preflight.missing_devices
)
found = total - len(all_missing)
results[host_name] = (found, total, all_missing)
return results
async def stop_orphaned_services(cfg: Config) -> list[CommandResult]:
"""Stop orphaned services (in state but not in config).
Runs docker compose down on each service on its tracked host(s).
Only removes from state on successful stop.
Returns list of CommandResults for each service@host.
"""
orphaned = get_orphaned_services(cfg)
if not orphaned:
return []
results: list[CommandResult] = []
tasks: list[tuple[str, str, asyncio.Task[CommandResult]]] = []
# Build list of (service, host, task) for all orphaned services
for service, hosts in orphaned.items():
host_list = hosts if isinstance(hosts, list) else [hosts]
for host in host_list:
# Skip hosts no longer in config
if host not in cfg.hosts:
print_warning(f"{service}@{host}: host no longer in config, skipping")
results.append(
CommandResult(
service=f"{service}@{host}",
exit_code=1,
success=False,
stderr="host no longer in config",
)
)
continue
coro = run_compose_on_host(cfg, service, host, "down")
tasks.append((service, host, asyncio.create_task(coro)))
# Run all down commands in parallel
if tasks:
for service, host, task in tasks:
try:
result = await task
results.append(result)
if result.success:
print_success(f"{service}@{host}: stopped")
else:
print_error(f"{service}@{host}: {result.stderr or 'failed'}")
except Exception as e:
print_error(f"{service}@{host}: {e}")
results.append(
CommandResult(
service=f"{service}@{host}",
exit_code=1,
success=False,
stderr=str(e),
)
)
# Remove from state only for services where ALL hosts succeeded
for service, hosts in orphaned.items():
host_list = hosts if isinstance(hosts, list) else [hosts]
all_succeeded = all(
r.success
for r in results
if r.service.startswith(f"{service}@") or r.service == service
)
if all_succeeded:
remove_service(cfg, service)
return results

src/compose_farm/paths.py
@@ -0,0 +1,33 @@
"""Path utilities - lightweight module with no heavy dependencies."""
from __future__ import annotations
import os
from pathlib import Path
def xdg_config_home() -> Path:
"""Get XDG config directory, respecting XDG_CONFIG_HOME env var."""
return Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config"))
def default_config_path() -> Path:
"""Get the default user config path."""
return xdg_config_home() / "compose-farm" / "compose-farm.yaml"
def config_search_paths() -> list[Path]:
"""Get search paths for config files."""
return [Path("compose-farm.yaml"), default_config_path()]
def find_config_path() -> Path | None:
"""Find the config file path, checking CF_CONFIG env var and search paths."""
if env_path := os.environ.get("CF_CONFIG"):
p = Path(env_path)
if p.exists() and p.is_file():
return p
for p in config_search_paths():
if p.exists() and p.is_file():
return p
return None

@@ -1,277 +0,0 @@
"""Command execution via SSH or locally."""
from __future__ import annotations
import asyncio
import socket
from dataclasses import dataclass
from functools import lru_cache
from typing import TYPE_CHECKING, Any
import asyncssh
from rich.console import Console
from rich.markup import escape
if TYPE_CHECKING:
from .config import Config, Host
_console = Console(highlight=False)
_err_console = Console(stderr=True, highlight=False)
LOCAL_ADDRESSES = frozenset({"local", "localhost", "127.0.0.1", "::1"})
@lru_cache(maxsize=1)
def _get_local_ips() -> frozenset[str]:
"""Get all IP addresses of the current machine."""
ips: set[str] = set()
try:
hostname = socket.gethostname()
# Get all addresses for hostname
for info in socket.getaddrinfo(hostname, None):
addr = info[4][0]
if isinstance(addr, str):
ips.add(addr)
# Also try getting the default outbound IP
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
s.connect(("8.8.8.8", 80))
ips.add(s.getsockname()[0])
except OSError:
pass
return frozenset(ips)
@dataclass
class CommandResult:
"""Result of a command execution."""
service: str
exit_code: int
success: bool
stdout: str = ""
stderr: str = ""
def _is_local(host: Host) -> bool:
"""Check if host should run locally (no SSH)."""
addr = host.address.lower()
if addr in LOCAL_ADDRESSES:
return True
# Check if address matches any of this machine's IPs
return addr in _get_local_ips()
async def _run_local_command(
command: str,
service: str,
*,
stream: bool = True,
) -> CommandResult:
"""Run a command locally with streaming output."""
try:
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
if stream and proc.stdout and proc.stderr:
async def read_stream(
reader: asyncio.StreamReader,
prefix: str,
*,
is_stderr: bool = False,
) -> None:
console = _err_console if is_stderr else _console
while True:
line = await reader.readline()
if not line:
break
text = line.decode()
if text.strip(): # Skip empty lines
console.print(f"[cyan]\\[{prefix}][/] {escape(text)}", end="")
await asyncio.gather(
read_stream(proc.stdout, service),
read_stream(proc.stderr, service, is_stderr=True),
)
stdout_data = b""
stderr_data = b""
if not stream:
stdout_data, stderr_data = await proc.communicate()
else:
await proc.wait()
return CommandResult(
service=service,
exit_code=proc.returncode or 0,
success=proc.returncode == 0,
stdout=stdout_data.decode() if stdout_data else "",
stderr=stderr_data.decode() if stderr_data else "",
)
except OSError as e:
_err_console.print(f"[cyan]\\[{service}][/] [red]Local error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
async def _run_ssh_command(
host: Host,
command: str,
service: str,
*,
stream: bool = True,
) -> CommandResult:
"""Run a command on a remote host via SSH with streaming output."""
proc: asyncssh.SSHClientProcess[Any]
try:
async with (
asyncssh.connect(
host.address,
port=host.port,
username=host.user,
known_hosts=None,
) as conn,
conn.create_process(command) as proc,
):
if stream:
async def read_stream(
reader: Any,
prefix: str,
*,
is_stderr: bool = False,
) -> None:
console = _err_console if is_stderr else _console
async for line in reader:
if line.strip(): # Skip empty lines
console.print(f"[cyan]\\[{prefix}][/] {escape(line)}", end="")
await asyncio.gather(
read_stream(proc.stdout, service),
read_stream(proc.stderr, service, is_stderr=True),
)
stdout_data = ""
stderr_data = ""
if not stream:
stdout_data = await proc.stdout.read()
stderr_data = await proc.stderr.read()
await proc.wait()
return CommandResult(
service=service,
exit_code=proc.exit_status or 0,
success=proc.exit_status == 0,
stdout=stdout_data,
stderr=stderr_data,
)
except (OSError, asyncssh.Error) as e:
_err_console.print(f"[cyan]\\[{service}][/] [red]SSH error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
async def run_command(
host: Host,
command: str,
service: str,
*,
stream: bool = True,
) -> CommandResult:
"""Run a command on a host (locally or via SSH)."""
if _is_local(host):
return await _run_local_command(command, service, stream=stream)
return await _run_ssh_command(host, command, service, stream=stream)
async def run_compose(
config: Config,
service: str,
compose_cmd: str,
*,
stream: bool = True,
) -> CommandResult:
"""Run a docker compose command for a service."""
host = config.get_host(service)
compose_path = config.get_compose_path(service)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream)
async def run_compose_on_host(
config: Config,
service: str,
host_name: str,
compose_cmd: str,
*,
stream: bool = True,
) -> CommandResult:
"""Run a docker compose command for a service on a specific host.
Used for migration - running 'down' on the old host before 'up' on the new host.
"""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream)
async def run_on_services(
config: Config,
services: list[str],
compose_cmd: str,
*,
stream: bool = True,
) -> list[CommandResult]:
"""Run a docker compose command on multiple services in parallel."""
tasks = [run_compose(config, service, compose_cmd, stream=stream) for service in services]
return await asyncio.gather(*tasks)
async def run_sequential_commands(
config: Config,
service: str,
commands: list[str],
*,
stream: bool = True,
) -> CommandResult:
"""Run multiple compose commands sequentially for a service."""
for cmd in commands:
result = await run_compose(config, service, cmd, stream=stream)
if not result.success:
return result
return CommandResult(service=service, exit_code=0, success=True)
async def run_sequential_on_services(
config: Config,
services: list[str],
commands: list[str],
*,
stream: bool = True,
) -> list[CommandResult]:
"""Run sequential commands on multiple services in parallel."""
tasks = [
run_sequential_commands(config, service, commands, stream=stream) for service in services
]
return await asyncio.gather(*tasks)
async def check_service_running(
config: Config,
service: str,
host_name: str,
) -> bool:
"""Check if a service has running containers on a specific host."""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
# Use ps --status running to check for running containers
command = f"docker compose -f {compose_path} ps --status running -q"
result = await run_command(host, command, service, stream=False)
# If command succeeded and has output, containers are running
return result.success and bool(result.stdout.strip())
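For context on this now-removed module, a minimal usage sketch of its entry point; the Host stand-in below only mimics the attributes the type hints require:

# Sketch: run_command dispatches locally for "localhost", via asyncssh otherwise.
from __future__ import annotations
import asyncio
from dataclasses import dataclass

@dataclass
class FakeHost:  # stand-in for config.Host, which lives in .config
    address: str = "localhost"
    port: int = 22
    user: str | None = None

async def main() -> None:
    result = await run_command(FakeHost(), "echo hello", "demo", stream=False)
    print(result.exit_code, result.stdout)  # -> 0 hello

asyncio.run(main())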

View File

@@ -0,0 +1,67 @@
"""SSH key utilities for compose-farm."""
from __future__ import annotations
import os
from pathlib import Path
# Default key paths for compose-farm SSH key
# Keys are stored in a subdirectory for cleaner docker volume mounting
SSH_KEY_DIR = Path.home() / ".ssh" / "compose-farm"
SSH_KEY_PATH = SSH_KEY_DIR / "id_ed25519"
SSH_PUBKEY_PATH = SSH_KEY_PATH.with_suffix(".pub")
def get_ssh_auth_sock() -> str | None:
"""Get SSH_AUTH_SOCK, auto-detecting forwarded agent if needed.
Checks in order:
1. SSH_AUTH_SOCK environment variable (if socket exists)
2. Forwarded agent sockets in ~/.ssh/agent/ (most recent first)
Returns the socket path or None if no valid socket found.
"""
sock = os.environ.get("SSH_AUTH_SOCK")
if sock and Path(sock).is_socket():
return sock
# Try to find a forwarded SSH agent socket
agent_dir = Path.home() / ".ssh" / "agent"
if agent_dir.is_dir():
sockets = sorted(
agent_dir.glob("s.*.sshd.*"), key=lambda p: p.stat().st_mtime, reverse=True
)
for s in sockets:
if s.is_socket():
return str(s)
return None
def get_ssh_env() -> dict[str, str]:
"""Get environment dict for SSH subprocess with auto-detected agent.
Returns a copy of the current environment with SSH_AUTH_SOCK set
to the auto-detected agent socket (if found).
"""
env = os.environ.copy()
sock = get_ssh_auth_sock()
if sock:
env["SSH_AUTH_SOCK"] = sock
return env
def key_exists() -> bool:
"""Check if the compose-farm SSH key pair exists."""
return SSH_KEY_PATH.exists() and SSH_PUBKEY_PATH.exists()
def get_key_path() -> Path | None:
"""Get the SSH key path if it exists, None otherwise."""
return SSH_KEY_PATH if key_exists() else None
def get_pubkey_content() -> str | None:
"""Get the public key content if it exists, None otherwise."""
if not SSH_PUBKEY_PATH.exists():
return None
return SSH_PUBKEY_PATH.read_text().strip()
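A sketch of how the agent auto-detection is meant to be consumed when shelling out to ssh (host and command are illustrative; get_ssh_env is the helper defined above):

import subprocess

# get_ssh_env() is os.environ plus a detected SSH_AUTH_SOCK, so a forwarded
# agent keeps working even when the variable is stale or unset.
env = get_ssh_env()
subprocess.run(["ssh", "user@example-host", "docker ps"], env=env, check=False)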

View File

@@ -2,18 +2,55 @@
from __future__ import annotations
import contextlib
from typing import TYPE_CHECKING, Any
import yaml
if TYPE_CHECKING:
from collections.abc import Generator, Mapping
from .config import Config
def load_state(config: Config) -> dict[str, str]:
def group_services_by_host(
services: dict[str, str | list[str]],
hosts: Mapping[str, object],
all_hosts: list[str] | None = None,
) -> dict[str, list[str]]:
"""Group services by their assigned host(s).
For multi-host services (list or "all"), the service appears in multiple host lists.
"""
by_host: dict[str, list[str]] = {h: [] for h in hosts}
for service, host_value in services.items():
if isinstance(host_value, list):
for host_name in host_value:
if host_name in by_host:
by_host[host_name].append(service)
elif host_value == "all" and all_hosts:
for host_name in all_hosts:
if host_name in by_host:
by_host[host_name].append(service)
elif host_value in by_host:
by_host[host_value].append(service)
return by_host
def group_running_services_by_host(
state: dict[str, str | list[str]],
hosts: Mapping[str, object],
) -> dict[str, list[str]]:
"""Group running services by host, filtering out hosts with no services."""
by_host = group_services_by_host(state, hosts)
return {h: svcs for h, svcs in by_host.items() if svcs}
def load_state(config: Config) -> dict[str, str | list[str]]:
"""Load the current deployment state.
Returns a dict mapping service names to host names.
Returns a dict mapping service names to host name(s).
Multi-host services store a list of hosts.
"""
state_path = config.get_state_path()
if not state_path.exists():
@@ -22,32 +59,141 @@ def load_state(config: Config) -> dict[str, str]:
with state_path.open() as f:
data: dict[str, Any] = yaml.safe_load(f) or {}
deployed: dict[str, str] = data.get("deployed", {})
deployed: dict[str, str | list[str]] = data.get("deployed", {})
return deployed
def save_state(config: Config, deployed: dict[str, str]) -> None:
def _sorted_dict(d: dict[str, str | list[str]]) -> dict[str, str | list[str]]:
"""Return a dictionary sorted by keys."""
return dict(sorted(d.items(), key=lambda item: item[0]))
def save_state(config: Config, deployed: dict[str, str | list[str]]) -> None:
"""Save the deployment state."""
state_path = config.get_state_path()
with state_path.open("w") as f:
yaml.safe_dump({"deployed": deployed}, f, sort_keys=False)
yaml.safe_dump({"deployed": _sorted_dict(deployed)}, f, sort_keys=False)
@contextlib.contextmanager
def _modify_state(config: Config) -> Generator[dict[str, str | list[str]], None, None]:
"""Context manager to load, modify, and save state."""
state = load_state(config)
yield state
save_state(config, state)
def get_service_host(config: Config, service: str) -> str | None:
"""Get the host where a service is currently deployed."""
"""Get the host where a service is currently deployed.
For multi-host services, returns the first host or None.
"""
state = load_state(config)
return state.get(service)
value = state.get(service)
if value is None:
return None
if isinstance(value, list):
return value[0] if value else None
return value
def set_service_host(config: Config, service: str, host: str) -> None:
"""Record that a service is deployed on a host."""
state = load_state(config)
state[service] = host
save_state(config, state)
with _modify_state(config) as state:
state[service] = host
def set_multi_host_service(config: Config, service: str, hosts: list[str]) -> None:
"""Record that a multi-host service is deployed on multiple hosts."""
with _modify_state(config) as state:
state[service] = hosts
def remove_service(config: Config, service: str) -> None:
"""Remove a service from the state (after down)."""
with _modify_state(config) as state:
state.pop(service, None)
def add_service_to_host(config: Config, service: str, host: str) -> None:
"""Add a specific host to a service's state.
For multi-host services, adds the host to the list if not present.
For single-host services, sets the host.
"""
with _modify_state(config) as state:
current = state.get(service)
if config.is_multi_host(service):
# Multi-host: add to list if not present
if isinstance(current, list):
if host not in current:
state[service] = [*current, host]
else:
state[service] = [host]
else:
# Single-host: just set it
state[service] = host
def remove_service_from_host(config: Config, service: str, host: str) -> None:
"""Remove a specific host from a service's state.
For multi-host services, removes just that host from the list.
For single-host services, removes the service entirely if host matches.
"""
with _modify_state(config) as state:
current = state.get(service)
if current is None:
return
if isinstance(current, list):
# Multi-host: remove this host from list
remaining = [h for h in current if h != host]
if remaining:
state[service] = remaining
else:
state.pop(service, None)
elif current == host:
# Single-host: remove if matches
state.pop(service, None)
def get_services_needing_migration(config: Config) -> list[str]:
"""Get services where current host differs from configured host.
Multi-host services are never considered for migration.
"""
needs_migration = []
for service in config.services:
# Skip multi-host services
if config.is_multi_host(service):
continue
configured_host = config.get_hosts(service)[0]
current_host = get_service_host(config, service)
if current_host and current_host != configured_host:
needs_migration.append(service)
return needs_migration
def get_orphaned_services(config: Config) -> dict[str, str | list[str]]:
"""Get services that are in state but not in config.
These are services that were previously deployed but have been
removed from the config file (e.g., commented out).
Returns a dict mapping service name to host(s) where it's deployed.
"""
state = load_state(config)
state.pop(service, None)
save_state(config, state)
return {service: hosts for service, hosts in state.items() if service not in config.services}
def get_services_not_in_state(config: Config) -> list[str]:
"""Get services that are in config but not in state.
These are services that should be running but aren't tracked
(e.g., newly added to config, or previously stopped as orphans).
"""
state = load_state(config)
return [service for service in config.services if service not in state]
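To make the multi-host grouping concrete, a small sketch (service and host names invented):

services = {"traefik": "nas", "whoami": ["nas", "pi"], "exporter": "all"}
hosts = {"nas": object(), "pi": object()}

by_host = group_services_by_host(services, hosts, all_hosts=["nas", "pi"])
# {'nas': ['traefik', 'whoami', 'exporter'], 'pi': ['whoami', 'exporter']}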

View File

@@ -8,31 +8,25 @@ use host-published ports for cross-host reachability.
from __future__ import annotations
import os
import re
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
import yaml
from .ssh import LOCAL_ADDRESSES
from .compose import (
PortMapping,
get_ports_for_service,
load_compose_services,
normalize_labels,
)
from .executor import LOCAL_ADDRESSES
if TYPE_CHECKING:
from pathlib import Path
from .config import Config
@dataclass(frozen=True)
class PortMapping:
"""Port mapping for a compose service."""
target: int
published: int | None
@dataclass
class TraefikServiceSource:
class _TraefikServiceSource:
"""Source information to build an upstream for a Traefik service."""
traefik_service: str
@@ -44,120 +38,9 @@ class TraefikServiceSource:
scheme: str | None = None
LIST_VALUE_KEYS = {"entrypoints", "middlewares"}
SINGLE_PART = 1
PUBLISHED_TARGET_PARTS = 2
HOST_PUBLISHED_PARTS = 3
MIN_ROUTER_PARTS = 3
MIN_SERVICE_LABEL_PARTS = 6
_VAR_PATTERN = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-(.*?))?\}")
def _load_env(compose_path: Path) -> dict[str, str]:
"""Load environment variables for compose interpolation."""
env: dict[str, str] = {}
env_path = compose_path.parent / ".env"
if env_path.exists():
for line in env_path.read_text().splitlines():
stripped = line.strip()
if not stripped or stripped.startswith("#") or "=" not in stripped:
continue
key, value = stripped.split("=", 1)
key = key.strip()
value = value.strip()
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
value = value[1:-1]
env[key] = value
env.update({k: v for k, v in os.environ.items() if isinstance(v, str)})
return env
def _interpolate(value: str, env: dict[str, str]) -> str:
"""Perform a minimal `${VAR}`/`${VAR:-default}` interpolation."""
def replace(match: re.Match[str]) -> str:
var = match.group(1)
default = match.group(2)
resolved = env.get(var)
if resolved:
return resolved
return default or ""
return _VAR_PATTERN.sub(replace, value)
def _normalize_labels(raw: Any, env: dict[str, str]) -> dict[str, str]:
if raw is None:
return {}
if isinstance(raw, dict):
return {
_interpolate(str(k), env): _interpolate(str(v), env)
for k, v in raw.items()
if k is not None
}
if isinstance(raw, list):
labels: dict[str, str] = {}
for item in raw:
if not isinstance(item, str) or "=" not in item:
continue
key_raw, value_raw = item.split("=", 1)
key = _interpolate(key_raw.strip(), env)
value = _interpolate(value_raw.strip(), env)
labels[key] = value
return labels
return {}
def _parse_ports(raw: Any, env: dict[str, str]) -> list[PortMapping]: # noqa: PLR0912
if raw is None:
return []
mappings: list[PortMapping] = []
items = raw if isinstance(raw, list) else [raw]
for item in items:
if isinstance(item, str):
interpolated = _interpolate(item, env)
port_spec, _, _ = interpolated.partition("/")
parts = port_spec.split(":")
published: int | None = None
target: int | None = None
if len(parts) == SINGLE_PART and parts[0].isdigit():
target = int(parts[0])
elif len(parts) == PUBLISHED_TARGET_PARTS and parts[0].isdigit() and parts[1].isdigit():
published = int(parts[0])
target = int(parts[1])
elif len(parts) == HOST_PUBLISHED_PARTS and parts[-2].isdigit() and parts[-1].isdigit():
published = int(parts[-2])
target = int(parts[-1])
if target is not None:
mappings.append(PortMapping(target=target, published=published))
elif isinstance(item, dict):
target_raw = item.get("target")
if isinstance(target_raw, str):
target_raw = _interpolate(target_raw, env)
if target_raw is None:
continue
try:
target_val = int(str(target_raw))
except (TypeError, ValueError):
continue
published_raw = item.get("published")
if isinstance(published_raw, str):
published_raw = _interpolate(published_raw, env)
published_val: int | None
try:
published_val = int(str(published_raw)) if published_raw is not None else None
except (TypeError, ValueError):
published_val = None
mappings.append(PortMapping(target=target_val, published=published_val))
return mappings
_LIST_VALUE_KEYS = {"entrypoints", "middlewares"}
_MIN_ROUTER_PARTS = 3
_MIN_SERVICE_LABEL_PARTS = 6
def _parse_value(key: str, raw_value: str) -> Any:
@@ -168,7 +51,7 @@ def _parse_value(key: str, raw_value: str) -> Any:
if value.isdigit():
return int(value)
last_segment = key.rsplit(".", 1)[-1]
if last_segment in LIST_VALUE_KEYS:
if last_segment in _LIST_VALUE_KEYS:
parts = [v.strip() for v in value.split(",")] if "," in value else [value]
return [part for part in parts if part]
return value
@@ -219,7 +102,7 @@ def _insert(root: dict[str, Any], key_path: list[str], value: Any) -> None: # n
current = container_list[list_index]
def _resolve_published_port(source: TraefikServiceSource) -> tuple[int | None, str | None]:
def _resolve_published_port(source: _TraefikServiceSource) -> tuple[int | None, str | None]:
"""Resolve host-published port for a Traefik service.
Returns (published_port, warning_message).
@@ -255,23 +138,9 @@ def _resolve_published_port(source: TraefikServiceSource) -> tuple[int | None, s
)
def _load_stack(config: Config, stack: str) -> tuple[dict[str, Any], dict[str, str], str]:
compose_path = config.get_compose_path(stack)
if not compose_path.exists():
message = f"[{stack}] Compose file not found: {compose_path}"
raise FileNotFoundError(message)
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
return {}, env, config.get_host(stack).address
return raw_services, env, config.get_host(stack).address
def _finalize_http_services(
dynamic: dict[str, Any],
sources: dict[str, TraefikServiceSource],
sources: dict[str, _TraefikServiceSource],
warnings: list[str],
) -> None:
for traefik_service, source in sources.items():
@@ -342,7 +211,7 @@ def _process_router_label(
if not key_without_prefix.startswith("http.routers."):
return
router_parts = key_without_prefix.split(".")
if len(router_parts) < MIN_ROUTER_PARTS:
if len(router_parts) < _MIN_ROUTER_PARTS:
return
router_name = router_parts[2]
router_remainder = router_parts[3:]
@@ -360,12 +229,12 @@ def _process_service_label(
host_address: str,
ports: list[PortMapping],
service_names: set[str],
sources: dict[str, TraefikServiceSource],
sources: dict[str, _TraefikServiceSource],
) -> None:
if not key_without_prefix.startswith("http.services."):
return
parts = key_without_prefix.split(".")
if len(parts) < MIN_SERVICE_LABEL_PARTS:
if len(parts) < _MIN_SERVICE_LABEL_PARTS:
return
traefik_service = parts[2]
service_names.add(traefik_service)
@@ -373,7 +242,7 @@ def _process_service_label(
source = sources.get(traefik_service)
if source is None:
source = TraefikServiceSource(
source = _TraefikServiceSource(
traefik_service=traefik_service,
stack=stack,
compose_service=compose_service,
@@ -390,23 +259,6 @@ def _process_service_label(
source.scheme = str(_parse_value(key_without_prefix, label_value))
def _get_ports_for_service(
definition: dict[str, Any],
all_services: dict[str, Any],
env: dict[str, str],
) -> list[PortMapping]:
"""Get ports for a service, following network_mode: service:X if present."""
network_mode = definition.get("network_mode", "")
if isinstance(network_mode, str) and network_mode.startswith("service:"):
# Service uses another service's network - get ports from that service
ref_service = network_mode[len("service:") :]
if ref_service in all_services:
ref_def = all_services[ref_service]
if isinstance(ref_def, dict):
return _parse_ports(ref_def.get("ports"), env)
return _parse_ports(definition.get("ports"), env)
def _process_service_labels(
stack: str,
compose_service: str,
@@ -415,17 +267,17 @@ def _process_service_labels(
host_address: str,
env: dict[str, str],
dynamic: dict[str, Any],
sources: dict[str, TraefikServiceSource],
sources: dict[str, _TraefikServiceSource],
warnings: list[str],
) -> None:
labels = _normalize_labels(definition.get("labels"), env)
labels = normalize_labels(definition.get("labels"), env)
if not labels:
return
enable_raw = labels.get("traefik.enable")
if enable_raw is not None and _parse_value("enable", enable_raw) is False:
return
ports = _get_ports_for_service(definition, all_services, env)
ports = get_ports_for_service(definition, all_services, env)
routers: dict[str, bool] = {}
service_names: set[str] = set()
@@ -476,7 +328,7 @@ def generate_traefik_config(
"""
dynamic: dict[str, Any] = {}
warnings: list[str] = []
sources: dict[str, TraefikServiceSource] = {}
sources: dict[str, _TraefikServiceSource] = {}
# Determine Traefik's host from service assignment
traefik_host = None
@@ -484,7 +336,7 @@ def generate_traefik_config(
traefik_host = config.services.get(config.traefik_service)
for stack in services:
raw_services, env, host_address = _load_stack(config, stack)
raw_services, env, host_address = load_compose_services(config, stack)
stack_host = config.services.get(stack)
# Skip services on Traefik's host - docker provider handles them directly
@@ -512,3 +364,22 @@ def generate_traefik_config(
_finalize_http_services(dynamic, sources, warnings)
return dynamic, warnings
_TRAEFIK_CONFIG_HEADER = """\
# Auto-generated by compose-farm
# https://github.com/basnijholt/compose-farm
#
# This file routes traffic to services running on hosts other than Traefik's host.
# Services on Traefik's host use the Docker provider directly.
#
# Regenerate with: compose-farm traefik-file --all -o <this-file>
# Or configure traefik_file in compose-farm.yaml for automatic updates.
"""
def render_traefik_config(dynamic: dict[str, Any]) -> str:
"""Render Traefik dynamic config as YAML with a header comment."""
body = yaml.safe_dump(dynamic, sort_keys=False)
return _TRAEFIK_CONFIG_HEADER + body
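A sketch of the rendering step in isolation (router and service names made up):

dynamic = {
    "http": {
        "routers": {"whoami": {"rule": "Host(`whoami.example.com`)", "service": "whoami"}},
        "services": {"whoami": {"loadBalancer": {"servers": [{"url": "http://10.0.0.2:8080"}]}}},
    }
}
print(render_traefik_config(dynamic))
# -> the header comment followed by the YAML body, insertion order preserved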

View File

@@ -0,0 +1,7 @@
"""Compose Farm Web UI."""
from __future__ import annotations
from compose_farm.web.app import create_app
__all__ = ["create_app"]

View File

@@ -0,0 +1,68 @@
"""FastAPI application setup."""
from __future__ import annotations
import asyncio
import sys
from contextlib import asynccontextmanager, suppress
from typing import TYPE_CHECKING
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from pydantic import ValidationError
from compose_farm.web.deps import STATIC_DIR, get_config
from compose_farm.web.routes import actions, api, pages
from compose_farm.web.streaming import TASK_TTL_SECONDS, cleanup_stale_tasks
if TYPE_CHECKING:
from collections.abc import AsyncGenerator
async def _task_cleanup_loop() -> None:
"""Periodically clean up stale completed tasks."""
while True:
await asyncio.sleep(TASK_TTL_SECONDS // 2)  # Run at half the task TTL
cleanup_stale_tasks()
@asynccontextmanager
async def lifespan(_app: FastAPI) -> AsyncGenerator[None, None]:
"""Application lifespan handler."""
# Startup: pre-load config (ignore errors - handled per-request)
with suppress(ValidationError, FileNotFoundError):
get_config()
# Start background cleanup task
cleanup_task = asyncio.create_task(_task_cleanup_loop())
yield
# Shutdown: cancel cleanup task
cleanup_task.cancel()
with suppress(asyncio.CancelledError):
await cleanup_task
def create_app() -> FastAPI:
"""Create and configure the FastAPI application."""
app = FastAPI(
title="Compose Farm",
description="Web UI for managing Docker Compose services across multiple hosts",
lifespan=lifespan,
)
# Mount static files
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
app.include_router(pages.router)
app.include_router(api.router, prefix="/api")
app.include_router(actions.router, prefix="/api")
# WebSocket routes use Unix-only modules (fcntl, pty)
if sys.platform != "win32":
from compose_farm.web.ws import router as ws_router # noqa: PLC0415
app.include_router(ws_router)
return app
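Serving the factory locally would look something like this (the uvicorn invocation is assumed, not part of the diff):

import uvicorn
from compose_farm.web.app import create_app

uvicorn.run(create_app(), host="127.0.0.1", port=8000)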

View File

@@ -0,0 +1,40 @@
"""Shared dependencies for web modules.
This module contains shared config and template accessors to avoid circular imports
between app.py and route modules.
"""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from fastapi.templating import Jinja2Templates
from pydantic import ValidationError
if TYPE_CHECKING:
from compose_farm.config import Config
# Paths
WEB_DIR = Path(__file__).parent
TEMPLATES_DIR = WEB_DIR / "templates"
STATIC_DIR = WEB_DIR / "static"
def get_config() -> Config:
"""Load config from disk (always fresh)."""
from compose_farm.config import load_config # noqa: PLC0415
return load_config()
def get_templates() -> Jinja2Templates:
"""Get Jinja2 templates instance."""
return Jinja2Templates(directory=str(TEMPLATES_DIR))
def extract_config_error(exc: Exception) -> str:
"""Extract a user-friendly error message from a config exception."""
if isinstance(exc, ValidationError):
return "; ".join(err.get("msg", str(err)) for err in exc.errors())
return str(exc)
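A sketch of what extract_config_error produces for a pydantic failure (the model is a stand-in, and the exact message text depends on the pydantic version):

from pydantic import BaseModel, ValidationError
from compose_farm.web.deps import extract_config_error

class Demo(BaseModel):  # hypothetical model, not part of the repo
    port: int

try:
    Demo(port="not-a-number")
except ValidationError as exc:
    print(extract_config_error(exc))  # e.g. "Input should be a valid integer, ..."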

View File

@@ -0,0 +1,5 @@
"""Web routes."""
from compose_farm.web.routes import actions, api, pages
__all__ = ["actions", "api", "pages"]

View File

@@ -0,0 +1,95 @@
"""Action routes for service operations."""
from __future__ import annotations
import asyncio
import uuid
from typing import TYPE_CHECKING, Any
from fastapi import APIRouter, HTTPException
if TYPE_CHECKING:
from collections.abc import Callable, Coroutine
from compose_farm.web.deps import get_config
from compose_farm.web.streaming import run_cli_streaming, run_compose_streaming, tasks
router = APIRouter(tags=["actions"])
# Store task references to prevent garbage collection
_background_tasks: set[asyncio.Task[None]] = set()
def _start_task(coro_factory: Callable[[str], Coroutine[Any, Any, None]]) -> str:
"""Create a task, register it, and return the task_id."""
task_id = str(uuid.uuid4())
tasks[task_id] = {"status": "running", "output": []}
task: asyncio.Task[None] = asyncio.create_task(coro_factory(task_id))
_background_tasks.add(task)
task.add_done_callback(_background_tasks.discard)
return task_id
async def _run_service_action(name: str, command: str) -> dict[str, Any]:
"""Run a compose command for a service."""
config = get_config()
if name not in config.services:
raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
task_id = _start_task(lambda tid: run_compose_streaming(config, name, command, tid))
return {"task_id": task_id, "service": name, "command": command}
@router.post("/service/{name}/up")
async def up_service(name: str) -> dict[str, Any]:
"""Start a service."""
return await _run_service_action(name, "up")
@router.post("/service/{name}/down")
async def down_service(name: str) -> dict[str, Any]:
"""Stop a service."""
return await _run_service_action(name, "down")
@router.post("/service/{name}/restart")
async def restart_service(name: str) -> dict[str, Any]:
"""Restart a service (down + up)."""
return await _run_service_action(name, "restart")
@router.post("/service/{name}/pull")
async def pull_service(name: str) -> dict[str, Any]:
"""Pull latest images for a service."""
return await _run_service_action(name, "pull")
@router.post("/service/{name}/update")
async def update_service(name: str) -> dict[str, Any]:
"""Update a service (pull + build + down + up)."""
return await _run_service_action(name, "update")
@router.post("/service/{name}/logs")
async def logs_service(name: str) -> dict[str, Any]:
"""Show logs for a service."""
return await _run_service_action(name, "logs")
@router.post("/apply")
async def apply_all() -> dict[str, Any]:
"""Run cf apply to reconcile all services."""
config = get_config()
task_id = _start_task(lambda tid: run_cli_streaming(config, ["apply"], tid))
return {"task_id": task_id, "command": "apply"}
@router.post("/refresh")
async def refresh_state() -> dict[str, Any]:
"""Refresh state from running services."""
config = get_config()
task_id = _start_task(lambda tid: run_cli_streaming(config, ["refresh"], tid))
return {"task_id": task_id, "command": "refresh"}

View File

@@ -0,0 +1,360 @@
"""JSON API routes."""
from __future__ import annotations
import asyncio
import contextlib
import json
import shlex
from datetime import UTC, datetime
from pathlib import Path
from typing import Annotated, Any
import asyncssh
import yaml
from fastapi import APIRouter, Body, HTTPException, Query
from fastapi.responses import HTMLResponse
from compose_farm.executor import is_local, run_compose_on_host, ssh_connect_kwargs
from compose_farm.paths import find_config_path
from compose_farm.state import load_state
from compose_farm.web.deps import get_config, get_templates
router = APIRouter(tags=["api"])
def _validate_yaml(content: str) -> None:
"""Validate YAML content, raise HTTPException on error."""
try:
yaml.safe_load(content)
except yaml.YAMLError as e:
raise HTTPException(status_code=400, detail=f"Invalid YAML: {e}") from e
def _backup_file(file_path: Path) -> Path | None:
"""Create a timestamped backup of a file if it exists and content differs.
Backups are stored in a .backups directory alongside the file.
Returns the backup path if created, None if no backup was needed.
"""
if not file_path.exists():
return None
# Create backup directory
backup_dir = file_path.parent / ".backups"
backup_dir.mkdir(exist_ok=True)
# Generate timestamped backup filename
timestamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S")
backup_name = f"{file_path.name}.{timestamp}"
backup_path = backup_dir / backup_name
# Copy current content to backup
backup_path.write_text(file_path.read_text())
# Clean up old backups (keep last 200)
backups = sorted(backup_dir.glob(f"{file_path.name}.*"), reverse=True)
for old_backup in backups[200:]:
old_backup.unlink()
return backup_path
def _save_with_backup(file_path: Path, content: str) -> bool:
"""Save content to file, creating a backup first if content changed.
Returns True if file was saved, False if content was unchanged.
"""
# Check if content actually changed
if file_path.exists():
current_content = file_path.read_text()
if current_content == content:
return False # No change, skip save
_backup_file(file_path)
file_path.write_text(content)
return True
def _get_service_compose_path(name: str) -> Path:
"""Get compose path for service, raising HTTPException if not found."""
config = get_config()
if name not in config.services:
raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
compose_path = config.get_compose_path(name)
if not compose_path:
raise HTTPException(status_code=404, detail="Compose file not found")
return compose_path
def _get_compose_services(config: Any, service: str, hosts: list[str]) -> list[dict[str, Any]]:
"""Get container info from compose file (fast, local read).
Returns one entry per container per host for multi-host services.
"""
compose_path = config.get_compose_path(service)
if not compose_path or not compose_path.exists():
return []
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
return []
# Project name is the directory name (docker compose default)
project_name = compose_path.parent.name
containers = []
for host in hosts:
for svc_name, svc_def in raw_services.items():
# Use container_name if set, otherwise default to {project}-{service}-1
if isinstance(svc_def, dict) and svc_def.get("container_name"):
container_name = svc_def["container_name"]
else:
container_name = f"{project_name}-{svc_name}-1"
containers.append(
{
"Name": container_name,
"Service": svc_name,
"Host": host,
"State": "unknown", # Status requires Docker query
}
)
return containers
async def _get_container_states(
config: Any, service: str, containers: list[dict[str, Any]]
) -> list[dict[str, Any]]:
"""Query Docker for actual container states on a single host."""
if not containers:
return containers
# All containers should be on the same host
host_name = containers[0]["Host"]
result = await run_compose_on_host(config, service, host_name, "ps --format json", stream=False)
if not result.success:
return containers
# Build state map
state_map: dict[str, str] = {}
for line in result.stdout.strip().split("\n"):
if line.strip():
with contextlib.suppress(json.JSONDecodeError):
data = json.loads(line)
state_map[data.get("Name", "")] = data.get("State", "unknown")
# Update container states
for c in containers:
if c["Name"] in state_map:
c["State"] = state_map[c["Name"]]
return containers
def _render_containers(
service: str, host: str, containers: list[dict[str, Any]], *, show_header: bool = False
) -> str:
"""Render containers HTML using Jinja template."""
templates = get_templates()
template = templates.env.get_template("partials/containers.html")
module = template.make_module()
result: str = module.host_containers(service, host, containers, show_header=show_header)
return result
@router.get("/service/{name}/containers", response_class=HTMLResponse)
async def get_containers(name: str, host: str | None = None) -> HTMLResponse:
"""Get containers for a service as HTML buttons.
If host is specified, queries Docker for that host's status.
Otherwise returns all hosts with loading spinners that auto-fetch.
"""
config = get_config()
if name not in config.services:
raise HTTPException(status_code=404, detail=f"Service '{name}' not found")
# Get hosts where service is running from state
state = load_state(config)
current_hosts = state.get(name)
if not current_hosts:
return HTMLResponse('<span class="text-base-content/60">Service not running</span>')
all_hosts = current_hosts if isinstance(current_hosts, list) else [current_hosts]
# If host specified, return just that host's containers with status
if host:
if host not in all_hosts:
return HTMLResponse(f'<span class="text-error">Host {host} not found</span>')
containers = _get_compose_services(config, name, [host])
containers = await _get_container_states(config, name, containers)
return HTMLResponse(_render_containers(name, host, containers))
# Initial load: return all hosts with loading spinners, each fetches its own status
html_parts = []
is_multi_host = len(all_hosts) > 1
for h in all_hosts:
host_id = f"containers-{name}-{h}".replace(".", "-")
containers = _get_compose_services(config, name, [h])
if is_multi_host:
html_parts.append(f'<div class="font-semibold text-sm mt-3 mb-1">{h}</div>')
# Container for this host that auto-fetches its own status
html_parts.append(f"""
<div id="{host_id}"
hx-get="/api/service/{name}/containers?host={h}"
hx-trigger="load"
hx-target="this"
hx-select="unset"
hx-swap="innerHTML">
{_render_containers(name, h, containers)}
</div>
""")
return HTMLResponse("".join(html_parts))
@router.put("/service/{name}/compose")
async def save_compose(
name: str, content: Annotated[str, Body(media_type="text/plain")]
) -> dict[str, Any]:
"""Save compose file content."""
compose_path = _get_service_compose_path(name)
_validate_yaml(content)
saved = _save_with_backup(compose_path, content)
msg = "Compose file saved" if saved else "No changes to save"
return {"success": True, "message": msg}
@router.put("/service/{name}/env")
async def save_env(
name: str, content: Annotated[str, Body(media_type="text/plain")]
) -> dict[str, Any]:
"""Save .env file content."""
env_path = _get_service_compose_path(name).parent / ".env"
saved = _save_with_backup(env_path, content)
msg = ".env file saved" if saved else "No changes to save"
return {"success": True, "message": msg}
@router.put("/config")
async def save_config(
content: Annotated[str, Body(media_type="text/plain")],
) -> dict[str, Any]:
"""Save compose-farm.yaml config file."""
config_path = find_config_path()
if not config_path:
raise HTTPException(status_code=404, detail="Config file not found")
_validate_yaml(content)
saved = _save_with_backup(config_path, content)
msg = "Config saved" if saved else "No changes to save"
return {"success": True, "message": msg}
async def _read_file_local(path: str) -> str:
"""Read a file from the local filesystem."""
expanded = Path(path).expanduser()
return await asyncio.to_thread(expanded.read_text, encoding="utf-8")
async def _write_file_local(path: str, content: str) -> bool:
"""Write content to a file on the local filesystem with backup.
Returns True if file was saved, False if content was unchanged.
"""
expanded = Path(path).expanduser()
return await asyncio.to_thread(_save_with_backup, expanded, content)
async def _read_file_remote(host: Any, path: str) -> str:
"""Read a file from a remote host via SSH."""
# Expand ~ on remote by using shell
cmd = f"cat {shlex.quote(path)}"
if path.startswith("~/"):
cmd = f"cat ~/{shlex.quote(path[2:])}"
async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:
result = await conn.run(cmd, check=True)
stdout = result.stdout or ""
return stdout.decode() if isinstance(stdout, bytes) else stdout
async def _write_file_remote(host: Any, path: str, content: str) -> None:
"""Write content to a file on a remote host via SSH."""
# Expand ~ on remote by using shell
target_path = f"~/{path[2:]}" if path.startswith("~/") else path
cmd = f"cat > {shlex.quote(target_path)}"
async with asyncssh.connect(**ssh_connect_kwargs(host)) as conn:
result = await conn.run(cmd, input=content, check=True)
if result.returncode != 0:
stderr = result.stderr.decode() if isinstance(result.stderr, bytes) else result.stderr
msg = f"Failed to write file: {stderr}"
raise RuntimeError(msg)
def _get_console_host(host: str, path: str) -> Any:
"""Validate and return host config for console file operations."""
config = get_config()
host_config = config.hosts.get(host)
if not host_config:
raise HTTPException(status_code=404, detail=f"Host '{host}' not found")
if not path:
raise HTTPException(status_code=400, detail="Path is required")
return host_config
@router.get("/console/file")
async def read_console_file(
host: Annotated[str, Query(description="Host name")],
path: Annotated[str, Query(description="File path")],
) -> dict[str, Any]:
"""Read a file from a host for the console editor."""
host_config = _get_console_host(host, path)
try:
if is_local(host_config):
content = await _read_file_local(path)
else:
content = await _read_file_remote(host_config, path)
return {"success": True, "content": content}
except FileNotFoundError:
raise HTTPException(status_code=404, detail=f"File not found: {path}") from None
except PermissionError:
raise HTTPException(status_code=403, detail=f"Permission denied: {path}") from None
except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) from e
@router.put("/console/file")
async def write_console_file(
host: Annotated[str, Query(description="Host name")],
path: Annotated[str, Query(description="File path")],
content: Annotated[str, Body(media_type="text/plain")],
) -> dict[str, Any]:
"""Write a file to a host from the console editor."""
host_config = _get_console_host(host, path)
try:
if is_local(host_config):
saved = await _write_file_local(path, content)
msg = f"Saved: {path}" if saved else "No changes to save"
else:
await _write_file_remote(host_config, path, content)
msg = f"Saved: {path}" # Remote doesn't track changes
return {"success": True, "message": msg}
except PermissionError:
raise HTTPException(status_code=403, detail=f"Permission denied: {path}") from None
except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) from e
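The backup semantics above, summarized as a sketch (temporary paths are illustrative; the numbers match the code: a sibling .backups/ directory, 200 retained copies):

from pathlib import Path

target = Path("/tmp/demo/compose-farm.yaml")  # hypothetical file
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text("hosts: {}\n")

_save_with_backup(target, "hosts: {}\n")          # False: unchanged, no backup written
_save_with_backup(target, "hosts:\n  nas: {}\n")  # True: old copy lands in /tmp/demo/.backups/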

View File

@@ -0,0 +1,284 @@
"""HTML page routes."""
from __future__ import annotations
import yaml
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from pydantic import ValidationError
from compose_farm.executor import is_local
from compose_farm.paths import find_config_path
from compose_farm.state import (
get_orphaned_services,
get_service_host,
get_services_needing_migration,
get_services_not_in_state,
group_running_services_by_host,
load_state,
)
from compose_farm.web.deps import (
extract_config_error,
get_config,
get_templates,
)
router = APIRouter()
@router.get("/console", response_class=HTMLResponse)
async def console(request: Request) -> HTMLResponse:
"""Console page with terminal and editor."""
config = get_config()
templates = get_templates()
# Find local host and sort it first
local_host = None
for name, host in config.hosts.items():
if is_local(host):
local_host = name
break
# Sort hosts with local first
hosts = sorted(config.hosts.keys())
if local_host:
hosts = [local_host] + [h for h in hosts if h != local_host]
# Get config path for default editor file
config_path = str(config.config_path) if config.config_path else ""
return templates.TemplateResponse(
"console.html",
{
"request": request,
"hosts": hosts,
"local_host": local_host,
"config_path": config_path,
},
)
@router.get("/", response_class=HTMLResponse)
async def index(request: Request) -> HTMLResponse:
"""Dashboard page - combined view of all cluster info."""
templates = get_templates()
# Try to load config, handle errors gracefully
config_error = None
try:
config = get_config()
except (ValidationError, FileNotFoundError) as e:
config_error = extract_config_error(e)
# Read raw config content for the editor
config_path = find_config_path()
config_content = config_path.read_text() if config_path else ""
return templates.TemplateResponse(
"index.html",
{
"request": request,
"config_error": config_error,
"hosts": {},
"services": {},
"config_content": config_content,
"state_content": "",
"running_count": 0,
"stopped_count": 0,
"orphaned": [],
"migrations": [],
"not_started": [],
"services_by_host": {},
},
)
# Get state
deployed = load_state(config)
# Stats
running_count = len(deployed)
stopped_count = len(config.services) - running_count
# Pending operations
orphaned = get_orphaned_services(config)
migrations = get_services_needing_migration(config)
not_started = get_services_not_in_state(config)
# Group services by host (filter out hosts with no running services)
services_by_host = group_running_services_by_host(deployed, config.hosts)
# Config file content
config_content = ""
if config.config_path and config.config_path.exists():
config_content = config.config_path.read_text()
# State file content
state_content = yaml.dump({"deployed": deployed}, default_flow_style=False, sort_keys=False)
return templates.TemplateResponse(
"index.html",
{
"request": request,
"config_error": None,
# Config data
"hosts": config.hosts,
"services": config.services,
"config_content": config_content,
# State data
"state_content": state_content,
# Stats
"running_count": running_count,
"stopped_count": stopped_count,
# Pending operations
"orphaned": orphaned,
"migrations": migrations,
"not_started": not_started,
# Services by host
"services_by_host": services_by_host,
},
)
@router.get("/service/{name}", response_class=HTMLResponse)
async def service_detail(request: Request, name: str) -> HTMLResponse:
"""Service detail page."""
config = get_config()
templates = get_templates()
# Get compose file content
compose_path = config.get_compose_path(name)
compose_content = ""
if compose_path and compose_path.exists():
compose_content = compose_path.read_text()
# Get .env file content
env_content = ""
env_path = None
if compose_path:
env_path = compose_path.parent / ".env"
if env_path.exists():
env_content = env_path.read_text()
# Get host info
hosts = config.get_hosts(name)
# Get state
current_host = get_service_host(config, name)
return templates.TemplateResponse(
"service.html",
{
"request": request,
"name": name,
"hosts": hosts,
"current_host": current_host,
"compose_content": compose_content,
"compose_path": str(compose_path) if compose_path else None,
"env_content": env_content,
"env_path": str(env_path) if env_path else None,
},
)
@router.get("/partials/sidebar", response_class=HTMLResponse)
async def sidebar_partial(request: Request) -> HTMLResponse:
"""Sidebar service list partial."""
config = get_config()
templates = get_templates()
state = load_state(config)
# Build service -> host mapping (empty string for multi-host services)
service_hosts = {
svc: "" if host_val == "all" or isinstance(host_val, list) else host_val
for svc, host_val in config.services.items()
}
return templates.TemplateResponse(
"partials/sidebar.html",
{
"request": request,
"services": sorted(config.services.keys()),
"service_hosts": service_hosts,
"hosts": sorted(config.hosts.keys()),
"state": state,
},
)
@router.get("/partials/config-error", response_class=HTMLResponse)
async def config_error_partial(request: Request) -> HTMLResponse:
"""Config error banner partial."""
templates = get_templates()
try:
get_config()
return HTMLResponse("") # No error
except (ValidationError, FileNotFoundError) as e:
error = extract_config_error(e)
return templates.TemplateResponse(
"partials/config_error.html", {"request": request, "config_error": error}
)
@router.get("/partials/stats", response_class=HTMLResponse)
async def stats_partial(request: Request) -> HTMLResponse:
"""Stats cards partial."""
config = get_config()
templates = get_templates()
deployed = load_state(config)
running_count = len(deployed)
stopped_count = len(config.services) - running_count
return templates.TemplateResponse(
"partials/stats.html",
{
"request": request,
"hosts": config.hosts,
"services": config.services,
"running_count": running_count,
"stopped_count": stopped_count,
},
)
@router.get("/partials/pending", response_class=HTMLResponse)
async def pending_partial(request: Request, expanded: bool = True) -> HTMLResponse:
"""Pending operations partial."""
config = get_config()
templates = get_templates()
orphaned = get_orphaned_services(config)
migrations = get_services_needing_migration(config)
not_started = get_services_not_in_state(config)
return templates.TemplateResponse(
"partials/pending.html",
{
"request": request,
"orphaned": orphaned,
"migrations": migrations,
"not_started": not_started,
"expanded": expanded,
},
)
@router.get("/partials/services-by-host", response_class=HTMLResponse)
async def services_by_host_partial(request: Request, expanded: bool = True) -> HTMLResponse:
"""Services by host partial."""
config = get_config()
templates = get_templates()
deployed = load_state(config)
services_by_host = group_running_services_by_host(deployed, config.hosts)
return templates.TemplateResponse(
"partials/services_by_host.html",
{
"request": request,
"hosts": config.hosts,
"services_by_host": services_by_host,
"expanded": expanded,
},
)

View File

@@ -0,0 +1,131 @@
/* Sidebar inputs - remove focus outline (DaisyUI 5 uses outline + outline-offset) */
#sidebar .input:focus,
#sidebar .input:focus-within,
#sidebar .select:focus {
outline: none;
outline-offset: 0;
}
/* Editors (Monaco) - wrapper makes it resizable */
.editor-wrapper {
resize: vertical;
overflow: hidden;
min-height: 150px;
}
.editor-wrapper .yaml-editor,
.editor-wrapper .env-editor,
.editor-wrapper .yaml-viewer {
height: 100%;
border: 1px solid oklch(var(--bc) / 0.2);
border-radius: 0.5rem;
}
.editor-wrapper.yaml-wrapper { height: 400px; }
.editor-wrapper.env-wrapper { height: 250px; }
.editor-wrapper.viewer-wrapper { height: 300px; }
/* Terminal - no custom CSS needed, using h-full class in HTML */
/* Prevent save button resize when text changes */
#save-btn, #save-config-btn {
min-width: 5rem;
}
/* Rainbow hover effect for headers */
.rainbow-hover {
transition: color 0.3s;
}
.rainbow-hover:hover {
background: linear-gradient(
90deg,
#e07070,
#e0a070,
#d0d070,
#70c080,
#7090d0,
#9080b0,
#b080a0,
#e07070
);
background-size: 16em 100%;
background-clip: text;
-webkit-background-clip: text;
color: transparent;
animation: rainbow 4s linear infinite;
}
@keyframes rainbow {
to {
background-position: 16em center;
}
}
/* Command palette FAB - rainbow glow effect */
@property --cmd-pos { syntax: "<number>"; inherits: true; initial-value: 100; }
@property --cmd-blur { syntax: "<number>"; inherits: true; initial-value: 10; }
@property --cmd-scale { syntax: "<number>"; inherits: true; initial-value: 1; }
@property --cmd-opacity { syntax: "<number>"; inherits: true; initial-value: 0.3; }
#cmd-fab {
--g: linear-gradient(to right, #fff, #fff, #0ff, #00f, #8000ff, #e066a3, #f00, #ff0, #bfff80, #fff, #fff);
--bg-size: 900%;
all: unset;
position: fixed;
bottom: 1.5rem;
right: 1.5rem;
z-index: 50;
cursor: pointer;
transform: scale(var(--cmd-scale));
transition: --cmd-pos 3s, --cmd-blur 0.3s, --cmd-opacity 0.3s, --cmd-scale 0.2s cubic-bezier(.76,-.25,.51,1.13);
}
.cmd-fab-inner {
display: block;
padding: 0.6em 1em;
background: #1d232a;
border-radius: 8px;
font-size: 14px;
position: relative;
}
.cmd-fab-inner > span {
background: var(--g) no-repeat calc(var(--cmd-pos) * 1%) 0 / var(--bg-size);
-webkit-background-clip: text;
background-clip: text;
-webkit-text-fill-color: transparent;
letter-spacing: 0.15ch;
font-weight: 600;
}
.cmd-fab-inner::before, .cmd-fab-inner::after {
content: "";
position: absolute;
border-radius: 8px;
}
.cmd-fab-inner::before {
inset: -1.5px;
background: var(--g) no-repeat calc(var(--cmd-pos) * 1%) 0 / var(--bg-size);
border-radius: 9px;
z-index: -1;
opacity: var(--cmd-opacity);
}
.cmd-fab-inner::after {
inset: 0;
background: #000;
transform: translateY(10px);
z-index: -2;
filter: blur(calc(var(--cmd-blur) * 1px));
}
#cmd-fab:hover { --cmd-scale: 1.05; --cmd-pos: 0; --cmd-blur: 30; --cmd-opacity: 1; }
#cmd-fab:hover .cmd-fab-inner::after { background: var(--g); opacity: 0.3; }
#cmd-fab:active { --cmd-scale: 0.98; --cmd-blur: 15; }
/* Smaller screens: reduce gradient size so more colors are visible */
@media (max-width: 768px) {
#cmd-fab { --bg-size: 500%; }
}

View File

@@ -0,0 +1,668 @@
/**
* Compose Farm Web UI JavaScript
*/
// ANSI escape codes for terminal output
const ANSI = {
RED: '\x1b[31m',
GREEN: '\x1b[32m',
DIM: '\x1b[2m',
RESET: '\x1b[0m',
CRLF: '\r\n'
};
// Store active terminals and editors
const terminals = {};
const editors = {};
let monacoLoaded = false;
let monacoLoading = false;
// LocalStorage key prefix for active tasks (scoped by page)
const TASK_KEY_PREFIX = 'cf_task:';
const getTaskKey = () => TASK_KEY_PREFIX + window.location.pathname;
// Language detection from file path
const LANGUAGE_MAP = {
'yaml': 'yaml', 'yml': 'yaml',
'json': 'json',
'js': 'javascript', 'mjs': 'javascript',
'ts': 'typescript', 'tsx': 'typescript',
'py': 'python',
'sh': 'shell', 'bash': 'shell',
'md': 'markdown',
'html': 'html', 'htm': 'html',
'css': 'css',
'sql': 'sql',
'toml': 'toml',
'ini': 'ini', 'conf': 'ini',
'dockerfile': 'dockerfile',
'env': 'plaintext'
};
/**
* Get Monaco language from file path
* @param {string} path - File path
* @returns {string} Monaco language identifier
*/
function getLanguageFromPath(path) {
const ext = path.split('.').pop().toLowerCase();
return LANGUAGE_MAP[ext] || 'plaintext';
}
window.getLanguageFromPath = getLanguageFromPath;
// Terminal color theme (dark mode matching PicoCSS)
const TERMINAL_THEME = {
background: '#1a1a2e',
foreground: '#e4e4e7',
cursor: '#e4e4e7',
cursorAccent: '#1a1a2e',
black: '#18181b',
red: '#ef4444',
green: '#22c55e',
yellow: '#eab308',
blue: '#3b82f6',
magenta: '#a855f7',
cyan: '#06b6d4',
white: '#e4e4e7',
brightBlack: '#52525b',
brightRed: '#f87171',
brightGreen: '#4ade80',
brightYellow: '#facc15',
brightBlue: '#60a5fa',
brightMagenta: '#c084fc',
brightCyan: '#22d3ee',
brightWhite: '#fafafa'
};
/**
* Create a terminal with fit addon and resize observer
* @param {HTMLElement} container - Container element
* @param {object} extraOptions - Additional terminal options
* @param {function} onResize - Optional callback called with (cols, rows) after resize
* @returns {{term: Terminal, fitAddon: FitAddon}}
*/
function createTerminal(container, extraOptions = {}, onResize = null) {
container.innerHTML = '';
const term = new Terminal({
convertEol: true,
theme: TERMINAL_THEME,
fontSize: 13,
fontFamily: 'Monaco, Menlo, "Ubuntu Mono", monospace',
scrollback: 5000,
...extraOptions
});
const fitAddon = new FitAddon.FitAddon();
term.loadAddon(fitAddon);
term.open(container);
fitAddon.fit();
const handleResize = () => {
fitAddon.fit();
if (onResize) {
onResize(term.cols, term.rows);
}
};
window.addEventListener('resize', handleResize);
new ResizeObserver(handleResize).observe(container);
return { term, fitAddon };
}
/**
* Create WebSocket connection with standard handlers
* @param {string} path - WebSocket path
* @returns {WebSocket}
*/
function createWebSocket(path) {
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
return new WebSocket(`${protocol}//${window.location.host}${path}`);
}
window.createWebSocket = createWebSocket;
/**
* Initialize a terminal and connect to WebSocket for streaming
*/
function initTerminal(elementId, taskId) {
const container = document.getElementById(elementId);
if (!container) {
console.error('Terminal container not found:', elementId);
return;
}
const { term, fitAddon } = createTerminal(container);
const ws = createWebSocket(`/ws/terminal/${taskId}`);
const taskKey = getTaskKey();
ws.onopen = () => {
term.write(`${ANSI.DIM}[Connected]${ANSI.RESET}${ANSI.CRLF}`);
setTerminalLoading(true);
localStorage.setItem(taskKey, taskId);
};
ws.onmessage = (event) => {
term.write(event.data);
if (event.data.includes('[Done]') || event.data.includes('[Failed]')) {
localStorage.removeItem(taskKey);
}
};
ws.onclose = () => setTerminalLoading(false);
ws.onerror = (error) => {
term.write(`${ANSI.RED}[WebSocket Error]${ANSI.RESET}${ANSI.CRLF}`);
console.error('WebSocket error:', error);
setTerminalLoading(false);
};
terminals[taskId] = { term, ws, fitAddon };
return { term, ws };
}
window.initTerminal = initTerminal;
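The same stream is plain text over a WebSocket, so it can be consumed outside the browser as well; a sketch using the third-party websockets package (URL and task id assumed):

import asyncio
import websockets  # pip install websockets

async def tail(task_id: str) -> None:
    async with websockets.connect(f"ws://127.0.0.1:8000/ws/terminal/{task_id}") as ws:
        async for message in ws:
            print(message, end="")  # server pushes terminal output chunks

asyncio.run(tail("your-task-id"))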
/**
* Initialize an interactive exec terminal
*/
let execTerminal = null;
let execWs = null;
function initExecTerminal(service, container, host) {
const containerEl = document.getElementById('exec-terminal-container');
const terminalEl = document.getElementById('exec-terminal');
if (!containerEl || !terminalEl) {
console.error('Exec terminal elements not found');
return;
}
containerEl.classList.remove('hidden');
// Clean up existing
if (execWs) { execWs.close(); execWs = null; }
if (execTerminal) { execTerminal.dispose(); execTerminal = null; }
// Create WebSocket first so resize callback can use it
execWs = createWebSocket(`/ws/exec/${service}/${container}/${host}`);
// Resize callback sends size to WebSocket
const sendSize = (cols, rows) => {
if (execWs && execWs.readyState === WebSocket.OPEN) {
execWs.send(JSON.stringify({ type: 'resize', cols, rows }));
}
};
const { term } = createTerminal(terminalEl, { cursorBlink: true }, sendSize);
execTerminal = term;
execWs.onopen = () => { sendSize(term.cols, term.rows); term.focus(); };
execWs.onmessage = (event) => term.write(event.data);
execWs.onclose = () => term.write(`${ANSI.CRLF}${ANSI.DIM}[Connection closed]${ANSI.RESET}${ANSI.CRLF}`);
execWs.onerror = (error) => {
term.write(`${ANSI.RED}[WebSocket Error]${ANSI.RESET}${ANSI.CRLF}`);
console.error('Exec WebSocket error:', error);
};
term.onData((data) => {
if (execWs && execWs.readyState === WebSocket.OPEN) {
execWs.send(data);
}
});
}
window.initExecTerminal = initExecTerminal;
/**
* Refresh dashboard partials while preserving collapse states
*/
function refreshDashboard() {
const isExpanded = (id) => document.getElementById(id)?.checked ?? true;
htmx.ajax('GET', '/partials/sidebar', {target: '#sidebar nav', swap: 'innerHTML'});
htmx.ajax('GET', '/partials/stats', {target: '#stats-cards', swap: 'outerHTML'});
htmx.ajax('GET', `/partials/pending?expanded=${isExpanded('pending-collapse')}`, {target: '#pending-operations', swap: 'outerHTML'});
htmx.ajax('GET', `/partials/services-by-host?expanded=${isExpanded('services-by-host-collapse')}`, {target: '#services-by-host', swap: 'outerHTML'});
htmx.ajax('GET', '/partials/config-error', {target: '#config-error', swap: 'innerHTML'});
}
/**
* Load Monaco editor dynamically (only once)
*/
function loadMonaco(callback) {
if (monacoLoaded) {
callback();
return;
}
if (monacoLoading) {
// Wait for it to load
const checkInterval = setInterval(() => {
if (monacoLoaded) {
clearInterval(checkInterval);
callback();
}
}, 100);
return;
}
monacoLoading = true;
// Load the Monaco loader script
const script = document.createElement('script');
script.src = 'https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs/loader.js';
script.onload = function() {
require.config({ paths: { vs: 'https://cdn.jsdelivr.net/npm/monaco-editor@0.52.2/min/vs' }});
require(['vs/editor/editor.main'], function() {
monacoLoaded = true;
monacoLoading = false;
callback();
});
};
document.head.appendChild(script);
}
/**
* Create a Monaco editor instance
* @param {HTMLElement} container - Container element
* @param {string} content - Initial content
* @param {string} language - Editor language (yaml, plaintext, etc.)
* @param {object} opts - Options: { readonly, onSave }
* @returns {object} Monaco editor instance
*/
function createEditor(container, content, language, opts = {}) {
// Support legacy boolean readonly parameter
if (typeof opts === 'boolean') {
opts = { readonly: opts };
}
const { readonly = false, onSave = null } = opts;
const options = {
value: content,
language: language,
theme: 'vs-dark',
minimap: { enabled: false },
automaticLayout: true,
scrollBeyondLastLine: false,
fontSize: 14,
lineNumbers: 'on',
wordWrap: 'on'
};
if (readonly) {
options.readOnly = true;
options.domReadOnly = true;
}
const editor = monaco.editor.create(container, options);
// Add Command+S / Ctrl+S handler for editable editors
if (!readonly) {
editor.addCommand(monaco.KeyMod.CtrlCmd | monaco.KeyCode.KeyS, function() {
if (onSave) {
onSave(editor);
} else {
saveAllEditors();
}
});
}
return editor;
}
window.createEditor = createEditor;
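// Illustrative usage (hypothetical element id, mirroring the call sites below):
//   const ed = createEditor(document.getElementById('my-editor'), 'a: 1', 'yaml',
//     { onSave: (editor) => console.log(editor.getValue()) });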
/**
* Initialize all Monaco editors on the page
*/
function initMonacoEditors() {
// Dispose existing editors
Object.values(editors).forEach(ed => {
if (ed && ed.dispose) ed.dispose();
});
Object.keys(editors).forEach(key => delete editors[key]);
const editorConfigs = [
{ id: 'compose-editor', language: 'yaml', readonly: false },
{ id: 'env-editor', language: 'plaintext', readonly: false },
{ id: 'config-editor', language: 'yaml', readonly: false },
{ id: 'state-viewer', language: 'yaml', readonly: true }
];
// Check if any editor elements exist
const hasEditors = editorConfigs.some(({ id }) => document.getElementById(id));
if (!hasEditors) return;
// Load Monaco and create editors
loadMonaco(() => {
editorConfigs.forEach(({ id, language, readonly }) => {
const el = document.getElementById(id);
if (!el) return;
const content = el.dataset.content || '';
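// Passes the legacy boolean `readonly` form; createEditor normalizes it to opts.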
editors[id] = createEditor(el, content, language, readonly);
if (!readonly) {
editors[id].saveUrl = el.dataset.saveUrl;
}
});
});
}
/**
* Save all editors
*/
async function saveAllEditors() {
const saveBtn = document.getElementById('save-btn') || document.getElementById('save-config-btn');
const results = [];
for (const [id, editor] of Object.entries(editors)) {
if (!editor || !editor.saveUrl) continue;
const content = editor.getValue();
try {
const response = await fetch(editor.saveUrl, {
method: 'PUT',
headers: { 'Content-Type': 'text/plain' },
body: content
});
const data = await response.json();
if (!response.ok || !data.success) {
results.push({ id, success: false, error: data.detail || 'Unknown error' });
} else {
results.push({ id, success: true });
}
} catch (e) {
results.push({ id, success: false, error: e.message });
}
}
// Show result
if (saveBtn && results.length > 0) {
saveBtn.textContent = 'Saved!';
setTimeout(() => saveBtn.textContent = saveBtn.id === 'save-config-btn' ? 'Save Config' : 'Save All', 2000);
refreshDashboard();
}
}
/**
* Initialize save button handler
*/
function initSaveButton() {
const saveBtn = document.getElementById('save-btn') || document.getElementById('save-config-btn');
if (!saveBtn) return;
saveBtn.onclick = saveAllEditors;
}
/**
* Global keyboard shortcut handler
*/
function initKeyboardShortcuts() {
document.addEventListener('keydown', function(e) {
// Command+S (Mac) or Ctrl+S (Windows/Linux)
if ((e.metaKey || e.ctrlKey) && e.key === 's') {
// Only handle if we have editors and no Monaco editor is focused
if (Object.keys(editors).length > 0) {
// Check if any Monaco editor is focused
const focusedEditor = Object.values(editors).find(ed => ed && ed.hasTextFocus && ed.hasTextFocus());
if (!focusedEditor) {
e.preventDefault();
saveAllEditors();
}
}
}
});
}
/**
* Initialize page components
*/
function initPage() {
initMonacoEditors();
initSaveButton();
}
/**
* Attempt to reconnect to an active task from localStorage
*/
function tryReconnectToTask() {
const taskId = localStorage.getItem(getTaskKey());
if (!taskId) return;
// Wait for xterm to be loaded
const tryInit = (attempts) => {
if (typeof Terminal !== 'undefined' && typeof FitAddon !== 'undefined') {
expandTerminal();
initTerminal('terminal-output', taskId);
} else if (attempts > 0) {
setTimeout(() => tryInit(attempts - 1), 100);
}
};
tryInit(20);
}
// Play intro animation on command palette button
function playFabIntro() {
const fab = document.getElementById('cmd-fab');
if (!fab) return;
setTimeout(() => {
fab.style.setProperty('--cmd-pos', '0');
fab.style.setProperty('--cmd-opacity', '1');
fab.style.setProperty('--cmd-blur', '30');
setTimeout(() => {
fab.style.removeProperty('--cmd-pos');
fab.style.removeProperty('--cmd-opacity');
fab.style.removeProperty('--cmd-blur');
}, 3000);
}, 500);
}
// Initialize on page load
document.addEventListener('DOMContentLoaded', function() {
initPage();
initKeyboardShortcuts();
playFabIntro();
// Try to reconnect to any active task
tryReconnectToTask();
});
// Re-initialize after HTMX swaps main content
document.body.addEventListener('htmx:afterSwap', function(evt) {
if (evt.detail.target.id === 'main-content') {
initPage();
// Try to reconnect when navigating back to dashboard
tryReconnectToTask();
}
});
/**
* Expand terminal collapse and scroll to it
*/
function expandTerminal() {
const toggle = document.getElementById('terminal-toggle');
if (toggle) toggle.checked = true;
const collapse = document.getElementById('terminal-collapse');
if (collapse) {
collapse.scrollIntoView({ behavior: 'smooth', block: 'start' });
}
}
/**
* Show/hide terminal loading spinner
*/
function setTerminalLoading(loading) {
const spinner = document.getElementById('terminal-spinner');
if (spinner) {
spinner.classList.toggle('hidden', !loading);
}
}
// Handle action responses (terminal streaming)
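// Action endpoints reply with JSON carrying a task_id (e.g. {"task_id": "..."});
// we attach the shared terminal to that task's output stream below.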
document.body.addEventListener('htmx:afterRequest', function(evt) {
if (!evt.detail.successful || !evt.detail.xhr) return;
const text = evt.detail.xhr.responseText;
// Only try to parse if it looks like JSON (starts with {)
if (!text || !text.trim().startsWith('{')) return;
try {
const response = JSON.parse(text);
if (response.task_id) {
// Expand terminal and scroll to it
expandTerminal();
// Wait for xterm to be loaded if needed
const tryInit = (attempts) => {
if (typeof Terminal !== 'undefined' && typeof FitAddon !== 'undefined') {
initTerminal('terminal-output', response.task_id);
} else if (attempts > 0) {
setTimeout(() => tryInit(attempts - 1), 100);
} else {
console.error('xterm.js failed to load');
}
};
tryInit(20); // Try for up to 2 seconds
}
} catch (e) {
// Not valid JSON, ignore
}
});
// Command Palette
(function() {
const dialog = document.getElementById('cmd-palette');
const input = document.getElementById('cmd-input');
const list = document.getElementById('cmd-list');
const fab = document.getElementById('cmd-fab');
if (!dialog || !input || !list) return;
// Load icons from template (rendered server-side from icons.html)
const iconTemplate = document.getElementById('cmd-icons');
const icons = {};
if (iconTemplate) {
iconTemplate.content.querySelectorAll('[data-icon]').forEach(el => {
icons[el.dataset.icon] = el.innerHTML;
});
}
const colors = { service: '#22c55e', action: '#eab308', nav: '#3b82f6', app: '#a855f7' };
let commands = [];
let filtered = [];
let selected = 0;
const post = (url) => () => htmx.ajax('POST', url, {swap: 'none'});
const nav = (url) => () => {
htmx.ajax('GET', url, {target: '#main-content', select: '#main-content', swap: 'outerHTML'}).then(() => {
history.pushState({}, '', url);
});
};
// Navigate to dashboard and trigger action (or just POST if already on dashboard)
const dashboardAction = (endpoint) => () => {
if (window.location.pathname === '/') {
htmx.ajax('POST', `/api/${endpoint}`, {swap: 'none'});
} else {
// Navigate via HTMX, then trigger action after swap
htmx.ajax('GET', '/', {target: '#main-content', select: '#main-content', swap: 'outerHTML'}).then(() => {
history.pushState({}, '', '/');
htmx.ajax('POST', `/api/${endpoint}`, {swap: 'none'});
});
}
};
const cmd = (type, name, desc, action, icon = null) => ({ type, name, desc, action, icon });
function buildCommands() {
const actions = [
cmd('action', 'Apply', 'Make reality match config', dashboardAction('apply'), icons.check),
cmd('action', 'Refresh', 'Update state from reality', dashboardAction('refresh'), icons.refresh_cw),
cmd('app', 'Dashboard', 'Go to dashboard', nav('/'), icons.home),
cmd('app', 'Console', 'Go to console', nav('/console'), icons.terminal),
];
// Add service-specific actions if on a service page
const match = window.location.pathname.match(/^\/service\/(.+)$/);
if (match) {
const svc = decodeURIComponent(match[1]);
const svcCmd = (name, desc, endpoint, icon) => cmd('service', name, `${desc} ${svc}`, post(`/api/service/${svc}/${endpoint}`), icon);
actions.unshift(
svcCmd('Up', 'Start', 'up', icons.play),
svcCmd('Down', 'Stop', 'down', icons.square),
svcCmd('Restart', 'Restart', 'restart', icons.rotate_cw),
svcCmd('Pull', 'Pull', 'pull', icons.cloud_download),
svcCmd('Update', 'Pull + restart', 'update', icons.refresh_cw),
svcCmd('Logs', 'View logs for', 'logs', icons.file_text),
);
}
// Add nav commands for all services from sidebar
const services = [...document.querySelectorAll('#sidebar-services li[data-svc] a[href]')].map(a => {
const name = a.getAttribute('href').replace('/service/', '');
return cmd('nav', name, 'Go to service', nav(`/service/${name}`), icons.box);
});
commands = [...actions, ...services];
}
function filter() {
const q = input.value.toLowerCase();
filtered = commands.filter(c => c.name.toLowerCase().includes(q));
selected = Math.max(0, Math.min(selected, filtered.length - 1));
}
function render() {
list.innerHTML = filtered.map((c, i) => `
<a class="flex justify-between items-center px-3 py-2 rounded-r cursor-pointer hover:bg-base-200 border-l-4 ${i === selected ? 'bg-base-300' : ''}" style="border-left-color: ${colors[c.type] || '#666'}" data-idx="${i}">
<span class="flex items-center gap-2">${c.icon || ''}<span>${c.name}</span></span>
<span class="opacity-40 text-xs">${c.desc}</span>
</a>
`).join('') || '<div class="opacity-50 p-2">No matches</div>';
// Scroll selected item into view
const sel = list.querySelector(`[data-idx="${selected}"]`);
if (sel) sel.scrollIntoView({ block: 'nearest' });
}
function open() {
buildCommands();
selected = 0;
input.value = '';
filter();
render();
dialog.showModal();
input.focus();
}
function exec() {
if (filtered[selected]) {
dialog.close();
filtered[selected].action();
}
}
// Keyboard: Cmd+K to open
document.addEventListener('keydown', e => {
if ((e.metaKey || e.ctrlKey) && e.key === 'k') {
e.preventDefault();
open();
}
});
// Input filtering
input.addEventListener('input', () => { filter(); render(); });
// Keyboard nav inside palette
dialog.addEventListener('keydown', e => {
if (!dialog.open) return;
if (e.key === 'ArrowDown') { e.preventDefault(); selected = Math.min(selected + 1, filtered.length - 1); render(); }
else if (e.key === 'ArrowUp') { e.preventDefault(); selected = Math.max(selected - 1, 0); render(); }
else if (e.key === 'Enter') { e.preventDefault(); exec(); }
});
// Click to execute
list.addEventListener('click', e => {
const a = e.target.closest('a[data-idx]');
if (a) {
selected = parseInt(a.dataset.idx, 10);
exec();
}
});
// FAB click to open
if (fab) fab.addEventListener('click', open);
})();

View File

@@ -0,0 +1,225 @@
"""Streaming executor adapter for web UI."""
from __future__ import annotations
import asyncio
import os
import time
from typing import TYPE_CHECKING, Any
from compose_farm.executor import build_ssh_command
from compose_farm.ssh_keys import get_ssh_auth_sock
if TYPE_CHECKING:
from compose_farm.config import Config
# Environment variable to identify the web service (for self-update detection)
CF_WEB_SERVICE = os.environ.get("CF_WEB_SERVICE", "")
# ANSI escape codes for terminal output
RED = "\x1b[31m"
GREEN = "\x1b[32m"
DIM = "\x1b[2m"
RESET = "\x1b[0m"
CRLF = "\r\n"
# In-memory task registry
tasks: dict[str, dict[str, Any]] = {}
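# Each task dict holds at least "output" (list of str chunks), "status"
# ("completed"/"failed" once finished), and "completed_at" (epoch seconds or
# None) - shape inferred from the usage in this module.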
# How long to keep completed tasks (10 minutes)
TASK_TTL_SECONDS = 600
def cleanup_stale_tasks() -> int:
"""Remove tasks that completed more than TASK_TTL_SECONDS ago.
Returns the number of tasks removed.
"""
cutoff = time.time() - TASK_TTL_SECONDS
stale = [
tid
for tid, task in tasks.items()
if task.get("completed_at") and task["completed_at"] < cutoff
]
for tid in stale:
tasks.pop(tid, None)
return len(stale)
async def stream_to_task(task_id: str, message: str) -> None:
"""Send a message to a task's output buffer."""
if task_id in tasks:
tasks[task_id]["output"].append(message)
async def run_cli_streaming(
config: Config,
args: list[str],
task_id: str,
) -> None:
"""Run a cf CLI command as subprocess and stream output to task buffer.
This reuses all CLI logic including Rich formatting, progress bars, etc.
The subprocess has no real TTY; setting FORCE_COLOR makes Rich emit ANSI codes anyway.
"""
try:
# Build command - the --config option goes after the subcommand and its args
cmd = ["cf", *args, f"--config={config.config_path}"]
# Show command being executed
cmd_display = " ".join(["cf", *args])
await stream_to_task(task_id, f"{DIM}$ {cmd_display}{RESET}{CRLF}")
# Force color output even though there's no real TTY
# Set COLUMNS for Rich/Typer to format output correctly
env = {"FORCE_COLOR": "1", "TERM": "xterm-256color", "COLUMNS": "120"}
# Ensure SSH agent is available (auto-detect if needed)
ssh_sock = get_ssh_auth_sock()
if ssh_sock:
env["SSH_AUTH_SOCK"] = ssh_sock
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.STDOUT,
env={**os.environ, **env},
)
# Stream output line by line
if process.stdout:
async for line in process.stdout:
text = line.decode("utf-8", errors="replace")
# Convert \n to \r\n for xterm.js
if text.endswith("\n") and not text.endswith("\r\n"):
text = text[:-1] + "\r\n"
await stream_to_task(task_id, text)
exit_code = await process.wait()
tasks[task_id]["status"] = "completed" if exit_code == 0 else "failed"
tasks[task_id]["completed_at"] = time.time()
except Exception as e:
await stream_to_task(task_id, f"{RED}Error: {e}{RESET}{CRLF}")
tasks[task_id]["status"] = "failed"
tasks[task_id]["completed_at"] = time.time()
def _is_self_update(service: str, command: str) -> bool:
"""Check if this is a self-update (updating the web service itself).
Self-updates need special handling because running 'down' on the container
we're running in would kill the process before 'up' can execute.
"""
if not CF_WEB_SERVICE or service != CF_WEB_SERVICE:
return False
# Commands that involve 'down' need SSH: update, restart, down
return command in ("update", "restart", "down")
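# Illustrative (assuming CF_WEB_SERVICE="web"): _is_self_update("web", "update")
# and _is_self_update("web", "down") return True; _is_self_update("web", "pull")
# and _is_self_update("plex", "update") return False.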
async def _run_cli_via_ssh(
config: Config,
args: list[str],
task_id: str,
) -> None:
"""Run a cf CLI command via SSH to the host.
Used for self-updates to ensure the command survives container restart.
Uses setsid to run command in a new session (completely detached), with
output going to a log file. We tail the log to stream output. When SSH
dies (container killed), the tail dies but the setsid process continues.
"""
try:
# Get the host for the web service
host = config.get_host(CF_WEB_SERVICE)
cf_cmd = f"cf {' '.join(args)} --config={config.config_path}"
log_file = "/tmp/cf-self-update.log" # noqa: S108
# Build the remote command:
# 1. remove any stale log file
# 2. setsid runs the command in a new session (survives SSH disconnect),
#    with output redirected to the log file
# 3. sleep briefly so the log file exists before tailing
# 4. tail -f streams the log (dies when SSH dies, but the command continues)
remote_cmd = (
f"rm -f {log_file} && "
f"PATH=$HOME/.local/bin:/usr/local/bin:$PATH "
f"setsid sh -c '{cf_cmd} > {log_file} 2>&1' & "
f"sleep 0.3 && "
f"tail -f {log_file} 2>/dev/null"
)
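# Illustrative result (hypothetical service "web"): for `cf update web` this is
#   rm -f /tmp/cf-self-update.log && PATH=$HOME/.local/bin:/usr/local/bin:$PATH
#   setsid sh -c 'cf update web --config=... > /tmp/cf-self-update.log 2>&1' &
#   sleep 0.3 && tail -f /tmp/cf-self-update.log 2>/dev/null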
# Show what we're doing
await stream_to_task(
task_id,
f"{DIM}$ {cf_cmd}{RESET}{CRLF}",
)
await stream_to_task(
task_id,
f"{GREEN}Running via SSH (detached with setsid){RESET}{CRLF}",
)
# Build SSH command (no TTY needed, output comes from tail)
ssh_args = build_ssh_command(host, remote_cmd, tty=False)
# Set up environment with SSH agent
env = {**os.environ}
ssh_sock = get_ssh_auth_sock()
if ssh_sock:
env["SSH_AUTH_SOCK"] = ssh_sock
process = await asyncio.create_subprocess_exec(
*ssh_args,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.STDOUT,
env=env,
)
# Stream output until SSH dies (container killed) or command completes
if process.stdout:
async for line in process.stdout:
text = line.decode("utf-8", errors="replace")
if text.endswith("\n") and not text.endswith("\r\n"):
text = text[:-1] + "\r\n"
await stream_to_task(task_id, text)
exit_code = await process.wait()
# Exit code 255 means SSH connection closed (container died during down)
# This is expected for self-updates - setsid ensures command continues
if exit_code == 255: # noqa: PLR2004
await stream_to_task(
task_id,
f"{CRLF}{GREEN}Container restarting... refresh the page in a few seconds.{RESET}{CRLF}",
)
tasks[task_id]["status"] = "completed"
else:
tasks[task_id]["status"] = "completed" if exit_code == 0 else "failed"
tasks[task_id]["completed_at"] = time.time()
except Exception as e:
await stream_to_task(task_id, f"{RED}Error: {e}{RESET}{CRLF}")
tasks[task_id]["status"] = "failed"
tasks[task_id]["completed_at"] = time.time()
async def run_compose_streaming(
config: Config,
service: str,
command: str,
task_id: str,
) -> None:
"""Run a compose command (up/down/pull/restart) via CLI subprocess."""
# Split command into args (e.g., "up -d" -> ["up", "-d"])
args = command.split()
cli_cmd = args[0] # up, down, pull, restart
extra_args = args[1:] # -d, etc.
# Build CLI args
cli_args = [cli_cmd, service, *extra_args]
# Use SSH for self-updates to survive container restart
if _is_self_update(service, cli_cmd):
await _run_cli_via_ssh(config, cli_args, task_id)
else:
await run_cli_streaming(config, cli_args, task_id)

View File

@@ -0,0 +1,67 @@
{% from "partials/icons.html" import github, hamburger %}
<!DOCTYPE html>
<html lang="en" data-theme="dark">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{% block title %}Compose Farm{% endblock %}</title>
<!-- daisyUI + Tailwind -->
<link href="https://cdn.jsdelivr.net/npm/daisyui@5" data-vendor="daisyui.css" rel="stylesheet" type="text/css" />
<script src="https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4" data-vendor="tailwind.js"></script>
<!-- xterm.js -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/css/xterm.css" data-vendor="xterm.css">
<!-- Custom styles -->
<link rel="stylesheet" href="/static/app.css">
</head>
<body class="min-h-screen bg-base-200">
<div class="drawer lg:drawer-open">
<input id="drawer-toggle" type="checkbox" class="drawer-toggle" />
<!-- Main content -->
<div class="drawer-content flex flex-col">
<!-- Mobile navbar with hamburger -->
<header class="navbar bg-base-100 border-b border-base-300 lg:hidden">
<label for="drawer-toggle" class="btn btn-ghost btn-square">
{{ hamburger() }}
</label>
<span class="font-semibold rainbow-hover">Compose Farm</span>
</header>
<main id="main-content" class="flex-1 p-6 overflow-y-auto" hx-boost="true" hx-target="#main-content" hx-select="#main-content" hx-swap="outerHTML">
{% block content %}{% endblock %}
</main>
</div>
<!-- Sidebar -->
<div class="drawer-side">
<label for="drawer-toggle" class="drawer-overlay" aria-label="close sidebar"></label>
<aside id="sidebar" class="w-64 bg-base-100 border-r border-base-300 flex flex-col min-h-screen">
<header class="p-4 border-b border-base-300">
<h2 class="text-lg font-semibold flex items-center gap-2">
<span class="rainbow-hover">Compose Farm</span>
<a href="https://github.com/basnijholt/compose-farm" target="_blank" title="GitHub" class="opacity-50 hover:opacity-100 transition-opacity">
{{ github() }}
</a>
</h2>
</header>
<nav class="flex-1 overflow-y-auto p-2" hx-get="/partials/sidebar" hx-trigger="load" hx-swap="innerHTML">
<span class="loading loading-spinner loading-sm"></span> Loading...
</nav>
</aside>
</div>
</div>
<!-- Command Palette -->
{% include "partials/command_palette.html" %}
<!-- Scripts - HTMX first -->
<script src="https://unpkg.com/htmx.org@2.0.4" data-vendor="htmx.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xterm/xterm@5.5.0/lib/xterm.js" data-vendor="xterm.js"></script>
<script src="https://cdn.jsdelivr.net/npm/@xterm/addon-fit@0.10.0/lib/addon-fit.js" data-vendor="xterm-fit.js"></script>
<script src="/static/app.js"></script>
{% block scripts %}{% endblock %}
</body>
</html>

View File

@@ -0,0 +1,243 @@
{% extends "base.html" %}
{% from "partials/components.html" import page_header, collapse %}
{% from "partials/icons.html" import terminal, file_code, save %}
{% block title %}Console - Compose Farm{% endblock %}
{% block content %}
<div class="max-w-6xl">
{{ page_header("Console", "Terminal and editor access") }}
<!-- Host Selector -->
<div class="flex items-center gap-4 mb-4">
<label class="font-semibold">Host:</label>
<select id="console-host-select" class="select select-sm select-bordered">
{% for name in hosts %}
<option value="{{ name }}">{{ name }}{% if name == local_host %} (local){% endif %}</option>
{% endfor %}
</select>
<button id="console-connect-btn" class="btn btn-sm btn-primary" onclick="connectConsole()">Connect</button>
<span id="console-status" class="text-sm opacity-60"></span>
</div>
<!-- Terminal -->
{% call collapse("Terminal", checked=True, icon=terminal(), subtitle="Full shell access to selected host") %}
<div id="console-terminal" class="w-full bg-base-300 rounded-lg overflow-hidden resize-y" style="height: 384px; min-height: 200px;"></div>
{% endcall %}
<!-- Editor -->
{% call collapse("Editor", checked=True, icon=file_code()) %}
<div class="flex items-center justify-between mb-2">
<div class="flex items-center gap-4">
<input type="text" id="console-file-path" class="input input-sm input-bordered w-96" placeholder="Enter file path (e.g., ~/docker-compose.yaml)" value="{{ config_path }}">
<button class="btn btn-sm btn-outline" onclick="loadFile()">Open</button>
</div>
<div class="flex items-center gap-2">
<span id="editor-status" class="text-sm opacity-60"></span>
<button id="console-save-btn" class="btn btn-sm btn-primary" onclick="saveFile()">{{ save() }} Save</button>
</div>
</div>
<div id="console-editor" class="resize-y overflow-hidden rounded-lg" style="height: 512px; min-height: 200px;"></div>
{% endcall %}
</div>
<script>
// Use var to allow re-declaration on HTMX navigation
var consoleTerminal = null;
var consoleWs = null;
var consoleEditor = null;
var currentFilePath = null;
var currentHost = null;
// Helper to show status with monospace path
function setEditorStatus(prefix, path) {
const statusEl = document.getElementById('editor-status');
const escaped = path.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
statusEl.innerHTML = `${prefix} <code class="font-mono">${escaped}</code>`;
}
function connectConsole() {
const hostSelect = document.getElementById('console-host-select');
const host = hostSelect.value;
const statusEl = document.getElementById('console-status');
const terminalEl = document.getElementById('console-terminal');
if (!host) {
statusEl.textContent = 'Please select a host';
return;
}
currentHost = host;
// Clean up existing connection
if (consoleWs) {
consoleWs.close();
consoleWs = null;
}
if (consoleTerminal) {
consoleTerminal.dispose();
consoleTerminal = null;
}
statusEl.textContent = 'Connecting...';
// Create WebSocket
consoleWs = createWebSocket(`/ws/shell/${host}`);
// Resize callback - createTerminal's ResizeObserver calls this on container resize
const sendSize = (cols, rows) => {
if (consoleWs && consoleWs.readyState === WebSocket.OPEN) {
consoleWs.send(JSON.stringify({ type: 'resize', cols, rows }));
}
};
// Create terminal with resize callback
const { term } = createTerminal(terminalEl, { cursorBlink: true }, sendSize);
consoleTerminal = term;
consoleWs.onopen = () => {
statusEl.textContent = `Connected to ${host}`;
sendSize(term.cols, term.rows);
term.focus();
// Auto-load the default file once editor is ready
const pathInput = document.getElementById('console-file-path');
if (pathInput && pathInput.value) {
const tryLoad = (attempts) => {
if (consoleEditor) {
loadFile();
} else if (attempts > 0) {
setTimeout(() => tryLoad(attempts - 1), 100);
}
};
tryLoad(50); // Bounded retry (matches the tryInit pattern elsewhere)
}
};
consoleWs.onmessage = (event) => term.write(event.data);
consoleWs.onclose = () => {
statusEl.textContent = 'Disconnected';
term.write(`${ANSI.CRLF}${ANSI.DIM}[Connection closed]${ANSI.RESET}${ANSI.CRLF}`);
};
consoleWs.onerror = (error) => {
statusEl.textContent = 'Connection error';
term.write(`${ANSI.RED}[WebSocket Error]${ANSI.RESET}${ANSI.CRLF}`);
console.error('Console WebSocket error:', error);
};
// Send input to WebSocket
term.onData((data) => {
if (consoleWs && consoleWs.readyState === WebSocket.OPEN) {
consoleWs.send(data);
}
});
}
function initConsoleEditor() {
const editorEl = document.getElementById('console-editor');
if (!editorEl || consoleEditor) return;
loadMonaco(() => {
consoleEditor = createEditor(editorEl, '', 'plaintext', { onSave: saveFile });
});
}
async function loadFile() {
const pathInput = document.getElementById('console-file-path');
const path = pathInput.value.trim();
const statusEl = document.getElementById('editor-status');
if (!path) {
statusEl.textContent = 'Enter a file path';
return;
}
if (!currentHost) {
statusEl.textContent = 'Connect to a host first';
return;
}
setEditorStatus('Loading', path + '...');
try {
const response = await fetch(`/api/console/file?host=${encodeURIComponent(currentHost)}&path=${encodeURIComponent(path)}`);
const data = await response.json();
if (!response.ok || !data.success) {
statusEl.textContent = data.detail || 'Failed to load file';
return;
}
const language = getLanguageFromPath(path);
if (consoleEditor) {
consoleEditor.setValue(data.content);
monaco.editor.setModelLanguage(consoleEditor.getModel(), language);
currentFilePath = path; // Only set after content is loaded
setEditorStatus('Loaded:', path);
} else {
statusEl.textContent = 'Editor not ready';
}
} catch (e) {
statusEl.textContent = `Error: ${e.message}`;
}
}
async function saveFile() {
const statusEl = document.getElementById('editor-status');
if (!currentFilePath) {
statusEl.textContent = 'No file loaded';
return;
}
if (!currentHost) {
statusEl.textContent = 'Not connected to a host';
return;
}
if (!consoleEditor) {
statusEl.textContent = 'Editor not ready';
return;
}
setEditorStatus('Saving', currentFilePath + '...');
try {
const content = consoleEditor.getValue();
const response = await fetch(`/api/console/file?host=${encodeURIComponent(currentHost)}&path=${encodeURIComponent(currentFilePath)}`, {
method: 'PUT',
headers: { 'Content-Type': 'text/plain' },
body: content
});
const data = await response.json();
if (!response.ok || !data.success) {
statusEl.textContent = data.detail || 'Failed to save file';
return;
}
setEditorStatus('Saved:', currentFilePath);
} catch (e) {
statusEl.textContent = `Error: ${e.message}`;
}
}
// Initialize editor and auto-connect to first host
function init() {
initConsoleEditor();
const hostSelect = document.getElementById('console-host-select');
if (hostSelect && hostSelect.options.length > 0) {
connectConsole();
}
}
// On HTMX navigation, dependencies (app.js) are already loaded.
// On hard refresh, this script runs before app.js, so wait for DOMContentLoaded.
if (typeof createTerminal === 'function') {
init();
} else {
document.addEventListener('DOMContentLoaded', init);
}
</script>
{% endblock content %}

View File

@@ -0,0 +1,73 @@
{% extends "base.html" %}
{% from "partials/components.html" import page_header, collapse, stat_card, table, action_btn %}
{% from "partials/icons.html" import check, refresh_cw, save, settings, server, database %}
{% block title %}Dashboard - Compose Farm{% endblock %}
{% block content %}
<div class="max-w-5xl">
{{ page_header("Compose Farm", "Cluster overview and management") }}
<!-- Stats Cards -->
{% include "partials/stats.html" %}
<!-- Global Actions -->
<div class="flex flex-wrap gap-2 mb-6">
{{ action_btn("Apply", "/api/apply", "primary", "Make reality match config", check()) }}
{{ action_btn("Refresh", "/api/refresh", "outline", "Update state from reality", refresh_cw()) }}
<button id="save-config-btn" class="btn btn-outline">{{ save() }} Save Config</button>
</div>
{% include "partials/terminal.html" %}
<!-- Config Error Banner -->
<div id="config-error">
{% if config_error %}
{% include "partials/config_error.html" %}
{% endif %}
</div>
<!-- Config Editor -->
{% call collapse("Edit Config", badge="compose-farm.yaml", icon=settings(), checked=config_error) %}
<div class="editor-wrapper yaml-wrapper">
<div id="config-editor" class="yaml-editor" data-content="{{ config_content | e }}" data-save-url="/api/config"></div>
</div>
{% endcall %}
<!-- Pending Operations -->
{% include "partials/pending.html" %}
<!-- Services by Host -->
{% include "partials/services_by_host.html" %}
<!-- Hosts Configuration -->
{% call collapse("Hosts (" ~ (hosts | length) ~ ")", icon=server()) %}
{% call table() %}
<thead>
<tr>
<th>Name</th>
<th>Address</th>
<th>User</th>
<th>Port</th>
</tr>
</thead>
<tbody>
{% for name, host in hosts.items() %}
<tr class="hover:bg-base-300">
<td class="font-semibold">{{ name }}</td>
<td><code class="text-sm">{{ host.address }}</code></td>
<td><code class="text-sm">{{ host.user }}</code></td>
<td><code class="text-sm">{{ host.port }}</code></td>
</tr>
{% endfor %}
</tbody>
{% endcall %}
{% endcall %}
<!-- State Viewer -->
{% call collapse("Raw State", badge="compose-farm-state.yaml", icon=database()) %}
<div class="editor-wrapper viewer-wrapper">
<div id="state-viewer" class="yaml-viewer" data-content="{{ state_content | e }}"></div>
</div>
{% endcall %}
</div>
{% endblock %}

View File

@@ -0,0 +1,35 @@
{% from "partials/icons.html" import search, play, square, rotate_cw, cloud_download, refresh_cw, file_text, check, home, terminal, box %}
<!-- Icons for command palette (referenced by JS) -->
<template id="cmd-icons">
<span data-icon="play">{{ play() }}</span>
<span data-icon="square">{{ square() }}</span>
<span data-icon="rotate_cw">{{ rotate_cw() }}</span>
<span data-icon="cloud_download">{{ cloud_download() }}</span>
<span data-icon="refresh_cw">{{ refresh_cw() }}</span>
<span data-icon="file_text">{{ file_text() }}</span>
<span data-icon="check">{{ check() }}</span>
<span data-icon="home">{{ home() }}</span>
<span data-icon="terminal">{{ terminal() }}</span>
<span data-icon="box">{{ box() }}</span>
</template>
<dialog id="cmd-palette" class="modal">
<div class="modal-box max-w-lg p-0">
<label class="input input-lg bg-base-100 border-0 border-b border-base-300 w-full rounded-none rounded-t-box sticky top-0 z-10 focus-within:outline-none">
{{ search(20) }}
<input type="text" id="cmd-input" class="grow" placeholder="Type a command..." autocomplete="off" />
<kbd class="kbd kbd-sm opacity-50">esc</kbd>
</label>
<div id="cmd-list" class="flex flex-col p-2 max-h-80 overflow-y-auto">
<!-- Populated by JS -->
</div>
</div>
<form method="dialog" class="modal-backdrop"><button>close</button></form>
</dialog>
<!-- Floating button to open command palette -->
<button id="cmd-fab" class="fixed bottom-6 right-6 z-50" title="Command Palette (⌘K)">
<div class="cmd-fab-inner">
<span>⌘ + K</span>
</div>
</button>

View File

@@ -0,0 +1,53 @@
{# Page header with title and optional subtitle (supports HTML) #}
{% macro page_header(title, subtitle=None) %}
<div class="mb-6">
<h1 class="text-3xl font-bold rainbow-hover">{{ title }}</h1>
{% if subtitle %}
<p class="text-base-content/60 text-lg">{{ subtitle | safe }}</p>
{% endif %}
</div>
{% endmacro %}
{# Collapsible section #}
{% macro collapse(title, id=None, checked=False, badge=None, icon=None, subtitle=None) %}
<div class="collapse collapse-arrow bg-base-100 shadow mb-4">
<input type="checkbox" {% if id %}id="{{ id }}"{% endif %} {% if checked %}checked{% endif %} />
<div class="collapse-title font-semibold flex items-center gap-2">
{% if icon %}{{ icon }}{% endif %}{{ title }}
{% if badge %}<code class="text-xs ml-2 opacity-60">{{ badge }}</code>{% endif %}
{% if subtitle %}<span class="text-xs opacity-50 font-normal">{{ subtitle }}</span>{% endif %}
</div>
<div class="collapse-content">
{{ caller() }}
</div>
</div>
{% endmacro %}
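{# Usage (as in console.html): {% call collapse("Terminal", checked=True, icon=terminal(), subtitle="...") %}...{% endcall %} #}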
{# Action button with htmx #}
{% macro action_btn(label, url, style="outline", title=None, icon=None) %}
<button hx-post="{{ url }}"
hx-swap="none"
class="btn btn-{{ style }}"
{% if title %}title="{{ title }}"{% endif %}>
{% if icon %}{{ icon }}{% endif %}{{ label }}
</button>
{% endmacro %}
{# Stat card for dashboard #}
{% macro stat_card(label, value, color=None, icon=None) %}
<div class="card bg-base-100 shadow">
<div class="card-body items-center text-center">
<h2 class="card-title text-base-content/60 text-sm gap-1">{% if icon %}{{ icon }}{% endif %}{{ label }}</h2>
<p class="text-4xl font-bold {% if color %}text-{{ color }}{% endif %}">{{ value }}</p>
</div>
</div>
{% endmacro %}
{# Data table wrapper #}
{% macro table() %}
<div class="overflow-x-auto">
<table class="table table-zebra">
{{ caller() }}
</table>
</div>
{% endmacro %}

View File

@@ -0,0 +1,8 @@
{% from "partials/icons.html" import alert_triangle %}
<div class="alert alert-error mb-4">
{{ alert_triangle(size=20) }}
<div>
<h3 class="font-bold">Configuration Error</h3>
<div class="text-sm">{{ config_error }}</div>
</div>
</div>

View File

@@ -0,0 +1,27 @@
{# Container list for a service on a single host #}
{% from "partials/icons.html" import terminal %}
{% macro container_row(service, container, host) %}
<div class="flex items-center gap-2 mb-2">
{% if container.State == "running" %}
<span class="badge badge-success">running</span>
{% elif container.State == "unknown" %}
<span class="badge badge-ghost"><span class="loading loading-spinner loading-xs"></span></span>
{% else %}
<span class="badge badge-warning">{{ container.State }}</span>
{% endif %}
<code class="text-sm flex-1">{{ container.Name }}</code>
<button class="btn btn-sm btn-outline"
onclick="initExecTerminal('{{ service }}', '{{ container.Name }}', '{{ host }}')">
{{ terminal() }} Shell
</button>
</div>
{% endmacro %}
{% macro host_containers(service, host, containers, show_header=False) %}
{% if show_header %}
<div class="font-semibold text-sm mt-3 mb-1">{{ host }}</div>
{% endif %}
{% for container in containers %}
{{ container_row(service, container, host) }}
{% endfor %}
{% endmacro %}

View File

@@ -0,0 +1,148 @@
{# Lucide-style icons (https://lucide.dev) - 24x24 viewBox, 2px stroke, round caps #}
{# Brand icons #}
{% macro github(size=16) %}
<svg height="{{ size }}" width="{{ size }}" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/></svg>
{% endmacro %}
{# UI icons #}
{% macro hamburger(size=20) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<line x1="4" x2="20" y1="6" y2="6"/><line x1="4" x2="20" y1="12" y2="12"/><line x1="4" x2="20" y1="18" y2="18"/>
</svg>
{% endmacro %}
{% macro command(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M15 6v12a3 3 0 1 0 3-3H6a3 3 0 1 0 3 3V6a3 3 0 1 0-3 3h12a3 3 0 1 0-3-3"/>
</svg>
{% endmacro %}
{# Action icons #}
{% macro play(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<polygon points="6 3 20 12 6 21 6 3"/>
</svg>
{% endmacro %}
{% macro square(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<rect width="14" height="14" x="5" y="5" rx="2"/>
</svg>
{% endmacro %}
{% macro rotate_cw(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 12a9 9 0 1 1-9-9c2.52 0 4.93 1 6.74 2.74L21 8"/><path d="M21 3v5h-5"/>
</svg>
{% endmacro %}
{% macro download(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" x2="12" y1="15" y2="3"/>
</svg>
{% endmacro %}
{% macro cloud_download(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12 13v8l-4-4"/><path d="m12 21 4-4"/><path d="M4.393 15.269A7 7 0 1 1 15.71 8h1.79a4.5 4.5 0 0 1 2.436 8.284"/>
</svg>
{% endmacro %}
{% macro file_text(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7Z"/><path d="M14 2v4a2 2 0 0 0 2 2h4"/><path d="M10 9H8"/><path d="M16 13H8"/><path d="M16 17H8"/>
</svg>
{% endmacro %}
{% macro save(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M15.2 3a2 2 0 0 1 1.4.6l3.8 3.8a2 2 0 0 1 .6 1.4V19a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2z"/><path d="M17 21v-7a1 1 0 0 0-1-1H8a1 1 0 0 0-1 1v7"/><path d="M7 3v4a1 1 0 0 0 1 1h7"/>
</svg>
{% endmacro %}
{% macro check(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M20 6 9 17l-5-5"/>
</svg>
{% endmacro %}
{% macro refresh_cw(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M3 12a9 9 0 0 1 9-9 9.75 9.75 0 0 1 6.74 2.74L21 8"/><path d="M21 3v5h-5"/><path d="M21 12a9 9 0 0 1-9 9 9.75 9.75 0 0 1-6.74-2.74L3 16"/><path d="M8 16H3v5"/>
</svg>
{% endmacro %}
{% macro terminal(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<polyline points="4 17 10 11 4 5"/><line x1="12" x2="20" y1="19" y2="19"/>
</svg>
{% endmacro %}
{# Stats/navigation icons #}
{% macro server(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<rect width="20" height="8" x="2" y="2" rx="2" ry="2"/><rect width="20" height="8" x="2" y="14" rx="2" ry="2"/><line x1="6" x2="6.01" y1="6" y2="6"/><line x1="6" x2="6.01" y1="18" y2="18"/>
</svg>
{% endmacro %}
{% macro layers(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="m12.83 2.18a2 2 0 0 0-1.66 0L2.6 6.08a1 1 0 0 0 0 1.83l8.58 3.91a2 2 0 0 0 1.66 0l8.58-3.9a1 1 0 0 0 0-1.83Z"/><path d="m22 17.65-9.17 4.16a2 2 0 0 1-1.66 0L2 17.65"/><path d="m22 12.65-9.17 4.16a2 2 0 0 1-1.66 0L2 12.65"/>
</svg>
{% endmacro %}
{% macro circle_check(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="12" cy="12" r="10"/><path d="m9 12 2 2 4-4"/>
</svg>
{% endmacro %}
{% macro circle_x(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="12" cy="12" r="10"/><path d="m15 9-6 6"/><path d="m9 9 6 6"/>
</svg>
{% endmacro %}
{% macro home(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M15 21v-8a1 1 0 0 0-1-1h-4a1 1 0 0 0-1 1v8"/><path d="M3 10a2 2 0 0 1 .709-1.528l7-5.999a2 2 0 0 1 2.582 0l7 5.999A2 2 0 0 1 21 10v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/>
</svg>
{% endmacro %}
{% macro box(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M21 8a2 2 0 0 0-1-1.73l-7-4a2 2 0 0 0-2 0l-7 4A2 2 0 0 0 3 8v8a2 2 0 0 0 1 1.73l7 4a2 2 0 0 0 2 0l7-4A2 2 0 0 0 21 16Z"/><path d="m3.3 7 8.7 5 8.7-5"/><path d="M12 22V12"/>
</svg>
{% endmacro %}
{# Section icons #}
{% macro settings(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M12.22 2h-.44a2 2 0 0 0-2 2v.18a2 2 0 0 1-1 1.73l-.43.25a2 2 0 0 1-2 0l-.15-.08a2 2 0 0 0-2.73.73l-.22.38a2 2 0 0 0 .73 2.73l.15.1a2 2 0 0 1 1 1.72v.51a2 2 0 0 1-1 1.74l-.15.09a2 2 0 0 0-.73 2.73l.22.38a2 2 0 0 0 2.73.73l.15-.08a2 2 0 0 1 2 0l.43.25a2 2 0 0 1 1 1.73V20a2 2 0 0 0 2 2h.44a2 2 0 0 0 2-2v-.18a2 2 0 0 1 1-1.73l.43-.25a2 2 0 0 1 2 0l.15.08a2 2 0 0 0 2.73-.73l.22-.39a2 2 0 0 0-.73-2.73l-.15-.08a2 2 0 0 1-1-1.74v-.5a2 2 0 0 1 1-1.74l.15-.09a2 2 0 0 0 .73-2.73l-.22-.38a2 2 0 0 0-2.73-.73l-.15.08a2 2 0 0 1-2 0l-.43-.25a2 2 0 0 1-1-1.73V4a2 2 0 0 0-2-2z"/><circle cx="12" cy="12" r="3"/>
</svg>
{% endmacro %}
{% macro file_code(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="M10 12.5 8 15l2 2.5"/><path d="m14 12.5 2 2.5-2 2.5"/><path d="M14 2v4a2 2 0 0 0 2 2h4"/><path d="M15 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V7z"/>
</svg>
{% endmacro %}
{% macro database(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<ellipse cx="12" cy="5" rx="9" ry="3"/><path d="M3 5V19A9 3 0 0 0 21 19V5"/><path d="M3 12A9 3 0 0 0 21 12"/>
</svg>
{% endmacro %}
{% macro search(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<circle cx="11" cy="11" r="8"/><path d="m21 21-4.3-4.3"/>
</svg>
{% endmacro %}
{% macro alert_triangle(size=16) %}
<svg xmlns="http://www.w3.org/2000/svg" width="{{ size }}" height="{{ size }}" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<path d="m21.73 18-8-14a2 2 0 0 0-3.48 0l-8 14A2 2 0 0 0 4 21h16a2 2 0 0 0 1.73-3"/><path d="M12 9v4"/><path d="M12 17h.01"/>
</svg>
{% endmacro %}

View File

@@ -0,0 +1,38 @@
{% from "partials/components.html" import collapse %}
<div id="pending-operations">
{% if orphaned or migrations or not_started %}
{% call collapse("Pending Operations", id="pending-collapse", checked=expanded|default(true)) %}
{% if orphaned %}
<h4 class="font-semibold mt-2 mb-1">Orphaned Services (will be stopped)</h4>
<ul class="list-disc list-inside mb-4">
{% for svc, host in orphaned.items() %}
<li><a href="/service/{{ svc }}" class="badge badge-warning hover:badge-primary">{{ svc }}</a> on {{ host }}</li>
{% endfor %}
</ul>
{% endif %}
{% if migrations %}
<h4 class="font-semibold mt-2 mb-1">Services Needing Migration</h4>
<ul class="list-disc list-inside mb-4">
{% for svc in migrations %}
<li><a href="/service/{{ svc }}" class="badge badge-info hover:badge-primary">{{ svc }}</a></li>
{% endfor %}
</ul>
{% endif %}
{% if not_started %}
<h4 class="font-semibold mt-2 mb-1">Services Not Started</h4>
<ul class="menu menu-horizontal bg-base-200 rounded-box mb-2">
{% for svc in not_started | sort %}
<li><a href="/service/{{ svc }}">{{ svc }}</a></li>
{% endfor %}
</ul>
{% endif %}
{% endcall %}
{% else %}
<div role="alert" class="alert alert-success mb-4">
<svg xmlns="http://www.w3.org/2000/svg" class="stroke-current shrink-0 h-6 w-6" fill="none" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 12l2 2 4-4m6 2a9 9 0 11-18 0 9 9 0 0118 0z" /></svg>
<span>All services are in sync with configuration.</span>
</div>
{% endif %}
</div>

View File

@@ -0,0 +1,41 @@
{% from "partials/components.html" import collapse %}
{% from "partials/icons.html" import layers, search %}
<div id="services-by-host">
{% call collapse("Services by Host", id="services-by-host-collapse", checked=expanded|default(true), icon=layers()) %}
<div class="flex flex-wrap gap-2 mb-4 items-center">
<label class="input input-sm input-bordered flex items-center gap-2 bg-base-200">
{{ search() }}<input type="text" id="sbh-filter" class="w-32" placeholder="Filter..." onkeyup="sbhFilter()" />
</label>
<select id="sbh-host-select" class="select select-sm select-bordered bg-base-200" onchange="sbhFilter()">
<option value="">All hosts</option>
{% for h in services_by_host.keys() | sort %}<option value="{{ h }}">{{ h }}</option>{% endfor %}
</select>
</div>
{% for host_name, host_services in services_by_host.items() | sort %}
<div class="sbh-group" data-h="{{ host_name }}">
<h4 class="font-semibold mt-3 mb-1">{{ host_name }}{% if host_name in hosts %}<code class="text-xs ml-2 opacity-60">{{ hosts[host_name].address }}</code>{% endif %}</h4>
<ul class="menu menu-horizontal bg-base-200 rounded-box mb-2 flex-wrap">
{% for svc in host_services | sort %}<li data-s="{{ svc | lower }}"><a href="/service/{{ svc }}">{{ svc }}</a></li>{% endfor %}
</ul>
</div>
{% else %}
<p class="text-base-content/60 italic">No services currently running.</p>
{% endfor %}
<script>
function sbhFilter() {
const q = (document.getElementById('sbh-filter')?.value || '').toLowerCase();
const h = document.getElementById('sbh-host-select')?.value || '';
document.querySelectorAll('.sbh-group').forEach(g => {
if (h && g.dataset.h !== h) { g.hidden = true; return; }
let n = 0;
g.querySelectorAll('li[data-s]').forEach(li => {
const show = !q || li.dataset.s.includes(q);
li.hidden = !show;
if (show) n++;
});
g.hidden = !n;
});
}
</script>
{% endcall %}
</div>

View File

@@ -0,0 +1,46 @@
{% from "partials/icons.html" import home, search, terminal %}
<!-- Navigation Links -->
<div class="mb-4">
<ul class="menu" hx-boost="true" hx-target="#main-content" hx-select="#main-content" hx-swap="outerHTML">
<li><a href="/" class="font-semibold">{{ home() }} Dashboard</a></li>
<li><a href="/console" class="font-semibold">{{ terminal() }} Console</a></li>
</ul>
</div>
<!-- Services Section -->
<div class="mb-4">
<h4 class="text-xs uppercase tracking-wide text-base-content/60 px-3 py-1">Services <span class="opacity-50" id="sidebar-count">({{ services | length }})</span></h4>
<div class="px-2 mb-2 flex flex-col gap-1">
<label class="input input-xs flex items-center gap-2 bg-base-200">
{{ search(14) }}<input type="text" id="sidebar-filter" placeholder="Filter..." onkeyup="sidebarFilter()" />
</label>
<select id="sidebar-host-select" class="select select-xs bg-base-200 w-full" onchange="sidebarFilter()">
<option value="">All hosts</option>
{% for h in hosts %}<option value="{{ h }}">{{ h }}</option>{% endfor %}
</select>
</div>
<ul class="menu menu-sm" id="sidebar-services" hx-boost="true" hx-target="#main-content" hx-select="#main-content" hx-swap="outerHTML">
{% for service in services %}
<li data-svc="{{ service | lower }}" data-h="{{ service_hosts.get(service, '') }}">
<a href="/service/{{ service }}" class="flex items-center gap-2">
{% if service in state %}<span class="status status-success" title="In state file"></span>
{% else %}<span class="status status-neutral" title="Not in state file"></span>{% endif %}
{{ service }}
</a>
</li>
{% endfor %}
</ul>
</div>
<script>
function sidebarFilter() {
const q = (document.getElementById('sidebar-filter')?.value || '').toLowerCase();
const h = document.getElementById('sidebar-host-select')?.value || '';
let n = 0;
document.querySelectorAll('#sidebar-services li').forEach(li => {
const show = (!q || li.dataset.svc.includes(q)) && (!h || !li.dataset.h || li.dataset.h === h);
li.hidden = !show;
if (show) n++;
});
document.getElementById('sidebar-count').textContent = '(' + n + ')';
}
</script>

View File

@@ -0,0 +1,8 @@
{% from "partials/components.html" import stat_card %}
{% from "partials/icons.html" import server, layers, circle_check, circle_x %}
<div id="stats-cards" class="grid grid-cols-2 md:grid-cols-4 gap-4 mb-6">
{{ stat_card("Hosts", hosts | length, icon=server()) }}
{{ stat_card("Services", services | length, icon=layers()) }}
{{ stat_card("Running", running_count, "success", circle_check()) }}
{{ stat_card("Stopped", stopped_count, icon=circle_x()) }}
</div>

View File

@@ -0,0 +1,14 @@
{% from "partials/icons.html" import terminal %}
<!-- Shared Terminal Component -->
<div class="collapse collapse-arrow bg-base-100 shadow mb-4" id="terminal-collapse">
<input type="checkbox" id="terminal-toggle" />
<div class="collapse-title font-medium flex items-center gap-2">
{{ terminal() }} Terminal Output
<span id="terminal-spinner" class="loading loading-spinner loading-sm hidden"></span>
</div>
<div class="collapse-content">
<div id="terminal-container" class="bg-[#1a1a2e] rounded-lg h-[300px] border border-white/10 resize-y overflow-hidden">
<div id="terminal-output" class="h-full"></div>
</div>
</div>
</div>

View File

@@ -0,0 +1,67 @@
{% extends "base.html" %}
{% from "partials/components.html" import collapse, action_btn %}
{% from "partials/icons.html" import play, square, rotate_cw, download, cloud_download, file_text, save, file_code, terminal, settings %}
{% block title %}{{ name }} - Compose Farm{% endblock %}
{% block content %}
<div class="max-w-5xl">
<div class="mb-6">
<h1 class="text-3xl font-bold rainbow-hover">{{ name }}</h1>
<div class="flex flex-wrap items-center gap-2 mt-2">
{% if current_host %}
<span class="badge badge-success">Running on {{ current_host }}</span>
{% else %}
<span class="badge badge-neutral">Not running</span>
{% endif %}
<span class="badge badge-outline">{{ hosts | join(', ') }}</span>
</div>
</div>
<!-- Action Buttons -->
<div class="flex flex-wrap gap-2 mb-6">
<!-- Lifecycle -->
{{ action_btn("Up", "/api/service/" ~ name ~ "/up", "primary", "Start service (docker compose up -d)", play()) }}
{{ action_btn("Down", "/api/service/" ~ name ~ "/down", "outline", "Stop service (docker compose down)", square()) }}
{{ action_btn("Restart", "/api/service/" ~ name ~ "/restart", "secondary", "Restart service (down + up)", rotate_cw()) }}
{{ action_btn("Update", "/api/service/" ~ name ~ "/update", "accent", "Update to latest (pull + build + down + up)", download()) }}
<div class="divider divider-horizontal mx-0"></div>
<!-- Other -->
{{ action_btn("Pull", "/api/service/" ~ name ~ "/pull", "outline", "Pull latest images (no restart)", cloud_download()) }}
{{ action_btn("Logs", "/api/service/" ~ name ~ "/logs", "outline", "Show recent logs", file_text()) }}
<button id="save-btn" class="btn btn-outline">{{ save() }} Save All</button>
</div>
{% call collapse("Compose File", badge=compose_path, icon=file_code()) %}
<div class="editor-wrapper yaml-wrapper">
<div id="compose-editor" class="yaml-editor" data-content="{{ compose_content | e }}" data-save-url="/api/service/{{ name }}/compose"></div>
</div>
{% endcall %}
{% call collapse(".env File", badge=env_path, icon=settings()) %}
<div class="editor-wrapper env-wrapper">
<div id="env-editor" class="env-editor" data-content="{{ env_content | e }}" data-save-url="/api/service/{{ name }}/env"></div>
</div>
{% endcall %}
{% include "partials/terminal.html" %}
<!-- Exec Terminal -->
{% if current_host %}
{% call collapse("Container Shell", id="exec-collapse", checked=True, icon=terminal()) %}
<div id="containers-list" class="mb-4"
hx-get="/api/service/{{ name }}/containers"
hx-trigger="load"
hx-target="this"
hx-select="unset"
hx-swap="innerHTML">
<span class="loading loading-spinner loading-sm"></span> Loading containers...
</div>
<div id="exec-terminal-container" class="bg-[#1a1a2e] rounded-lg h-[400px] border border-white/10 hidden">
<div id="exec-terminal" class="h-full"></div>
</div>
{% endcall %}
{% endif %}
</div>
{% endblock %}

290
src/compose_farm/web/ws.py Normal file
View File

@@ -0,0 +1,290 @@
"""WebSocket handler for terminal streaming."""
from __future__ import annotations
import asyncio
import contextlib
import fcntl
import json
import os
import pty
import signal
import struct
import termios
from typing import TYPE_CHECKING, Any
import asyncssh
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from compose_farm.executor import is_local, ssh_connect_kwargs
from compose_farm.web.deps import get_config
from compose_farm.web.streaming import CRLF, DIM, GREEN, RED, RESET, tasks
if TYPE_CHECKING:
from compose_farm.config import Host
router = APIRouter()
def _parse_resize(msg: str) -> tuple[int, int] | None:
"""Parse a resize message, return (cols, rows) or None if not a resize."""
try:
data = json.loads(msg)
if data.get("type") == "resize":
return int(data["cols"]), int(data["rows"])
except (json.JSONDecodeError, KeyError, TypeError, ValueError):
pass
return None
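# Illustrative: _parse_resize('{"type": "resize", "cols": 120, "rows": 30}')
# returns (120, 30); plain keystroke payloads return None and are treated as input.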
def _resize_pty(
fd: int, cols: int, rows: int, proc: asyncio.subprocess.Process | None = None
) -> None:
"""Resize a local PTY and send SIGWINCH to the process."""
winsize = struct.pack("HHHH", rows, cols, 0, 0)
fcntl.ioctl(fd, termios.TIOCSWINSZ, winsize)
# Explicitly send SIGWINCH so docker exec forwards it to the container
if proc and proc.pid:
os.kill(proc.pid, signal.SIGWINCH)
async def _bridge_websocket_to_fd(
websocket: WebSocket,
master_fd: int,
proc: asyncio.subprocess.Process,
) -> None:
"""Bridge WebSocket to a local PTY file descriptor."""
loop = asyncio.get_running_loop()
async def read_output() -> None:
while proc.returncode is None:
try:
data = await loop.run_in_executor(None, lambda: os.read(master_fd, 4096))
except BlockingIOError:
await asyncio.sleep(0.01)
continue
except OSError:
break
if not data:
break
await websocket.send_text(data.decode("utf-8", errors="replace"))
read_task = asyncio.create_task(read_output())
try:
while proc.returncode is None:
try:
msg = await asyncio.wait_for(websocket.receive_text(), timeout=0.1)
except TimeoutError:
continue
if size := _parse_resize(msg):
_resize_pty(master_fd, *size, proc)
else:
os.write(master_fd, msg.encode())
finally:
read_task.cancel()
os.close(master_fd)
if proc.returncode is None:
proc.terminate()
async def _bridge_websocket_to_ssh(
websocket: WebSocket,
proc: Any, # asyncssh.SSHClientProcess
) -> None:
"""Bridge WebSocket to an SSH process with PTY."""
assert proc.stdout is not None
assert proc.stdin is not None
async def read_stdout() -> None:
while proc.returncode is None:
data = await proc.stdout.read(4096)
if not data:
break
text = data if isinstance(data, str) else data.decode()
await websocket.send_text(text)
read_task = asyncio.create_task(read_stdout())
try:
while proc.returncode is None:
try:
msg = await asyncio.wait_for(websocket.receive_text(), timeout=0.1)
except TimeoutError:
continue
if size := _parse_resize(msg):
proc.change_terminal_size(*size)
else:
proc.stdin.write(msg)
finally:
read_task.cancel()
proc.terminate()
def _make_controlling_tty(slave_fd: int) -> None:
"""Set up the slave PTY as the controlling terminal for the child process."""
# Create a new session
os.setsid()
# Make the slave fd the controlling terminal
fcntl.ioctl(slave_fd, termios.TIOCSCTTY, 0)
async def _run_local_exec(websocket: WebSocket, exec_cmd: str) -> None:
"""Run docker exec locally with PTY."""
master_fd, slave_fd = pty.openpty()
proc = await asyncio.create_subprocess_shell(
exec_cmd,
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
close_fds=True,
preexec_fn=lambda: _make_controlling_tty(slave_fd),
start_new_session=False, # We handle setsid in preexec_fn
)
os.close(slave_fd)
# Set non-blocking
flags = fcntl.fcntl(master_fd, fcntl.F_GETFL)
fcntl.fcntl(master_fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
await _bridge_websocket_to_fd(websocket, master_fd, proc)
async def _run_remote_exec(
websocket: WebSocket, host: Host, exec_cmd: str, *, agent_forwarding: bool = False
) -> None:
"""Run docker exec on remote host via SSH with PTY."""
# ssh_connect_kwargs includes agent_path and client_keys fallback
async with asyncssh.connect(
**ssh_connect_kwargs(host),
agent_forwarding=agent_forwarding,
) as conn:
proc: asyncssh.SSHClientProcess[Any] = await conn.create_process(
exec_cmd,
term_type="xterm-256color",
term_size=(80, 24),
)
async with proc:
await _bridge_websocket_to_ssh(websocket, proc)
async def _run_exec_session(
websocket: WebSocket,
container: str,
host_name: str,
) -> None:
"""Run an interactive docker exec session over WebSocket."""
config = get_config()
host = config.hosts.get(host_name)
if not host:
await websocket.send_text(f"{RED}Host '{host_name}' not found{RESET}{CRLF}")
return
exec_cmd = f"docker exec -it {container} /bin/sh -c 'command -v bash >/dev/null && exec bash || exec sh'"
if is_local(host):
await _run_local_exec(websocket, exec_cmd)
else:
await _run_remote_exec(websocket, host, exec_cmd)
@router.websocket("/ws/exec/{service}/{container}/{host}")
async def exec_websocket(
websocket: WebSocket,
service: str, # noqa: ARG001
container: str,
host: str,
) -> None:
"""WebSocket endpoint for interactive container exec."""
await websocket.accept()
try:
await websocket.send_text(f"{DIM}[Connecting to {container} on {host}...]{RESET}{CRLF}")
await _run_exec_session(websocket, container, host)
await websocket.send_text(f"{CRLF}{DIM}[Disconnected]{RESET}{CRLF}")
except WebSocketDisconnect:
pass
except Exception as e:
with contextlib.suppress(Exception):
await websocket.send_text(f"{RED}Error: {e}{RESET}{CRLF}")
finally:
with contextlib.suppress(Exception):
await websocket.close()
async def _run_shell_session(
websocket: WebSocket,
host_name: str,
) -> None:
"""Run an interactive shell session on a host over WebSocket."""
config = get_config()
host = config.hosts.get(host_name)
if not host:
await websocket.send_text(f"{RED}Host '{host_name}' not found{RESET}{CRLF}")
return
# Start interactive shell in home directory (avoid login shell to prevent job control warnings)
shell_cmd = "cd ~ && exec bash -i 2>/dev/null || exec sh -i"
if is_local(host):
await _run_local_exec(websocket, shell_cmd)
else:
await _run_remote_exec(websocket, host, shell_cmd, agent_forwarding=True)
@router.websocket("/ws/shell/{host}")
async def shell_websocket(
websocket: WebSocket,
host: str,
) -> None:
"""WebSocket endpoint for interactive host shell access."""
await websocket.accept()
try:
await websocket.send_text(f"{DIM}[Connecting to {host}...]{RESET}{CRLF}")
await _run_shell_session(websocket, host)
await websocket.send_text(f"{CRLF}{DIM}[Disconnected]{RESET}{CRLF}")
except WebSocketDisconnect:
pass
except Exception as e:
with contextlib.suppress(Exception):
await websocket.send_text(f"{RED}Error: {e}{RESET}{CRLF}")
finally:
with contextlib.suppress(Exception):
await websocket.close()
@router.websocket("/ws/terminal/{task_id}")
async def terminal_websocket(websocket: WebSocket, task_id: str) -> None:
"""WebSocket endpoint for terminal streaming."""
await websocket.accept()
if task_id not in tasks:
await websocket.send_text(
f"{DIM}Task not found (expired or container restarted).{RESET}{CRLF}"
)
await websocket.close(code=4004)
return
task = tasks[task_id]
sent_count = 0
try:
while True:
# Send any new output
while sent_count < len(task["output"]):
await websocket.send_text(task["output"][sent_count])
sent_count += 1
if task["status"] in ("completed", "failed"):
status = "[Done]" if task["status"] == "completed" else "[Failed]"
color = GREEN if task["status"] == "completed" else RED
await websocket.send_text(f"{CRLF}{color}{status}{RESET}{CRLF}")
await websocket.close()
break
await asyncio.sleep(0.05)
except WebSocketDisconnect:
pass
# Task stays in memory for reconnection; cleanup_stale_tasks() handles expiry

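On the wire, the resize handshake is the only structured message: a client sends JSON {"type": "resize", "cols": ..., "rows": ...}, and every other text frame is written to the terminal as keystrokes, with output streaming back as plain text. A minimal client sketch, assuming the app listens on localhost:8000 and the third-party websockets package is available (the path segments are placeholders):

import asyncio
import json

import websockets


async def main() -> None:
    # Hypothetical service/container/host segments; real values come from the UI.
    uri = "ws://localhost:8000/ws/exec/myservice/mycontainer/myhost"
    async with websockets.connect(uri) as ws:
        # Structured resize message, mirroring _parse_resize() above.
        await ws.send(json.dumps({"type": "resize", "cols": 120, "rows": 40}))
        await ws.send("echo hello\n")  # any other frame goes to the PTY as keystrokes
        print(await ws.recv())  # connect banner / terminal output comes back as text


asyncio.run(main())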
tests/test_cli_lifecycle.py (new file, 426 lines)

@@ -0,0 +1,426 @@
"""Tests for CLI lifecycle commands (apply, down --orphaned)."""
from pathlib import Path
from unittest.mock import patch
import pytest
import typer
from compose_farm.cli.lifecycle import apply, down
from compose_farm.config import Config, Host
from compose_farm.executor import CommandResult
def _make_config(tmp_path: Path, services: dict[str, str] | None = None) -> Config:
"""Create a minimal config for testing."""
compose_dir = tmp_path / "compose"
compose_dir.mkdir()
svc_dict = services or {"svc1": "host1", "svc2": "host2"}
for svc in svc_dict:
svc_dir = compose_dir / svc
svc_dir.mkdir()
(svc_dir / "docker-compose.yml").write_text("services: {}\n")
config_path = tmp_path / "compose-farm.yaml"
config_path.write_text("")
return Config(
compose_dir=compose_dir,
hosts={"host1": Host(address="localhost"), "host2": Host(address="localhost")},
services=svc_dict,
config_path=config_path,
)
def _make_result(service: str, success: bool = True) -> CommandResult:
"""Create a command result."""
return CommandResult(
service=service,
exit_code=0 if success else 1,
success=success,
stdout="",
stderr="",
)
class TestApplyCommand:
"""Tests for the apply command."""
def test_apply_nothing_to_do(self, tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
"""When no migrations, orphans, or missing services, prints success message."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
):
apply(dry_run=False, no_orphans=False, full=False, config=None)
captured = capsys.readouterr()
assert "Nothing to apply" in captured.out
def test_apply_dry_run_shows_preview(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""Dry run shows what would be done without executing."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch(
"compose_farm.cli.lifecycle.get_orphaned_services",
return_value={"old-svc": "host1"},
),
patch(
"compose_farm.cli.lifecycle.get_services_needing_migration",
return_value=["svc1"],
),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
patch("compose_farm.cli.lifecycle.get_service_host", return_value="host1"),
patch("compose_farm.cli.lifecycle.stop_orphaned_services") as mock_stop,
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
):
apply(dry_run=True, no_orphans=False, full=False, config=None)
captured = capsys.readouterr()
assert "Services to migrate" in captured.out
assert "svc1" in captured.out
assert "Orphaned services to stop" in captured.out
assert "old-svc" in captured.out
assert "dry-run" in captured.out
# Should not have called the actual operations
mock_stop.assert_not_called()
mock_up.assert_not_called()
def test_apply_executes_migrations(self, tmp_path: Path) -> None:
"""Apply runs migrations when services need migration."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("svc1")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch(
"compose_farm.cli.lifecycle.get_services_needing_migration",
return_value=["svc1"],
),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
patch("compose_farm.cli.lifecycle.get_service_host", return_value="host1"),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
patch("compose_farm.cli.lifecycle.maybe_regenerate_traefik"),
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=False, full=False, config=None)
mock_up.assert_called_once()
call_args = mock_up.call_args
assert call_args[0][1] == ["svc1"] # services list
def test_apply_executes_orphan_cleanup(self, tmp_path: Path) -> None:
"""Apply stops orphaned services."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("old-svc@host1")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch(
"compose_farm.cli.lifecycle.get_orphaned_services",
return_value={"old-svc": "host1"},
),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.stop_orphaned_services") as mock_stop,
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=False, full=False, config=None)
mock_stop.assert_called_once_with(cfg)
def test_apply_no_orphans_skips_orphan_cleanup(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""--no-orphans flag skips orphan cleanup."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("svc1")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch(
"compose_farm.cli.lifecycle.get_orphaned_services",
return_value={"old-svc": "host1"},
),
patch(
"compose_farm.cli.lifecycle.get_services_needing_migration",
return_value=["svc1"],
),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
patch("compose_farm.cli.lifecycle.get_service_host", return_value="host1"),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
patch("compose_farm.cli.lifecycle.stop_orphaned_services") as mock_stop,
patch("compose_farm.cli.lifecycle.maybe_regenerate_traefik"),
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=True, full=False, config=None)
# Should run migrations but not orphan cleanup
mock_up.assert_called_once()
mock_stop.assert_not_called()
# Orphans should not appear in output
captured = capsys.readouterr()
assert "old-svc" not in captured.out
def test_apply_no_orphans_nothing_to_do(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""--no-orphans with only orphans means nothing to do."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch(
"compose_farm.cli.lifecycle.get_orphaned_services",
return_value={"old-svc": "host1"},
),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
):
apply(dry_run=False, no_orphans=True, full=False, config=None)
captured = capsys.readouterr()
assert "Nothing to apply" in captured.out
def test_apply_starts_missing_services(self, tmp_path: Path) -> None:
"""Apply starts services that are in config but not in state."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("svc1")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch(
"compose_farm.cli.lifecycle.get_services_not_in_state",
return_value=["svc1"],
),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
patch("compose_farm.cli.lifecycle.maybe_regenerate_traefik"),
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=False, full=False, config=None)
mock_up.assert_called_once()
call_args = mock_up.call_args
assert call_args[0][1] == ["svc1"]
def test_apply_dry_run_shows_missing_services(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""Dry run shows services that would be started."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch(
"compose_farm.cli.lifecycle.get_services_not_in_state",
return_value=["svc1"],
),
):
apply(dry_run=True, no_orphans=False, full=False, config=None)
captured = capsys.readouterr()
assert "Services to start" in captured.out
assert "svc1" in captured.out
assert "dry-run" in captured.out
def test_apply_full_refreshes_all_services(self, tmp_path: Path) -> None:
"""--full runs up on all services to pick up config changes."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("svc1"), _make_result("svc2")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
patch("compose_farm.cli.lifecycle.maybe_regenerate_traefik"),
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=False, full=True, config=None)
mock_up.assert_called_once()
call_args = mock_up.call_args
# Should refresh all services in config
assert set(call_args[0][1]) == {"svc1", "svc2"}
def test_apply_full_dry_run_shows_refresh(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""--full --dry-run shows services that would be refreshed."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch("compose_farm.cli.lifecycle.get_services_needing_migration", return_value=[]),
patch("compose_farm.cli.lifecycle.get_services_not_in_state", return_value=[]),
):
apply(dry_run=True, no_orphans=False, full=True, config=None)
captured = capsys.readouterr()
assert "Services to refresh" in captured.out
assert "svc1" in captured.out
assert "svc2" in captured.out
assert "dry-run" in captured.out
def test_apply_full_excludes_already_handled_services(self, tmp_path: Path) -> None:
"""--full doesn't double-process services that are migrating or starting."""
cfg = _make_config(tmp_path, {"svc1": "host1", "svc2": "host2", "svc3": "host1"})
mock_results = [_make_result("svc1"), _make_result("svc3")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
patch(
"compose_farm.cli.lifecycle.get_services_needing_migration",
return_value=["svc1"],
),
patch(
"compose_farm.cli.lifecycle.get_services_not_in_state",
return_value=["svc2"],
),
patch("compose_farm.cli.lifecycle.get_service_host", return_value="host2"),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.up_services") as mock_up,
patch("compose_farm.cli.lifecycle.maybe_regenerate_traefik"),
patch("compose_farm.cli.lifecycle.report_results"),
):
apply(dry_run=False, no_orphans=False, full=True, config=None)
# up_services should be called 3 times: migrate, start, refresh
assert mock_up.call_count == 3
# Get the third call (refresh) and check it only has svc3
refresh_call = mock_up.call_args_list[2]
assert refresh_call[0][1] == ["svc3"]
class TestDownOrphaned:
"""Tests for down --orphaned flag."""
def test_down_orphaned_no_orphans(
self, tmp_path: Path, capsys: pytest.CaptureFixture[str]
) -> None:
"""When no orphans exist, prints success message."""
cfg = _make_config(tmp_path)
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.lifecycle.get_orphaned_services", return_value={}),
):
down(
services=None,
all_services=False,
orphaned=True,
host=None,
config=None,
)
captured = capsys.readouterr()
assert "No orphaned services to stop" in captured.out
def test_down_orphaned_stops_services(self, tmp_path: Path) -> None:
"""--orphaned stops orphaned services."""
cfg = _make_config(tmp_path)
mock_results = [_make_result("old-svc@host1")]
with (
patch("compose_farm.cli.lifecycle.load_config_or_exit", return_value=cfg),
patch(
"compose_farm.cli.lifecycle.get_orphaned_services",
return_value={"old-svc": "host1"},
),
patch(
"compose_farm.cli.lifecycle.run_async",
return_value=mock_results,
),
patch("compose_farm.cli.lifecycle.stop_orphaned_services") as mock_stop,
patch("compose_farm.cli.lifecycle.report_results"),
):
down(
services=None,
all_services=False,
orphaned=True,
host=None,
config=None,
)
mock_stop.assert_called_once_with(cfg)
def test_down_orphaned_with_services_errors(self) -> None:
"""--orphaned cannot be combined with service arguments."""
with pytest.raises(typer.Exit) as exc_info:
down(
services=["svc1"],
all_services=False,
orphaned=True,
host=None,
config=None,
)
assert exc_info.value.exit_code == 1
def test_down_orphaned_with_all_errors(self) -> None:
"""--orphaned cannot be combined with --all."""
with pytest.raises(typer.Exit) as exc_info:
down(
services=None,
all_services=True,
orphaned=True,
host=None,
config=None,
)
assert exc_info.value.exit_code == 1
def test_down_orphaned_with_host_errors(self) -> None:
"""--orphaned cannot be combined with --host."""
with pytest.raises(typer.Exit) as exc_info:
down(
services=None,
all_services=False,
orphaned=True,
host="host1",
config=None,
)
assert exc_info.value.exit_code == 1

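These tests call apply() directly with keyword arguments; the same dry-run path is reachable through the Typer app. A sketch, assuming the command is registered as "apply" and the flag follows typer's default dash naming:

from typer.testing import CliRunner

from compose_farm.cli.app import app

runner = CliRunner()
# Assumes a valid compose-farm.yaml on the search path; prints the dry-run
# plan ("Services to migrate", ...) or "Nothing to apply".
result = runner.invoke(app, ["apply", "--dry-run"])
print(result.output)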
tests/test_cli_logs.py (new file, 207 lines)

@@ -0,0 +1,207 @@
"""Tests for CLI logs command."""
from collections.abc import Coroutine
from pathlib import Path
from typing import Any
from unittest.mock import patch
import pytest
import typer
from compose_farm.cli.monitoring import logs
from compose_farm.config import Config, Host
from compose_farm.executor import CommandResult
def _make_config(tmp_path: Path) -> Config:
"""Create a minimal config for testing."""
compose_dir = tmp_path / "compose"
compose_dir.mkdir()
for svc in ("svc1", "svc2", "svc3"):
svc_dir = compose_dir / svc
svc_dir.mkdir()
(svc_dir / "docker-compose.yml").write_text("services: {}\n")
return Config(
compose_dir=compose_dir,
hosts={"local": Host(address="localhost"), "remote": Host(address="192.168.1.10")},
services={"svc1": "local", "svc2": "local", "svc3": "remote"},
)
def _make_result(service: str) -> CommandResult:
"""Create a successful command result."""
return CommandResult(service=service, exit_code=0, success=True, stdout="", stderr="")
def _mock_run_async_factory(
services: list[str],
) -> tuple[Any, list[CommandResult]]:
"""Create a mock run_async that returns results for given services."""
results = [_make_result(s) for s in services]
def mock_run_async(_coro: Coroutine[Any, Any, Any]) -> list[CommandResult]:
return results
return mock_run_async, results
class TestLogsContextualDefault:
"""Tests for logs --tail contextual default behavior."""
def test_logs_all_services_defaults_to_20(self, tmp_path: Path) -> None:
"""When --all is specified, default tail should be 20."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1", "svc2", "svc3"])
with (
patch("compose_farm.cli.monitoring.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
mock_run.return_value = None
logs(services=None, all_services=True, host=None, follow=False, tail=None, config=None)
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][2] == "logs --tail 20"
def test_logs_single_service_defaults_to_100(self, tmp_path: Path) -> None:
"""When specific services are specified, default tail should be 100."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1"])
with (
patch("compose_farm.cli.monitoring.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
logs(
services=["svc1"],
all_services=False,
host=None,
follow=False,
tail=None,
config=None,
)
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][2] == "logs --tail 100"
def test_logs_explicit_tail_overrides_default(self, tmp_path: Path) -> None:
"""When --tail is explicitly provided, it should override the default."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1", "svc2", "svc3"])
with (
patch("compose_farm.cli.monitoring.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
logs(
services=None,
all_services=True,
host=None,
follow=False,
tail=50,
config=None,
)
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][2] == "logs --tail 50"
def test_logs_follow_appends_flag(self, tmp_path: Path) -> None:
"""When --follow is specified, -f should be appended to command."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1"])
with (
patch("compose_farm.cli.monitoring.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
logs(
services=["svc1"],
all_services=False,
host=None,
follow=True,
tail=None,
config=None,
)
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][2] == "logs --tail 100 -f"
class TestLogsHostFilter:
"""Tests for logs --host filter behavior."""
def test_logs_host_filter_selects_services_on_host(self, tmp_path: Path) -> None:
"""When --host is specified, only services on that host are included."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1", "svc2"])
with (
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
logs(
services=None,
all_services=False,
host="local",
follow=False,
tail=None,
config=None,
)
mock_run.assert_called_once()
call_args = mock_run.call_args
# svc1 and svc2 are on "local", svc3 is on "remote"
assert set(call_args[0][1]) == {"svc1", "svc2"}
def test_logs_host_filter_defaults_to_20_lines(self, tmp_path: Path) -> None:
"""When --host is specified, default tail should be 20 (multiple services)."""
cfg = _make_config(tmp_path)
mock_run_async, _ = _mock_run_async_factory(["svc1", "svc2"])
with (
patch("compose_farm.cli.common.load_config_or_exit", return_value=cfg),
patch("compose_farm.cli.monitoring.run_async", side_effect=mock_run_async),
patch("compose_farm.cli.monitoring.run_on_services") as mock_run,
):
logs(
services=None,
all_services=False,
host="local",
follow=False,
tail=None,
config=None,
)
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][2] == "logs --tail 20"
def test_logs_all_and_host_mutually_exclusive(self) -> None:
"""Using --all and --host together should error."""
# No config mock needed - error is raised before config is loaded
with pytest.raises(typer.Exit) as exc_info:
logs(
services=None,
all_services=True,
host="local",
follow=False,
tail=None,
config=None,
)
assert exc_info.value.exit_code == 1

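The contextual default these tests pin down (tail 20 when fanning out to many services, tail 100 for a single one) keeps multi-service output skimmable while still giving a lone service useful history. A sketch of the user-facing calls; the "--all" and "--tail" option spellings are assumptions, since the tests bypass argument parsing:

from typer.testing import CliRunner

from compose_farm.cli.app import app

runner = CliRunner()
runner.invoke(app, ["logs", "--all"])                 # many services -> logs --tail 20
runner.invoke(app, ["logs", "svc1"])                  # one service   -> logs --tail 100
runner.invoke(app, ["logs", "svc1", "--tail", "50"])  # explicit value always wins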
tests/test_cli_ssh.py (new file, 114 lines)

@@ -0,0 +1,114 @@
"""Tests for CLI ssh commands."""
from pathlib import Path
from unittest.mock import patch
from typer.testing import CliRunner
from compose_farm.cli.app import app
runner = CliRunner()
class TestSshKeygen:
"""Tests for cf ssh keygen command."""
def test_keygen_generates_key(self, tmp_path: Path) -> None:
"""Generate SSH key when none exists."""
key_path = tmp_path / "compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
with (
patch("compose_farm.cli.ssh.SSH_KEY_PATH", key_path),
patch("compose_farm.cli.ssh.SSH_PUBKEY_PATH", pubkey_path),
patch("compose_farm.cli.ssh.key_exists", return_value=False),
):
result = runner.invoke(app, ["ssh", "keygen"])
# Command runs (may fail if ssh-keygen not available in test env)
assert result.exit_code in (0, 1)
def test_keygen_skips_if_exists(self, tmp_path: Path) -> None:
"""Skip key generation if key already exists."""
key_path = tmp_path / "compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
with (
patch("compose_farm.cli.ssh.SSH_KEY_PATH", key_path),
patch("compose_farm.cli.ssh.SSH_PUBKEY_PATH", pubkey_path),
patch("compose_farm.cli.ssh.key_exists", return_value=True),
):
result = runner.invoke(app, ["ssh", "keygen"])
assert "already exists" in result.output
class TestSshStatus:
"""Tests for cf ssh status command."""
def test_status_shows_no_key(self, tmp_path: Path) -> None:
"""Show message when no key exists."""
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text("""
hosts:
local:
address: localhost
services:
test: local
""")
with patch("compose_farm.cli.ssh.key_exists", return_value=False):
result = runner.invoke(app, ["ssh", "status", f"--config={config_file}"])
assert "No key found" in result.output
def test_status_shows_key_exists(self, tmp_path: Path) -> None:
"""Show key info when key exists."""
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text("""
hosts:
local:
address: localhost
services:
test: local
""")
with (
patch("compose_farm.cli.ssh.key_exists", return_value=True),
patch("compose_farm.cli.ssh.get_pubkey_content", return_value="ssh-ed25519 AAAA..."),
):
result = runner.invoke(app, ["ssh", "status", f"--config={config_file}"])
assert "Key exists" in result.output
class TestSshSetup:
"""Tests for cf ssh setup command."""
def test_setup_no_remote_hosts(self, tmp_path: Path) -> None:
"""Show message when no remote hosts configured."""
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text("""
hosts:
local:
address: localhost
services:
test: local
""")
result = runner.invoke(app, ["ssh", "setup", f"--config={config_file}"])
assert "No remote hosts" in result.output
class TestSshHelp:
"""Tests for cf ssh help."""
def test_ssh_help(self) -> None:
"""Show help for ssh command."""
result = runner.invoke(app, ["ssh", "--help"])
assert result.exit_code == 0
assert "setup" in result.output
assert "status" in result.output
assert "keygen" in result.output

tests/test_cli_startup.py (new file, 58 lines)

@@ -0,0 +1,58 @@
"""Test CLI startup performance."""
from __future__ import annotations
import shutil
import subprocess
import sys
import time
# Thresholds in seconds, per OS
if sys.platform == "win32":
CLI_STARTUP_THRESHOLD = 2.0
elif sys.platform == "darwin":
CLI_STARTUP_THRESHOLD = 0.35
else: # Linux
CLI_STARTUP_THRESHOLD = 0.25
def test_cli_startup_time() -> None:
"""Verify CLI startup time stays within acceptable bounds.
This test ensures we don't accidentally introduce slow imports
that degrade the user experience.
"""
cf_path = shutil.which("cf")
assert cf_path is not None, "cf command not found in PATH"
    # Run up to 6 times; return early once a run comes in under the threshold
times: list[float] = []
for _ in range(6):
start = time.perf_counter()
result = subprocess.run(
[cf_path, "--help"],
check=False,
capture_output=True,
text=True,
)
elapsed = time.perf_counter() - start
times.append(elapsed)
# Verify the command succeeded
assert result.returncode == 0, f"CLI failed: {result.stderr}"
# Pass early if under threshold
if elapsed < CLI_STARTUP_THRESHOLD:
print(f"\nCLI startup: {elapsed:.3f}s (threshold: {CLI_STARTUP_THRESHOLD}s)")
return
# All attempts exceeded threshold
best_time = min(times)
msg = (
f"\nCLI startup times: {[f'{t:.3f}s' for t in times]}\n"
f"Best: {best_time:.3f}s, Threshold: {CLI_STARTUP_THRESHOLD}s"
)
print(msg)
err_msg = f"CLI startup too slow!\n{msg}\nCheck for slow imports."
raise AssertionError(err_msg)


@@ -128,6 +128,8 @@ class TestLoadConfig:
def test_load_config_not_found(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "empty_config"))
with pytest.raises(FileNotFoundError, match="Config file not found"):
load_config()

tests/test_config_cmd.py (new file, 230 lines)

@@ -0,0 +1,230 @@
"""Tests for config command module."""
from pathlib import Path
from typing import Any
import pytest
import yaml
from typer.testing import CliRunner
from compose_farm.cli import app
from compose_farm.cli.config import (
_generate_template,
_get_config_file,
_get_editor,
)
@pytest.fixture
def runner() -> CliRunner:
return CliRunner()
@pytest.fixture
def valid_config_data() -> dict[str, Any]:
return {
"compose_dir": "/opt/compose",
"hosts": {"server1": "192.168.1.10"},
"services": {"nginx": "server1"},
}
class TestGetEditor:
"""Tests for _get_editor function."""
def test_uses_editor_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("EDITOR", "code")
monkeypatch.delenv("VISUAL", raising=False)
assert _get_editor() == "code"
def test_uses_visual_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.delenv("EDITOR", raising=False)
monkeypatch.setenv("VISUAL", "subl")
assert _get_editor() == "subl"
def test_editor_takes_precedence(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("EDITOR", "vim")
monkeypatch.setenv("VISUAL", "code")
assert _get_editor() == "vim"
class TestGetConfigFile:
"""Tests for _get_config_file function."""
def test_explicit_path(self, tmp_path: Path) -> None:
config_file = tmp_path / "my-config.yaml"
config_file.touch()
result = _get_config_file(config_file)
assert result == config_file.resolve()
def test_cf_config_env(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
config_file = tmp_path / "env-config.yaml"
config_file.touch()
monkeypatch.setenv("CF_CONFIG", str(config_file))
result = _get_config_file(None)
assert result == config_file.resolve()
def test_returns_none_when_not_found(
self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
# Set XDG_CONFIG_HOME to a nonexistent path - config_search_paths() will
# now return paths that don't exist
monkeypatch.setenv("XDG_CONFIG_HOME", str(tmp_path / "nonexistent"))
result = _get_config_file(None)
assert result is None
class TestGenerateTemplate:
"""Tests for _generate_template function."""
def test_generates_valid_yaml(self) -> None:
template = _generate_template()
# Should be valid YAML
data = yaml.safe_load(template)
assert "compose_dir" in data
assert "hosts" in data
assert "services" in data
def test_has_documentation_comments(self) -> None:
template = _generate_template()
assert "# Compose Farm configuration" in template
assert "hosts:" in template
assert "services:" in template
class TestConfigInit:
"""Tests for cf config init command."""
def test_init_creates_file(
self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "new-config.yaml"
result = runner.invoke(app, ["config", "init", "-p", str(config_file)])
assert result.exit_code == 0
assert config_file.exists()
assert "Config file created" in result.stdout
def test_init_force_overwrites(
self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "existing.yaml"
config_file.write_text("old content")
result = runner.invoke(app, ["config", "init", "-p", str(config_file), "-f"])
assert result.exit_code == 0
content = config_file.read_text()
assert "old content" not in content
assert "compose_dir" in content
def test_init_prompts_on_existing(
self, runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "existing.yaml"
config_file.write_text("old content")
result = runner.invoke(app, ["config", "init", "-p", str(config_file)], input="n\n")
assert result.exit_code == 0
assert "Aborted" in result.stdout
assert config_file.read_text() == "old content"
class TestConfigPath:
"""Tests for cf config path command."""
def test_path_shows_config(
self,
runner: CliRunner,
tmp_path: Path,
valid_config_data: dict[str, Any],
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text(yaml.dump(valid_config_data))
result = runner.invoke(app, ["config", "path"])
assert result.exit_code == 0
assert str(config_file) in result.stdout
def test_path_with_explicit_path(self, runner: CliRunner, tmp_path: Path) -> None:
# When explicitly provided, path is returned even if file doesn't exist
nonexistent = tmp_path / "nonexistent.yaml"
result = runner.invoke(app, ["config", "path", "-p", str(nonexistent)])
assert result.exit_code == 0
assert str(nonexistent) in result.stdout
class TestConfigShow:
"""Tests for cf config show command."""
def test_show_displays_content(
self,
runner: CliRunner,
tmp_path: Path,
valid_config_data: dict[str, Any],
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text(yaml.dump(valid_config_data))
result = runner.invoke(app, ["config", "show"])
assert result.exit_code == 0
assert "Config file:" in result.stdout
def test_show_raw_output(
self,
runner: CliRunner,
tmp_path: Path,
valid_config_data: dict[str, Any],
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "compose-farm.yaml"
content = yaml.dump(valid_config_data)
config_file.write_text(content)
result = runner.invoke(app, ["config", "show", "-r"])
assert result.exit_code == 0
assert content in result.stdout
class TestConfigValidate:
"""Tests for cf config validate command."""
def test_validate_valid_config(
self,
runner: CliRunner,
tmp_path: Path,
valid_config_data: dict[str, Any],
monkeypatch: pytest.MonkeyPatch,
) -> None:
monkeypatch.chdir(tmp_path)
monkeypatch.delenv("CF_CONFIG", raising=False)
config_file = tmp_path / "compose-farm.yaml"
config_file.write_text(yaml.dump(valid_config_data))
result = runner.invoke(app, ["config", "validate"])
assert result.exit_code == 0
assert "Valid config" in result.stdout
assert "Hosts: 1" in result.stdout
assert "Services: 1" in result.stdout
def test_validate_invalid_config(self, runner: CliRunner, tmp_path: Path) -> None:
config_file = tmp_path / "invalid.yaml"
config_file.write_text("invalid: [yaml: content")
result = runner.invoke(app, ["config", "validate", "-p", str(config_file)])
assert result.exit_code == 1
# Error goes to stderr (captured in output when using CliRunner)
output = result.stdout + (result.stderr or "")
        assert "Invalid config" in output
def test_validate_missing_config(self, runner: CliRunner, tmp_path: Path) -> None:
nonexistent = tmp_path / "nonexistent.yaml"
result = runner.invoke(app, ["config", "validate", "-p", str(nonexistent)])
assert result.exit_code == 1
# Error goes to stderr
output = result.stdout + (result.stderr or "")
assert "Config file not found" in output or "not found" in output.lower()

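Because _generate_template is pinned to emit valid YAML with the three top-level keys, it can double as a programmatic starting point. A small sketch using only helpers imported above:

import yaml

from compose_farm.cli.config import _generate_template

template = _generate_template()   # commented starter config
data = yaml.safe_load(template)   # parses cleanly, as the tests assert
assert {"compose_dir", "hosts", "services"} <= set(data)
print(template)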
tests/test_executor.py (new file, 241 lines)

@@ -0,0 +1,241 @@
"""Tests for executor module."""
import sys
from pathlib import Path
import pytest
from compose_farm.config import Config, Host
from compose_farm.executor import (
CommandResult,
_run_local_command,
check_networks_exist,
check_paths_exist,
is_local,
run_command,
run_compose,
run_on_services,
)
# These tests run actual shell commands that only work on Linux
linux_only = pytest.mark.skipif(sys.platform != "linux", reason="Linux-only shell commands")
class TestIsLocal:
"""Tests for is_local function."""
@pytest.mark.parametrize(
"address",
["local", "localhost", "127.0.0.1", "::1", "LOCAL", "LOCALHOST"],
)
def test_local_addresses(self, address: str) -> None:
host = Host(address=address)
assert is_local(host) is True
@pytest.mark.parametrize(
"address",
["192.168.1.10", "nas01.local", "10.0.0.1", "example.com"],
)
def test_remote_addresses(self, address: str) -> None:
host = Host(address=address)
assert is_local(host) is False
class TestRunLocalCommand:
"""Tests for local command execution."""
async def test_run_local_command_success(self) -> None:
result = await _run_local_command("echo hello", "test-service")
assert result.success is True
assert result.exit_code == 0
assert result.service == "test-service"
async def test_run_local_command_failure(self) -> None:
result = await _run_local_command("exit 1", "test-service")
assert result.success is False
assert result.exit_code == 1
async def test_run_local_command_not_found(self) -> None:
result = await _run_local_command("nonexistent_command_xyz", "test-service")
assert result.success is False
assert result.exit_code != 0
async def test_run_local_command_captures_output(self) -> None:
result = await _run_local_command("echo hello", "test-service", stream=False)
assert "hello" in result.stdout
class TestRunCommand:
"""Tests for run_command dispatcher."""
async def test_run_command_local(self) -> None:
host = Host(address="localhost")
result = await run_command(host, "echo test", "test-service")
assert result.success is True
async def test_run_command_result_structure(self) -> None:
host = Host(address="local")
result = await run_command(host, "true", "my-service")
assert isinstance(result, CommandResult)
assert result.service == "my-service"
assert result.exit_code == 0
assert result.success is True
class TestRunCompose:
"""Tests for compose command execution."""
async def test_run_compose_builds_correct_command(self, tmp_path: Path) -> None:
# Create a minimal compose file
compose_dir = tmp_path / "compose"
service_dir = compose_dir / "test-service"
service_dir.mkdir(parents=True)
compose_file = service_dir / "docker-compose.yml"
compose_file.write_text("services: {}")
config = Config(
compose_dir=compose_dir,
hosts={"local": Host(address="localhost")},
services={"test-service": "local"},
)
# This will fail because docker compose isn't running,
# but we can verify the command structure works
result = await run_compose(config, "test-service", "config", stream=False)
# Command may fail due to no docker, but structure is correct
assert result.service == "test-service"
class TestRunOnServices:
"""Tests for parallel service execution."""
async def test_run_on_services_parallel(self) -> None:
config = Config(
compose_dir=Path("/tmp"),
hosts={"local": Host(address="localhost")},
services={"svc1": "local", "svc2": "local"},
)
# Use a simple command that will work without docker
# We'll test the parallelism structure
results = await run_on_services(config, ["svc1", "svc2"], "version", stream=False)
assert len(results) == 2
assert results[0].service == "svc1"
assert results[1].service == "svc2"
@linux_only
class TestCheckPathsExist:
"""Tests for check_paths_exist function (uses 'test -e' shell command)."""
async def test_check_existing_paths(self, tmp_path: Path) -> None:
"""Check paths that exist."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
# Create test paths
(tmp_path / "dir1").mkdir()
(tmp_path / "file1").touch()
result = await check_paths_exist(
config, "local", [str(tmp_path / "dir1"), str(tmp_path / "file1")]
)
assert result[str(tmp_path / "dir1")] is True
assert result[str(tmp_path / "file1")] is True
async def test_check_missing_paths(self, tmp_path: Path) -> None:
"""Check paths that don't exist."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_paths_exist(
config, "local", [str(tmp_path / "missing1"), str(tmp_path / "missing2")]
)
assert result[str(tmp_path / "missing1")] is False
assert result[str(tmp_path / "missing2")] is False
async def test_check_mixed_paths(self, tmp_path: Path) -> None:
"""Check mix of existing and missing paths."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
(tmp_path / "exists").mkdir()
result = await check_paths_exist(
config, "local", [str(tmp_path / "exists"), str(tmp_path / "missing")]
)
assert result[str(tmp_path / "exists")] is True
assert result[str(tmp_path / "missing")] is False
async def test_check_empty_paths(self, tmp_path: Path) -> None:
"""Empty path list returns empty dict."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_paths_exist(config, "local", [])
assert result == {}
@linux_only
class TestCheckNetworksExist:
"""Tests for check_networks_exist function (requires Docker)."""
async def test_check_bridge_network_exists(self, tmp_path: Path) -> None:
"""The 'bridge' network always exists on Docker hosts."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_networks_exist(config, "local", ["bridge"])
assert result["bridge"] is True
async def test_check_nonexistent_network(self, tmp_path: Path) -> None:
"""Check a network that doesn't exist."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_networks_exist(config, "local", ["nonexistent_network_xyz_123"])
assert result["nonexistent_network_xyz_123"] is False
async def test_check_mixed_networks(self, tmp_path: Path) -> None:
"""Check mix of existing and non-existing networks."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_networks_exist(
config, "local", ["bridge", "nonexistent_network_xyz_123"]
)
assert result["bridge"] is True
assert result["nonexistent_network_xyz_123"] is False
async def test_check_empty_networks(self, tmp_path: Path) -> None:
"""Empty network list returns empty dict."""
config = Config(
compose_dir=tmp_path,
hosts={"local": Host(address="localhost")},
services={},
)
result = await check_networks_exist(config, "local", [])
assert result == {}

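As the dispatcher tests show, run_command routes on is_local(host): loopback-style addresses run in a local subprocess and anything else goes over SSH. A minimal sketch using the signature exercised above:

import asyncio

from compose_farm.config import Host
from compose_farm.executor import CommandResult, is_local, run_command


async def main() -> None:
    host = Host(address="localhost")
    assert is_local(host)  # "localhost" short-circuits SSH
    result: CommandResult = await run_command(host, "echo hi", "demo-service")
    print(result.success, result.exit_code)


asyncio.run(main())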

@@ -8,8 +8,15 @@ from pathlib import Path
import pytest
from compose_farm.config import Config, Host
from compose_farm.logs import _parse_images_output, snapshot_services
from compose_farm.ssh import CommandResult
from compose_farm.executor import CommandResult
from compose_farm.logs import (
_parse_images_output,
collect_service_entries,
isoformat,
load_existing_entries,
merge_entries,
write_toml,
)
def test_parse_images_output_handles_list_and_lines() -> None:
@@ -55,26 +62,29 @@ async def test_snapshot_preserves_first_seen(tmp_path: Path) -> None:
log_path = tmp_path / "dockerfarm-log.toml"
# First snapshot
first_time = datetime(2025, 1, 1, tzinfo=UTC)
await snapshot_services(
config,
["svc"],
log_path=log_path,
now=first_time,
run_compose_fn=fake_run_compose,
first_entries = await collect_service_entries(
config, "svc", now=first_time, run_compose_fn=fake_run_compose
)
first_iso = isoformat(first_time)
merged = merge_entries([], first_entries, now_iso=first_iso)
meta = {"generated_at": first_iso, "compose_dir": str(config.compose_dir)}
write_toml(log_path, meta=meta, entries=merged)
after_first = tomllib.loads(log_path.read_text())
first_seen = after_first["entries"][0]["first_seen"]
# Second snapshot
second_time = datetime(2025, 2, 1, tzinfo=UTC)
await snapshot_services(
config,
["svc"],
log_path=log_path,
now=second_time,
run_compose_fn=fake_run_compose,
second_entries = await collect_service_entries(
config, "svc", now=second_time, run_compose_fn=fake_run_compose
)
second_iso = isoformat(second_time)
existing = load_existing_entries(log_path)
merged = merge_entries(existing, second_entries, now_iso=second_iso)
meta = {"generated_at": second_iso, "compose_dir": str(config.compose_dir)}
write_toml(log_path, meta=meta, entries=merged)
after_second = tomllib.loads(log_path.read_text())
entry = after_second["entries"][0]

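The refactored test spells out the new snapshot pipeline: collect entries per service, merge them with what is already on disk (preserving first_seen), then write the TOML back. A sketch of how the pieces compose, using only the signatures the test exercises (config, services, and log_path supplied by the caller):

from datetime import UTC, datetime

from compose_farm.logs import (
    collect_service_entries,
    isoformat,
    load_existing_entries,
    merge_entries,
    write_toml,
)


async def snapshot(config, services, log_path):
    now = datetime.now(UTC)
    now_iso = isoformat(now)
    entries = []
    for svc in services:
        entries += await collect_service_entries(config, svc, now=now)
    merged = merge_entries(load_existing_entries(log_path), entries, now_iso=now_iso)
    meta = {"generated_at": now_iso, "compose_dir": str(config.compose_dir)}
    write_toml(log_path, meta=meta, entries=merged)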
tests/test_operations.py (new file, 111 lines)

@@ -0,0 +1,111 @@
"""Tests for operations module."""
from __future__ import annotations
import inspect
from pathlib import Path
from unittest.mock import patch
import pytest
from compose_farm.cli import lifecycle
from compose_farm.config import Config, Host
from compose_farm.executor import CommandResult
from compose_farm.operations import _migrate_service
@pytest.fixture
def basic_config(tmp_path: Path) -> Config:
"""Create a basic test config."""
compose_dir = tmp_path / "compose"
service_dir = compose_dir / "test-service"
service_dir.mkdir(parents=True)
(service_dir / "docker-compose.yml").write_text("services: {}")
return Config(
compose_dir=compose_dir,
hosts={
"host1": Host(address="localhost"),
"host2": Host(address="localhost"),
},
services={"test-service": "host2"},
)
class TestMigrationCommands:
"""Tests for migration command sequence."""
@pytest.fixture
def config(self, tmp_path: Path) -> Config:
"""Create a test config."""
compose_dir = tmp_path / "compose"
service_dir = compose_dir / "test-service"
service_dir.mkdir(parents=True)
(service_dir / "docker-compose.yml").write_text("services: {}")
return Config(
compose_dir=compose_dir,
hosts={
"host1": Host(address="localhost"),
"host2": Host(address="localhost"),
},
services={"test-service": "host2"},
)
async def test_migration_uses_pull_ignore_buildable(self, config: Config) -> None:
"""Migration should use 'pull --ignore-buildable' to skip buildable images."""
commands_called: list[str] = []
async def mock_run_compose_step(
cfg: Config,
service: str,
command: str,
*,
raw: bool,
host: str | None = None,
) -> CommandResult:
commands_called.append(command)
return CommandResult(
service=service,
exit_code=0,
success=True,
)
with patch(
"compose_farm.operations._run_compose_step",
side_effect=mock_run_compose_step,
):
await _migrate_service(
config,
"test-service",
current_host="host1",
target_host="host2",
prefix="[test]",
raw=False,
)
# Migration should call pull with --ignore-buildable, then build, then down
assert "pull --ignore-buildable" in commands_called
assert "build" in commands_called
assert "down" in commands_called
# pull should come before build
pull_idx = commands_called.index("pull --ignore-buildable")
build_idx = commands_called.index("build")
assert pull_idx < build_idx
class TestUpdateCommandSequence:
"""Tests for update command sequence."""
def test_update_command_sequence_includes_build(self) -> None:
"""Update command should use pull --ignore-buildable and build."""
# This is a static check of the command sequence in lifecycle.py
# The actual command sequence is defined in the update function
source = inspect.getsource(lifecycle.update)
# Verify the command sequence includes pull --ignore-buildable
assert "pull --ignore-buildable" in source
# Verify build is included
assert '"build"' in source or "'build'" in source
# Verify the sequence is pull, build, down, up
assert "down" in source
assert "up -d" in source


@@ -1,16 +1,15 @@
"""Tests for sync command and related functions."""
from pathlib import Path
from typing import Any
from unittest.mock import AsyncMock, patch
import pytest
from compose_farm import cli as cli_module
from compose_farm import ssh as ssh_module
from compose_farm import executor as executor_module
from compose_farm import state as state_module
from compose_farm.cli import management as cli_management_module
from compose_farm.config import Config, Host
from compose_farm.ssh import CommandResult, check_service_running
from compose_farm.executor import CommandResult, check_service_running
@pytest.fixture
@@ -58,7 +57,7 @@ class TestCheckServiceRunning:
@pytest.mark.asyncio
async def test_service_running(self, mock_config: Config) -> None:
"""Returns True when service has running containers."""
with patch.object(ssh_module, "run_command", new_callable=AsyncMock) as mock_run:
with patch.object(executor_module, "run_command", new_callable=AsyncMock) as mock_run:
mock_run.return_value = CommandResult(
service="plex",
exit_code=0,
@@ -71,7 +70,7 @@ class TestCheckServiceRunning:
@pytest.mark.asyncio
async def test_service_not_running(self, mock_config: Config) -> None:
"""Returns False when service has no running containers."""
with patch.object(ssh_module, "run_command", new_callable=AsyncMock) as mock_run:
with patch.object(executor_module, "run_command", new_callable=AsyncMock) as mock_run:
mock_run.return_value = CommandResult(
service="plex",
exit_code=0,
@@ -84,7 +83,7 @@ class TestCheckServiceRunning:
@pytest.mark.asyncio
async def test_command_failed(self, mock_config: Config) -> None:
"""Returns False when command fails."""
with patch.object(ssh_module, "run_command", new_callable=AsyncMock) as mock_run:
with patch.object(executor_module, "run_command", new_callable=AsyncMock) as mock_run:
mock_run.return_value = CommandResult(
service="plex",
exit_code=1,
@@ -94,48 +93,12 @@ class TestCheckServiceRunning:
assert result is False
class TestDiscoverRunningServices:
"""Tests for _discover_running_services function."""
@pytest.mark.asyncio
async def test_discovers_on_assigned_host(self, mock_config: Config) -> None:
"""Discovers service running on its assigned host."""
with patch.object(
cli_module, "check_service_running", new_callable=AsyncMock
) as mock_check:
# plex running on nas01, jellyfin not running, sonarr on nas02
async def check_side_effect(_cfg: Any, service: str, host: str) -> bool:
return (service == "plex" and host == "nas01") or (
service == "sonarr" and host == "nas02"
)
mock_check.side_effect = check_side_effect
result = await cli_module._discover_running_services(mock_config)
assert result == {"plex": "nas01", "sonarr": "nas02"}
@pytest.mark.asyncio
async def test_discovers_on_different_host(self, mock_config: Config) -> None:
"""Discovers service running on non-assigned host (after migration)."""
with patch.object(
cli_module, "check_service_running", new_callable=AsyncMock
) as mock_check:
# plex migrated to nas02
async def check_side_effect(_cfg: Any, service: str, host: str) -> bool:
return service == "plex" and host == "nas02"
mock_check.side_effect = check_side_effect
result = await cli_module._discover_running_services(mock_config)
assert result == {"plex": "nas02"}
class TestReportSyncChanges:
"""Tests for _report_sync_changes function."""
def test_reports_added(self, capsys: pytest.CaptureFixture[str]) -> None:
"""Reports newly discovered services."""
cli_module._report_sync_changes(
cli_management_module._report_sync_changes(
added=["plex", "jellyfin"],
removed=[],
changed=[],
@@ -149,7 +112,7 @@ class TestReportSyncChanges:
def test_reports_removed(self, capsys: pytest.CaptureFixture[str]) -> None:
"""Reports services that are no longer running."""
cli_module._report_sync_changes(
cli_management_module._report_sync_changes(
added=[],
removed=["sonarr"],
changed=[],
@@ -162,7 +125,7 @@ class TestReportSyncChanges:
def test_reports_changed(self, capsys: pytest.CaptureFixture[str]) -> None:
"""Reports services that moved to a different host."""
cli_module._report_sync_changes(
cli_management_module._report_sync_changes(
added=[],
removed=[],
changed=[("plex", "nas01", "nas02")],


@@ -1,118 +0,0 @@
"""Tests for ssh module."""
from pathlib import Path
import pytest
from compose_farm.config import Config, Host
from compose_farm.ssh import (
CommandResult,
_is_local,
_run_local_command,
run_command,
run_compose,
run_on_services,
)
class TestIsLocal:
"""Tests for _is_local function."""
@pytest.mark.parametrize(
"address",
["local", "localhost", "127.0.0.1", "::1", "LOCAL", "LOCALHOST"],
)
def test_local_addresses(self, address: str) -> None:
host = Host(address=address)
assert _is_local(host) is True
@pytest.mark.parametrize(
"address",
["192.168.1.10", "nas01.local", "10.0.0.1", "example.com"],
)
def test_remote_addresses(self, address: str) -> None:
host = Host(address=address)
assert _is_local(host) is False
class TestRunLocalCommand:
"""Tests for local command execution."""
async def test_run_local_command_success(self) -> None:
result = await _run_local_command("echo hello", "test-service")
assert result.success is True
assert result.exit_code == 0
assert result.service == "test-service"
async def test_run_local_command_failure(self) -> None:
result = await _run_local_command("exit 1", "test-service")
assert result.success is False
assert result.exit_code == 1
async def test_run_local_command_not_found(self) -> None:
result = await _run_local_command("nonexistent_command_xyz", "test-service")
assert result.success is False
assert result.exit_code != 0
async def test_run_local_command_captures_output(self) -> None:
result = await _run_local_command("echo hello", "test-service", stream=False)
assert "hello" in result.stdout
class TestRunCommand:
"""Tests for run_command dispatcher."""
async def test_run_command_local(self) -> None:
host = Host(address="localhost")
result = await run_command(host, "echo test", "test-service")
assert result.success is True
async def test_run_command_result_structure(self) -> None:
host = Host(address="local")
result = await run_command(host, "true", "my-service")
assert isinstance(result, CommandResult)
assert result.service == "my-service"
assert result.exit_code == 0
assert result.success is True
class TestRunCompose:
"""Tests for compose command execution."""
async def test_run_compose_builds_correct_command(self, tmp_path: Path) -> None:
# Create a minimal compose file
compose_dir = tmp_path / "compose"
service_dir = compose_dir / "test-service"
service_dir.mkdir(parents=True)
compose_file = service_dir / "docker-compose.yml"
compose_file.write_text("services: {}")
config = Config(
compose_dir=compose_dir,
hosts={"local": Host(address="localhost")},
services={"test-service": "local"},
)
# This will fail because docker compose isn't running,
# but we can verify the command structure works
result = await run_compose(config, "test-service", "config", stream=False)
# Command may fail due to no docker, but structure is correct
assert result.service == "test-service"
class TestRunOnServices:
"""Tests for parallel service execution."""
async def test_run_on_services_parallel(self) -> None:
config = Config(
compose_dir=Path("/tmp"),
hosts={"local": Host(address="localhost")},
services={"svc1": "local", "svc2": "local"},
)
# Use a simple command that will work without docker
# We'll test the parallelism structure
results = await run_on_services(config, ["svc1", "svc2"], "version", stream=False)
assert len(results) == 2
assert results[0].service == "svc1"
assert results[1].service == "svc2"

tests/test_ssh_keys.py (new file, 245 lines)

@@ -0,0 +1,245 @@
"""Tests for ssh_keys module."""
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
from compose_farm.config import Host
from compose_farm.executor import ssh_connect_kwargs
from compose_farm.ssh_keys import (
SSH_KEY_PATH,
get_key_path,
get_pubkey_content,
get_ssh_auth_sock,
get_ssh_env,
key_exists,
)
class TestGetSshAuthSock:
"""Tests for get_ssh_auth_sock function."""
def test_returns_env_var_when_socket_exists(self) -> None:
"""Return SSH_AUTH_SOCK env var if the socket exists."""
mock_path = MagicMock()
mock_path.is_socket.return_value = True
with (
patch.dict(os.environ, {"SSH_AUTH_SOCK": "/tmp/agent.sock"}),
patch("compose_farm.ssh_keys.Path", return_value=mock_path),
):
result = get_ssh_auth_sock()
assert result == "/tmp/agent.sock"
def test_returns_none_when_env_var_not_socket(self, tmp_path: Path) -> None:
"""Return None if SSH_AUTH_SOCK points to non-socket."""
regular_file = tmp_path / "not_a_socket"
regular_file.touch()
with (
patch.dict(os.environ, {"SSH_AUTH_SOCK": str(regular_file)}),
patch("compose_farm.ssh_keys.Path.home", return_value=tmp_path),
):
# Should fall through to agent dir check, which won't exist
result = get_ssh_auth_sock()
assert result is None
def test_finds_agent_in_ssh_agent_dir(self, tmp_path: Path) -> None:
"""Find agent socket in ~/.ssh/agent/ directory."""
# Create agent directory structure with a regular file
agent_dir = tmp_path / ".ssh" / "agent"
agent_dir.mkdir(parents=True)
sock_path = agent_dir / "s.12345.sshd.67890"
sock_path.touch() # Create as regular file
with (
patch.dict(os.environ, {}, clear=False),
patch("compose_farm.ssh_keys.Path.home", return_value=tmp_path),
patch.object(Path, "is_socket", return_value=True),
):
os.environ.pop("SSH_AUTH_SOCK", None)
result = get_ssh_auth_sock()
assert result == str(sock_path)
def test_returns_none_when_no_agent_found(self, tmp_path: Path) -> None:
"""Return None when no SSH agent socket is found."""
with (
patch.dict(os.environ, {}, clear=False),
patch("compose_farm.ssh_keys.Path.home", return_value=tmp_path),
):
os.environ.pop("SSH_AUTH_SOCK", None)
result = get_ssh_auth_sock()
assert result is None
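The four cases above imply a two-step lookup: trust $SSH_AUTH_SOCK only if it points at a real socket, otherwise scan ~/.ssh/agent/ for one. A sketch under those assumptions; the actual compose_farm.ssh_keys implementation may differ in details:

```python
# Assumed lookup order, reconstructed from the tests above rather than
# from compose_farm.ssh_keys: env var first, then a ~/.ssh/agent/ scan.
import os
from pathlib import Path


def get_ssh_auth_sock() -> str | None:
    env_sock = os.environ.get("SSH_AUTH_SOCK")
    if env_sock and Path(env_sock).is_socket():
        return env_sock
    agent_dir = Path.home() / ".ssh" / "agent"
    if agent_dir.is_dir():
        for candidate in agent_dir.iterdir():
            if candidate.is_socket():
                return str(candidate)
    return None
```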
class TestGetSshEnv:
"""Tests for get_ssh_env function."""
def test_returns_env_with_ssh_auth_sock(self) -> None:
"""Return env dict with SSH_AUTH_SOCK set."""
with patch("compose_farm.ssh_keys.get_ssh_auth_sock", return_value="/tmp/agent.sock"):
result = get_ssh_env()
assert result["SSH_AUTH_SOCK"] == "/tmp/agent.sock"
# Should include other env vars too
assert "PATH" in result or len(result) > 1
def test_returns_env_without_ssh_auth_sock_when_none(self, tmp_path: Path) -> None:
"""Return env without SSH_AUTH_SOCK when no agent found."""
with (
patch.dict(os.environ, {}, clear=False),
patch("compose_farm.ssh_keys.Path.home", return_value=tmp_path),
):
os.environ.pop("SSH_AUTH_SOCK", None)
result = get_ssh_env()
# SSH_AUTH_SOCK should not be set if no agent found
assert result.get("SSH_AUTH_SOCK") is None
class TestKeyExists:
"""Tests for key_exists function."""
def test_returns_true_when_both_keys_exist(self, tmp_path: Path) -> None:
"""Return True when both private and public keys exist."""
key_path = tmp_path / "compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
key_path.touch()
pubkey_path.touch()
with (
patch("compose_farm.ssh_keys.SSH_KEY_PATH", key_path),
patch("compose_farm.ssh_keys.SSH_PUBKEY_PATH", pubkey_path),
):
assert key_exists() is True
def test_returns_false_when_private_key_missing(self, tmp_path: Path) -> None:
"""Return False when private key doesn't exist."""
key_path = tmp_path / "compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
pubkey_path.touch() # Only public key exists
with (
patch("compose_farm.ssh_keys.SSH_KEY_PATH", key_path),
patch("compose_farm.ssh_keys.SSH_PUBKEY_PATH", pubkey_path),
):
assert key_exists() is False
def test_returns_false_when_public_key_missing(self, tmp_path: Path) -> None:
"""Return False when public key doesn't exist."""
key_path = tmp_path / "compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
key_path.touch() # Only private key exists
with (
patch("compose_farm.ssh_keys.SSH_KEY_PATH", key_path),
patch("compose_farm.ssh_keys.SSH_PUBKEY_PATH", pubkey_path),
):
assert key_exists() is False
class TestGetKeyPath:
"""Tests for get_key_path function."""
def test_returns_path_when_key_exists(self) -> None:
"""Return key path when key exists."""
with patch("compose_farm.ssh_keys.key_exists", return_value=True):
result = get_key_path()
assert result == SSH_KEY_PATH
def test_returns_none_when_key_missing(self) -> None:
"""Return None when key doesn't exist."""
with patch("compose_farm.ssh_keys.key_exists", return_value=False):
result = get_key_path()
assert result is None
class TestGetPubkeyContent:
"""Tests for get_pubkey_content function."""
def test_returns_content_when_exists(self, tmp_path: Path) -> None:
"""Return public key content when file exists."""
pubkey_content = "ssh-ed25519 AAAA... compose-farm"
pubkey_path = tmp_path / "compose-farm.pub"
pubkey_path.write_text(pubkey_content + "\n")
with patch("compose_farm.ssh_keys.SSH_PUBKEY_PATH", pubkey_path):
result = get_pubkey_content()
assert result == pubkey_content
def test_returns_none_when_missing(self, tmp_path: Path) -> None:
"""Return None when public key doesn't exist."""
pubkey_path = tmp_path / "compose-farm.pub" # Doesn't exist
with patch("compose_farm.ssh_keys.SSH_PUBKEY_PATH", pubkey_path):
result = get_pubkey_content()
assert result is None
class TestSshConnectKwargs:
"""Tests for ssh_connect_kwargs function."""
def test_basic_kwargs(self) -> None:
"""Return basic connection kwargs."""
host = Host(address="example.com", port=22, user="testuser")
with (
patch("compose_farm.executor.get_ssh_auth_sock", return_value=None),
patch("compose_farm.executor.get_key_path", return_value=None),
):
result = ssh_connect_kwargs(host)
assert result["host"] == "example.com"
assert result["port"] == 22
assert result["username"] == "testuser"
assert result["known_hosts"] is None
assert "agent_path" not in result
assert "client_keys" not in result
def test_includes_agent_path_when_available(self) -> None:
"""Include agent_path when SSH agent is available."""
host = Host(address="example.com")
with (
patch("compose_farm.executor.get_ssh_auth_sock", return_value="/tmp/agent.sock"),
patch("compose_farm.executor.get_key_path", return_value=None),
):
result = ssh_connect_kwargs(host)
assert result["agent_path"] == "/tmp/agent.sock"
def test_includes_client_keys_when_key_exists(self, tmp_path: Path) -> None:
"""Include client_keys when compose-farm key exists."""
host = Host(address="example.com")
key_path = tmp_path / "compose-farm"
with (
patch("compose_farm.executor.get_ssh_auth_sock", return_value=None),
patch("compose_farm.executor.get_key_path", return_value=key_path),
):
result = ssh_connect_kwargs(host)
assert result["client_keys"] == [str(key_path)]
def test_includes_both_agent_and_key(self, tmp_path: Path) -> None:
"""Include both agent_path and client_keys when both available."""
host = Host(address="example.com")
key_path = tmp_path / "compose-farm"
with (
patch("compose_farm.executor.get_ssh_auth_sock", return_value="/tmp/agent.sock"),
patch("compose_farm.executor.get_key_path", return_value=key_path),
):
result = ssh_connect_kwargs(host)
assert result["agent_path"] == "/tmp/agent.sock"
assert result["client_keys"] == [str(key_path)]
def test_custom_port(self) -> None:
"""Handle custom SSH port."""
host = Host(address="example.com", port=2222)
with (
patch("compose_farm.executor.get_ssh_auth_sock", return_value=None),
patch("compose_farm.executor.get_key_path", return_value=None),
):
result = ssh_connect_kwargs(host)
assert result["port"] == 2222


@@ -6,7 +6,9 @@ import pytest
from compose_farm.config import Config, Host
from compose_farm.state import (
get_orphaned_services,
get_service_host,
get_services_not_in_state,
load_state,
remove_service,
save_state,
@@ -130,3 +132,110 @@ class TestRemoveService:
result = load_state(config)
assert result["plex"] == "nas01"
class TestGetOrphanedServices:
"""Tests for get_orphaned_services function."""
def test_no_orphans(self, config: Config) -> None:
"""Returns empty dict when all services in state are in config."""
state_file = config.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n")
result = get_orphaned_services(config)
assert result == {}
def test_finds_orphaned_service(self, config: Config) -> None:
"""Returns services in state but not in config."""
state_file = config.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n jellyfin: nas02\n")
result = get_orphaned_services(config)
# plex is in config, jellyfin is not
assert result == {"jellyfin": "nas02"}
def test_finds_orphaned_multi_host_service(self, config: Config) -> None:
"""Returns multi-host orphaned services with host list."""
state_file = config.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n dozzle:\n - nas01\n - nas02\n")
result = get_orphaned_services(config)
assert result == {"dozzle": ["nas01", "nas02"]}
def test_empty_state(self, config: Config) -> None:
"""Returns empty dict when state is empty."""
result = get_orphaned_services(config)
assert result == {}
def test_all_orphaned(self, tmp_path: Path) -> None:
"""Returns all services when none are in config."""
config_path = tmp_path / "compose-farm.yaml"
config_path.write_text("")
cfg = Config(
compose_dir=tmp_path / "compose",
hosts={"nas01": Host(address="192.168.1.10")},
services={}, # No services in config
config_path=config_path,
)
state_file = cfg.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n jellyfin: nas02\n")
result = get_orphaned_services(cfg)
assert result == {"plex": "nas01", "jellyfin": "nas02"}
class TestGetServicesNotInState:
"""Tests for get_services_not_in_state function."""
def test_all_in_state(self, config: Config) -> None:
"""Returns empty list when all services are in state."""
state_file = config.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n")
result = get_services_not_in_state(config)
assert result == []
def test_finds_missing_service(self, tmp_path: Path) -> None:
"""Returns services in config but not in state."""
config_path = tmp_path / "compose-farm.yaml"
config_path.write_text("")
cfg = Config(
compose_dir=tmp_path / "compose",
hosts={"nas01": Host(address="192.168.1.10")},
services={"plex": "nas01", "jellyfin": "nas01"},
config_path=config_path,
)
state_file = cfg.get_state_path()
state_file.write_text("deployed:\n plex: nas01\n")
result = get_services_not_in_state(cfg)
assert result == ["jellyfin"]
def test_empty_state(self, tmp_path: Path) -> None:
"""Returns all services when state is empty."""
config_path = tmp_path / "compose-farm.yaml"
config_path.write_text("")
cfg = Config(
compose_dir=tmp_path / "compose",
hosts={"nas01": Host(address="192.168.1.10")},
services={"plex": "nas01", "jellyfin": "nas01"},
config_path=config_path,
)
result = get_services_not_in_state(cfg)
assert set(result) == {"plex", "jellyfin"}
def test_empty_config(self, config: Config) -> None:
"""Returns empty list when config has no services."""
# The config fixture defines plex: nas01; build a config with no services instead.
config_path = config.config_path
config_path.write_text("")
cfg = Config(
compose_dir=config.compose_dir,
hosts={"nas01": Host(address="192.168.1.10")},
services={},
config_path=config_path,
)
result = get_services_not_in_state(cfg)
assert result == []
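Both helpers reduce to set differences between config.services and the deployed mapping returned by load_state; the dozzle case shows a state value may be a single host name or a list of them. A sketch consistent with the tests, not the actual compose_farm.state source:

```python
# Sketch of the two set-difference helpers described by the tests
# (inferred from the assertions; compose_farm.state may differ).
from compose_farm.config import Config
from compose_farm.state import load_state


def get_orphaned_services(config: Config) -> dict[str, str | list[str]]:
    """Services recorded in state but no longer present in the config."""
    deployed = load_state(config)  # e.g. {"plex": "nas01", "dozzle": ["nas01", "nas02"]}
    return {svc: host for svc, host in deployed.items() if svc not in config.services}


def get_services_not_in_state(config: Config) -> list[str]:
    """Services declared in the config that were never recorded as deployed."""
    deployed = load_state(config)
    return [svc for svc in config.services if svc not in deployed]
```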


@@ -4,6 +4,7 @@ from pathlib import Path
import yaml
from compose_farm.compose import parse_external_networks
from compose_farm.config import Config, Host
from compose_farm.traefik import generate_traefik_config
@@ -241,3 +242,97 @@ def test_generate_follows_network_mode_service_for_ports(tmp_path: Path) -> None
assert torrent_servers == [{"url": "http://192.168.1.10:5080"}]
prowlarr_servers = dynamic["http"]["services"]["prowlarr"]["loadbalancer"]["servers"]
assert prowlarr_servers == [{"url": "http://192.168.1.10:9696"}]
def test_parse_external_networks_single(tmp_path: Path) -> None:
"""Extract a single external network from compose file."""
cfg = Config(
compose_dir=tmp_path,
hosts={"host1": Host(address="192.168.1.10")},
services={"app": "host1"},
)
compose_path = tmp_path / "app" / "compose.yaml"
_write_compose(
compose_path,
{
"services": {"app": {"image": "nginx"}},
"networks": {"mynetwork": {"external": True}},
},
)
networks = parse_external_networks(cfg, "app")
assert networks == ["mynetwork"]
def test_parse_external_networks_multiple(tmp_path: Path) -> None:
"""Extract multiple external networks from compose file."""
cfg = Config(
compose_dir=tmp_path,
hosts={"host1": Host(address="192.168.1.10")},
services={"app": "host1"},
)
compose_path = tmp_path / "app" / "compose.yaml"
_write_compose(
compose_path,
{
"services": {"app": {"image": "nginx"}},
"networks": {
"frontend": {"external": True},
"backend": {"external": True},
"internal": {"driver": "bridge"}, # not external
},
},
)
networks = parse_external_networks(cfg, "app")
assert set(networks) == {"frontend", "backend"}
def test_parse_external_networks_none(tmp_path: Path) -> None:
"""No external networks returns empty list."""
cfg = Config(
compose_dir=tmp_path,
hosts={"host1": Host(address="192.168.1.10")},
services={"app": "host1"},
)
compose_path = tmp_path / "app" / "compose.yaml"
_write_compose(
compose_path,
{
"services": {"app": {"image": "nginx"}},
"networks": {"internal": {"driver": "bridge"}},
},
)
networks = parse_external_networks(cfg, "app")
assert networks == []
def test_parse_external_networks_no_networks_section(tmp_path: Path) -> None:
"""No networks section returns empty list."""
cfg = Config(
compose_dir=tmp_path,
hosts={"host1": Host(address="192.168.1.10")},
services={"app": "host1"},
)
compose_path = tmp_path / "app" / "compose.yaml"
_write_compose(
compose_path,
{"services": {"app": {"image": "nginx"}}},
)
networks = parse_external_networks(cfg, "app")
assert networks == []
def test_parse_external_networks_missing_compose(tmp_path: Path) -> None:
"""Missing compose file returns empty list."""
cfg = Config(
compose_dir=tmp_path,
hosts={"host1": Host(address="192.168.1.10")},
services={"app": "host1"},
)
# Don't create compose file
networks = parse_external_networks(cfg, "app")
assert networks == []
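The four cases reduce to: locate the service's compose file, keep the networks whose definition sets external: true, and treat a missing file or missing networks section as no external networks. A sketch under those assumptions; in particular, the compose_dir / service / compose.yaml path resolution is a guess:

```python
# Sketch matching the four cases above; the compose file location
# (compose_dir / service / "compose.yaml") is an assumption.
import yaml

from compose_farm.config import Config


def parse_external_networks(config: Config, service: str) -> list[str]:
    compose_path = config.compose_dir / service / "compose.yaml"
    if not compose_path.exists():
        return []  # missing compose file: nothing to report
    data = yaml.safe_load(compose_path.read_text()) or {}
    networks = data.get("networks") or {}
    return [name for name, spec in networks.items() if (spec or {}).get("external")]
```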

tests/web/__init__.py Normal file (+1 line)

@@ -0,0 +1 @@
"""Web UI tests."""

tests/web/conftest.py Normal file (+87 lines)

@@ -0,0 +1,87 @@
"""Fixtures for web UI tests."""
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
if TYPE_CHECKING:
from compose_farm.config import Config
@pytest.fixture
def compose_dir(tmp_path: Path) -> Path:
"""Create a temporary compose directory with sample services."""
compose_path = tmp_path / "compose"
compose_path.mkdir()
# Create a sample service
plex_dir = compose_path / "plex"
plex_dir.mkdir()
(plex_dir / "compose.yaml").write_text("""
services:
plex:
image: plexinc/pms-docker
container_name: plex
ports:
- "32400:32400"
""")
(plex_dir / ".env").write_text("PLEX_CLAIM=claim-xxx\n")
# Create another service
sonarr_dir = compose_path / "sonarr"
sonarr_dir.mkdir()
(sonarr_dir / "compose.yaml").write_text("""
services:
sonarr:
image: linuxserver/sonarr
""")
return compose_path
@pytest.fixture
def config_file(tmp_path: Path, compose_dir: Path) -> Path:
"""Create a temporary config file and state file."""
config_path = tmp_path / "compose-farm.yaml"
config_path.write_text(f"""
compose_dir: {compose_dir}
hosts:
server-1:
address: 192.168.1.10
user: docker
server-2:
address: 192.168.1.11
services:
plex: server-1
sonarr: server-2
""")
# State file must be alongside config file
state_path = tmp_path / "compose-farm-state.yaml"
state_path.write_text("""
deployed:
plex: server-1
""")
return config_path
@pytest.fixture
def mock_config(config_file: Path, monkeypatch: pytest.MonkeyPatch) -> Config:
"""Patch get_config to return a test config."""
from compose_farm.config import load_config
from compose_farm.web import deps as web_deps
from compose_farm.web.routes import api as web_api
config = load_config(config_file)
# Patch in all modules that import get_config
monkeypatch.setattr(web_deps, "get_config", lambda: config)
monkeypatch.setattr(web_api, "get_config", lambda: config)
return config
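With the three fixtures chained (compose_dir feeds config_file, which feeds mock_config), a web test only has to request mock_config to see the temporary setup. A hypothetical consumer, not taken from the suite:

```python
# Hypothetical consumer of the fixtures above; the test body is
# illustrative, not part of the actual test suite.
from compose_farm.config import Config


def test_mock_config_reflects_tmp_layout(mock_config: Config) -> None:
    # config_file declared plex and sonarr; compose_dir created both dirs.
    assert set(mock_config.services) == {"plex", "sonarr"}
    assert (mock_config.compose_dir / "plex" / "compose.yaml").exists()
```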

tests/web/test_backup.py Normal file (+54 lines)

@@ -0,0 +1,54 @@
"""Tests for file backup functionality."""
from pathlib import Path
from compose_farm.web.routes.api import _backup_file, _save_with_backup
def test_backup_creates_timestamped_file(tmp_path: Path) -> None:
"""Test that backup creates file in .backups with correct content."""
test_file = tmp_path / "test.yaml"
test_file.write_text("original content")
backup_path = _backup_file(test_file)
assert backup_path is not None
assert backup_path.parent.name == ".backups"
assert backup_path.name.startswith("test.yaml.")
assert backup_path.read_text() == "original content"
def test_backup_returns_none_for_nonexistent_file(tmp_path: Path) -> None:
"""Test that backup returns None if file doesn't exist."""
assert _backup_file(tmp_path / "nonexistent.yaml") is None
def test_save_creates_new_file(tmp_path: Path) -> None:
"""Test that save creates new file without backup."""
test_file = tmp_path / "new.yaml"
assert _save_with_backup(test_file, "content") is True
assert test_file.read_text() == "content"
assert not (tmp_path / ".backups").exists()
def test_save_skips_unchanged_content(tmp_path: Path) -> None:
"""Test that save returns False and creates no backup if unchanged."""
test_file = tmp_path / "test.yaml"
test_file.write_text("same")
assert _save_with_backup(test_file, "same") is False
assert not (tmp_path / ".backups").exists()
def test_save_creates_backup_before_overwrite(tmp_path: Path) -> None:
"""Test that save backs up original before overwriting."""
test_file = tmp_path / "test.yaml"
test_file.write_text("original")
assert _save_with_backup(test_file, "new") is True
assert test_file.read_text() == "new"
backups = list((tmp_path / ".backups").glob("test.yaml.*"))
assert len(backups) == 1
assert backups[0].read_text() == "original"
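The four tests pin down the save path: skip the write entirely when the content is unchanged, otherwise back up any existing file into a sibling .backups directory under a timestamped name before overwriting. A sketch consistent with those assertions; the timestamp format is an assumption:

```python
# Sketch of the backup-then-save flow asserted above (inferred; the
# timestamp suffix format is an assumption).
from datetime import datetime
from pathlib import Path


def _backup_file(path: Path) -> Path | None:
    if not path.exists():
        return None  # nothing to back up, and .backups is never created
    backup_dir = path.parent / ".backups"
    backup_dir.mkdir(exist_ok=True)
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    backup_path = backup_dir / f"{path.name}.{stamp}"
    backup_path.write_text(path.read_text())
    return backup_path


def _save_with_backup(path: Path, content: str) -> bool:
    if path.exists() and path.read_text() == content:
        return False  # unchanged: no write, no backup
    _backup_file(path)  # no-op for brand-new files
    path.write_text(content)
    return True
```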

Some files were not shown because too many files have changed in this diff.