Compare commits

...

66 Commits

Author SHA1 Message Date
Bas Nijholt
5890221528 Mark package as Linux/macOS only in classifiers (#138) 2025-12-30 07:54:56 +01:00
Bas Nijholt
c8fc3c2496 Remove Windows workarounds (#137) 2025-12-30 07:51:51 +01:00
Bas Nijholt
ffb7a32402 Fix Glances connectivity when web UI runs in Docker container (#135) 2025-12-30 05:35:24 +01:00
Bas Nijholt
beb1630fcf Deduplicate compose parsing (#136) 2025-12-28 17:00:32 +01:00
Bas Nijholt
2af48b2642 feat(web): add Glances integration for host resource stats (#124) 2025-12-28 08:37:57 +01:00
Bas Nijholt
f69993eac8 web: Rename command palette entry to "GitHub Repo" (#134)
Makes the entry searchable by typing "github" in the command palette.
2025-12-28 07:06:32 +01:00
Bas Nijholt
9bdcd143cf Prioritize dedicated SSH key over agent (#133) 2025-12-24 22:34:53 -08:00
Bas Nijholt
9230e12eb0 fix: Make SSH agent socket optional in docker-compose.yml (#132) 2025-12-24 12:22:01 -08:00
Bas Nijholt
2a923e6e81 fix: Include field name in config validation error messages (#131)
Previously, Pydantic validation errors like "Extra inputs are not
permitted" didn't show which field caused the error. Now the error
message includes the field location (e.g., "unknown_key: Extra inputs
are not permitted").
2025-12-22 22:35:19 -08:00
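
A minimal sketch of the idea behind #131 (model and helper names are illustrative, not the project's actual code): Pydantic v2 exposes the offending field path in each error's `loc`, which can be prefixed onto the message.

```python
from pydantic import BaseModel, ConfigDict, ValidationError


class Host(BaseModel):
    model_config = ConfigDict(extra="forbid")  # reject unknown keys
    address: str
    user: str = "root"


def format_errors(exc: ValidationError) -> list[str]:
    messages = []
    for err in exc.errors():
        location = ".".join(str(part) for part in err["loc"]) or "<root>"
        messages.append(f"{location}: {err['msg']}")
    return messages


try:
    Host(address="10.0.0.5", username="bas")  # typo: "username" instead of "user"
except ValidationError as exc:
    print("\n".join(format_errors(exc)))  # username: Extra inputs are not permitted
```
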
Bas Nijholt
5f2e081298 perf: Batch snapshot collection to 1 SSH call per host (#130)
## Summary

Optimize `cf refresh` SSH calls from O(stacks) to O(hosts):
- Discovery: 1 SSH call per host (unchanged)
- Snapshots: 1 SSH call per host (was 1 per stack)

For 50 stacks across 4 hosts: 54 → 8 SSH calls.

## Changes

**Performance:**
- Use `docker ps` + `docker image inspect` instead of `docker compose images` per stack
- Batch snapshot collection by host in `collect_stacks_entries_on_host()`

**Architecture:**
- Add `build_discovery_results()` to `operations.py` (business logic)
- Keep progress bar wrapper in `cli/management.py` (presentation)
- Remove dead code: `discover_all_stacks_on_all_hosts()`, `collect_all_stacks_entries()`
2025-12-22 22:19:32 -08:00
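
A hedged sketch of the per-host batching described in #130; the helper names and signatures are hypothetical, not the actual `operations.py` API.

```python
import asyncio


async def run_on_host(host: str, command: str) -> str:
    """Placeholder for the project's SSH/local executor."""
    raise NotImplementedError


async def collect_stacks_entries_on_host(host: str, stacks: list[str]) -> dict[str, list[str]]:
    # One SSH round-trip lists every compose container with its project and image
    # (the real implementation follows up with `docker image inspect` for digests).
    output = await run_on_host(
        host,
        "docker ps --format '{{.Label \"com.docker.compose.project\"}} {{.Image}}'",
    )
    by_stack: dict[str, list[str]] = {stack: [] for stack in stacks}
    for line in output.splitlines():
        project, _, image = line.partition(" ")
        if project in by_stack and image:
            by_stack[project].append(image)
    return by_stack


async def collect_all(stacks_by_host: dict[str, list[str]]) -> list[dict[str, list[str]]]:
    # O(hosts) SSH calls instead of O(stacks), all in parallel.
    return await asyncio.gather(
        *(collect_stacks_entries_on_host(h, s) for h, s in stacks_by_host.items())
    )
```
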
Bas Nijholt
6fbc7430cb perf: Optimize stray detection to use 1 SSH call per host (#129)
* perf: Optimize stray detection to use 1 SSH call per host

Previously, stray detection checked each stack on each host individually,
resulting in (stacks * hosts) SSH calls. For 50 stacks across 4 hosts,
this meant ~200 parallel SSH connections, causing "Connection lost" errors.

Now queries each host once for all running compose projects using:
  docker ps --format '{{.Label "com.docker.compose.project"}}' | sort -u

This reduces SSH calls from ~200 to just 4 (one per host).

Changes:
- Add get_running_stacks_on_host() in executor.py
- Add discover_all_stacks_on_all_hosts() in operations.py
- Update _discover_stacks_full() to use the batch approach

* Remove unused function and add tests

- Remove discover_stack_on_all_hosts() which is no longer used
- Add tests for get_running_stacks_on_host()
- Add tests for discover_all_stacks_on_all_hosts()
  - Verifies it returns correct StackDiscoveryResult
  - Verifies stray detection works
  - Verifies it makes only 1 call per host (not per stack)
2025-12-22 12:09:59 -08:00
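
A rough sketch of the batched query from #129; only the `docker ps` one-liner comes from the commit above, the executor helper is a placeholder.

```python
import asyncio


async def run_on_host(host: str, command: str) -> str:
    """Placeholder for the project's SSH/local executor."""
    raise NotImplementedError


async def get_running_stacks_on_host(host: str) -> set[str]:
    output = await run_on_host(
        host,
        "docker ps --format '{{.Label \"com.docker.compose.project\"}}' | sort -u",
    )
    return {line for line in output.splitlines() if line}


async def get_all_running_stacks(hosts: list[str]) -> dict[str, set[str]]:
    # One SSH call per host instead of one per (stack, host) pair.
    results = await asyncio.gather(*(get_running_stacks_on_host(h) for h in hosts))
    return dict(zip(hosts, results))
```
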
Bas Nijholt
6fdb43e1e9 Add self-healing: detect and stop stray containers (#128)
* Add self-healing: detect and stop rogue containers

Adds the ability to detect and stop "rogue" containers: stacks running
on hosts where the config says they shouldn't be.

Changes:
- `cf refresh`: Now scans ALL hosts and warns about rogues/duplicates
- `cf apply`: Stops rogue containers before migrations (new phase)
- New `--no-rogues` flag to skip rogue detection

Implementation:
- Add StackDiscoveryResult for full host scanning results
- Add discover_stack_on_all_hosts() to check all hosts in parallel
- Add stop_rogue_stacks() to stop containers on unauthorized hosts
- Update tests to include new no_rogues parameter

* Update README.md

* fix: Update refresh tests for _discover_stacks_full return type

The function now returns a tuple (discovered, rogues, duplicates)
for rogue/duplicate detection. Update test mocks accordingly.

* Rename "rogue" terminology to "stray" for consistency

Terminology update across the codebase:
- rogue_hosts -> stray_hosts
- is_rogue -> is_stray
- stop_rogue_stacks -> stop_stray_stacks
- _discover_rogues -> _discover_strays
- --no-rogues -> --no-strays
- _report_rogue_stacks -> _report_stray_stacks

"Stray" better complements "orphaned" (both evoke lost things)
while clearly indicating the stack is running somewhere it
shouldn't be.

* Update README.md

* Move asyncio import to top level

* Fix remaining rogue -> stray in docstrings and README

* Refactor: Extract shared helpers to reduce duplication

1. Extract _stop_stacks_on_hosts helper in operations.py
   - Shared by stop_orphaned_stacks and stop_stray_stacks
   - Reduces ~50 lines of duplicated code

2. Refactor _discover_strays to reuse _discover_stacks_full
   - Removes duplicate discovery logic from lifecycle.py
   - Calls management._discover_stacks_full and merges duplicates

* Add PR review prompt

* Fix typos in PR review prompt

* Move import to top level (no in-function imports)

* Update README.md

* Remove obvious comments
2025-12-22 10:22:09 -08:00
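
A minimal sketch of the stray classification described in #128, assuming the config maps each stack to its allowed host(s); names are illustrative, not the project's code.

```python
def find_strays(
    config: dict[str, list[str]],   # stack -> host(s) it is allowed to run on
    running: dict[str, set[str]],   # host  -> compose projects observed on it
) -> dict[str, list[str]]:
    """Return {stack: [hosts it is running on but shouldn't be]}."""
    strays: dict[str, list[str]] = {}
    for host, stacks in running.items():
        for stack in stacks:
            if stack in config and host not in config[stack]:
                strays.setdefault(stack, []).append(host)
    return strays


# plex is pinned to server-2 but was also found running on server-1:
config = {"plex": ["server-2"], "grafana": ["server-1"]}
running = {"server-1": {"plex", "grafana"}, "server-2": {"plex"}}
assert find_strays(config, running) == {"plex": ["server-1"]}
```
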
Bas Nijholt
620e797671 fix: Add entrypoint to create passwd entry for non-root users (#127) 2025-12-22 07:31:59 -08:00
Bas Nijholt
031a2af6f3 fix: Correct SSH key volume mount path in docker-compose.yml (#126) 2025-12-22 06:55:59 -08:00
Bas Nijholt
f69eed7721 docs(readme): position as Dockge for multi-host (#123)
* docs(readme): position as Dockge for multi-host

- Reference Dockge (which we've used) instead of Portainer
- Move Portainer mention to "Your files" bullet as contrast
- Link to Dockge repo

* docs(readme): add agentless bullet, link Dockge

- Add "Agentless" bullet highlighting SSH-only approach
- Link to Dockge as contrast (they require agents for multi-host)
- Update NOTE to focus on agentless, CLI-first positioning
2025-12-21 23:28:26 -08:00
Bas Nijholt
5a1fd4e29f docs(readme): add value propositions and fix image URL (#122)
- Add bullet points highlighting key benefits after NOTE block
- Update NOTE to position as file-based Portainer alternative
- Fix hero image URL from http to https
- Add alt text to hero image for accessibility
2025-12-21 23:17:18 -08:00
Bas Nijholt
26dea691ca feat(docker): make container user configurable via CF_UID/CF_GID (#118)
* feat(docker): make container user configurable via CF_UID/CF_GID

Add support for running compose-farm containers as a non-root user
to preserve file ownership on mounted volumes. This prevents files
like compose-farm-state.yaml and web UI config edits from being
owned by root on NFS mounts.

Set CF_UID, CF_GID, and CF_HOME environment variables to run as
your user. Defaults to root (0:0) for backwards compatibility.

* docs: document non-root user configuration for Docker

- Add CF_UID/CF_GID/CF_HOME documentation to README and getting-started
- Add XDG config volume mount for backup/log persistence across restarts
- Update SSH volume examples to use CF_HOME variable

* fix(docker): allow non-root user access and add USER env for SSH

- Add `chmod 755 /root` to Dockerfile so non-root users can access
  the installed tool at /root/.local/share/uv/tools/compose-farm
- Add USER environment variable to docker-compose.yml for SSH to work
  when running as non-root (UID not in /etc/passwd)
- Update docs to include CF_USER in the setup instructions
- Support building from local source with SETUPTOOLS_SCM_PRETEND_VERSION

* fix(docker): revert local build changes, keep only chmod 755 /root

Remove the local source build logic that was added during testing.
The only required change is `chmod 755 /root` to allow non-root users
to access the installed tool.

* docs: add .envrc.example for direnv users

* docs: mention direnv option in README and getting-started
2025-12-21 22:19:40 -08:00
Bas Nijholt
56d64bfe7a fix(web): exclude orphaned stacks from running count (#119)
The dashboard showed "stopped: -1" when orphaned stacks existed because
running_count included stacks that were in the state file but had been removed
from the config. Now only stacks that are both in config AND deployed are
counted as running.
2025-12-21 21:59:05 -08:00
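
A small sketch of the corrected count from #119 (hypothetical names; the real dashboard code differs):

```python
def running_count(config_stacks: set[str], deployed: dict[str, str]) -> int:
    # Count only stacks that are both in the config and recorded as deployed.
    return sum(1 for stack in deployed if stack in config_stacks)


config_stacks = {"plex", "grafana"}
deployed = {"plex": "nuc", "grafana": "nuc", "old-blog": "nuc"}  # old-blog is orphaned
assert running_count(config_stacks, deployed) == 2
```
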
Bas Nijholt
5ddbdcdf9e docs(demos): update recordings and fix demo scripts (#115) 2025-12-21 19:17:16 -08:00
Bas Nijholt
dd16becad1 feat(web): add Repo command to command palette (#117)
Adds a new "Repo" command that opens the GitHub repository in a new tab,
similar to the existing "Docs" command.
2025-12-21 15:25:04 -08:00
Bas Nijholt
df683a223f fix(web): wait for terminal expand transition before scrolling (#116)
- Extracts generic `expandCollapse(toggle, scrollTarget)` function for reuse with any DaisyUI collapse
- Fixes scrolling when clicking action buttons (pull, logs, etc.) while terminal is collapsed - now waits for CSS transition before scrolling
- Fixes shell via command palette - expands Container Shell and scrolls to actual terminal (not collapse header)
- Fixes scroll position not resetting when navigating via command palette
2025-12-21 15:17:59 -08:00
Bas Nijholt
fdb00e7655 refactor(web): store backups in XDG config directory (#113)
* refactor(web): store backups in XDG config directory

Move file backups from `.backups/` alongside the file to
`~/.config/compose-farm/backups/` (respecting XDG_CONFIG_HOME).
The original file path is mirrored inside to avoid name collisions.

* docs(web): document automatic backup location

* refactor(paths): extract shared config_dir() function

* fix(web): use path anchor for Windows compatibility
2025-12-21 15:08:15 -08:00
Bas Nijholt
90657a025f docs: fix missing CLI options and improve docs-review prompt (#114)
* docs: fix missing CLI options and improve docs-review prompt

- Add missing --config option docs for cf ssh setup and cf ssh status
- Enhance .prompts/docs-review.md with:
  - Quick reference table mapping docs to source files
  - Runnable bash commands for quick checks
  - Specific code paths instead of vague references
  - Web UI documentation section
  - Common gotchas section
  - Ready-to-apply fix template format
  - Post-fix verification steps

* docs: add self-review step to docs-review prompt

* docs: make docs-review prompt discovery-based and less brittle

- Use discovery commands (git ls-files, grep, find) instead of hardcoded lists
- Add 'What This Prompt Is For' section clarifying manual vs automated checks
- Simplify checklist to 10 sections focused on judgment-based review
- Remove hardcoded file paths in favor of search patterns
- Make commands dynamically discover CLI structure

* docs: simplify docs-review prompt, avoid duplicating automated checks

- Remove checks already handled by CI (README help output, command table)
- Focus on judgment-based review: accuracy, completeness, clarity
- Reduce from 270 lines to 117 lines
- Highlight that docs/commands.md options tables are manually maintained
2025-12-21 15:07:37 -08:00
Bas Nijholt
7ae8ea0229 feat(web): add tooltips to sidebar header icons (#111)
Use daisyUI tooltip component with bottom positioning for the docs,
GitHub, and theme switcher icons in the sidebar header, matching the
tooltip style used elsewhere in the web UI.
2025-12-21 14:16:57 -08:00
Bas Nijholt
612242eea9 feat(web): add Open Website button and command for stacks with Traefik labels (#110)
* feat(web): add Open Website button and command for stacks with Traefik labels

Parse traefik.http.routers.*.rule labels to extract Host() rules and
display "Open Website" button(s) on stack pages. Also adds the command
to the command palette.

- Add extract_website_urls() function to compose.py
- Determine scheme (http/https) from entrypoint (websecure/web)
- Prefer HTTPS when same host has both protocols
- Support environment variable interpolation
- Add external_link icon from Lucide
- Add comprehensive tests for URL extraction

* refactor: move extract_website_urls to traefik.py and reuse existing parsing

Instead of duplicating the Traefik label parsing logic in compose.py,
reuse generate_traefik_config() with check_all=True to get the parsed
router configuration, then extract Host() rules from it.

- Move extract_website_urls from compose.py to traefik.py
- Reuse generate_traefik_config for label parsing
- Move tests from test_compose.py to test_traefik.py
- Update import in pages.py

* test: add comprehensive tests for extract_website_urls

Cover real-world patterns found in stacks:
- Multiple Host() in one rule with || operator
- Host() combined with PathPrefix (e.g., && PathPrefix(`/api`))
- Multiple services in one stack (like arr stack)
- Labels in list format (- key=value)
- No entrypoints (defaults to http)
- Multiple entrypoints including websecure
2025-12-21 14:16:46 -08:00
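
An illustrative sketch of the Host() extraction along the lines of #110; this is not the project's `extract_website_urls`, just the general approach of pulling Host() values from a router rule and choosing the scheme from the entrypoints.

```python
import re

HOST_RE = re.compile(r"Host\(\s*`([^`]+)`\s*\)")


def extract_urls(rule: str, entrypoints: list[str] | None = None) -> list[str]:
    # Prefer HTTPS when a websecure entrypoint is present, otherwise default to http.
    scheme = "https" if entrypoints and "websecure" in entrypoints else "http"
    return [f"{scheme}://{host}" for host in HOST_RE.findall(rule)]


rule = "Host(`plex.example.com`) || Host(`media.example.com`) && PathPrefix(`/api`)"
assert extract_urls(rule, ["websecure"]) == [
    "https://plex.example.com",
    "https://media.example.com",
]
```
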
Bas Nijholt
ea650bff8a fix: Skip buildable images in pull command (#109)
* fix: Skip buildable images in pull command

Add --ignore-buildable flag to pull command, matching the behavior
of the update command. This prevents pull from failing when a stack
contains services with local build directives (no remote image).

* test: Fix flaky command palette close detection

Use state="hidden" instead of :not([open]) selector when waiting
for the command palette to close. The old approach failed because
wait_for_selector defaults to waiting for visibility, but a closed
<dialog> element is hidden by design.
2025-12-21 10:28:10 -08:00
renovate[bot]
140bca4fd6 ⬆️ Update actions/upload-pages-artifact action to v4 (#108)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-21 10:27:58 -08:00
renovate[bot]
6dad6be8da ⬆️ Update actions/checkout action to v6 (#107)
Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>
2025-12-21 10:27:51 -08:00
Bas Nijholt
d7f931e301 feat(web): improve container row layout for mobile (#106)
- Stack container name/status above buttons on mobile screens
- Use card-like background for visual separation
- Buttons align right on desktop, full width on mobile
2025-12-21 10:27:36 -08:00
Bas Nijholt
471936439e feat(web): add Edit Config command to command palette (#105)
- Added "Edit Config" command to the command palette (Cmd/Ctrl+K)
- Navigates to console page, focuses the Monaco editor, and scrolls to it
- Uses `#editor` URL hash to signal editor focus instead of terminal focus
2025-12-21 01:24:03 -08:00
Bas Nijholt
36e4bef46d feat(web): add shell command to command palette for services (#104)
- Add "Shell: {service}" commands to the command palette when on a stack page
- Allows quick shell access to containers via `Cmd+K` → type "shell" → select service
- Add `get_container_name()` helper in `compose.py` for consistent container name resolution (used by both api.py and pages.py)
2025-12-21 01:23:54 -08:00
Bas Nijholt
2cac0bf263 feat(web): add Pull All and Update All to command palette (#103)
The dashboard buttons for Pull All and Update All are now also
available in the command palette (Cmd/Ctrl+K) for keyboard access.
2025-12-21 01:00:57 -08:00
Bas Nijholt
3d07cbdff0 fix(web): show stderr in console shell sessions (#102)
- Remove `2>/dev/null` from shell command that was suppressing all stderr output
- Command errors like "command not found" are now properly displayed to users
2025-12-21 00:50:58 -08:00
Bas Nijholt
0f67c17281 test: parallel execution and timeout constants (#101)
- Enable `-n auto` for all test commands in justfile (parallel execution)
- Add redis stack to test fixtures (missing stack was causing test failure)
- Replace hardcoded timeouts with constants: `TIMEOUT` (10s) and `SHORT_TIMEOUT` (5s)
- Rename `test-unit` → `test-cli` and `test-browser` → `test-web`
- Skip CLI startup test when running in parallel mode (`-n auto`)
- Update test assertions for 5 stacks (was 4)
2025-12-21 00:48:52 -08:00
Bas Nijholt
bd22a1a55e fix: Reject unknown keys in config with Pydantic strict mode (#100)
Add extra="forbid" to Host and Config models so typos like
`username` instead of `user` raise an error instead of being
silently ignored. Also simplify _parse_hosts to pass dicts
directly to Pydantic instead of manual field extraction.
2025-12-21 00:19:18 -08:00
Bas Nijholt
cc54e89b33 feat: add justfile for development commands (#99)
- Adds a justfile with common development commands for easier workflow
- Commands: `install`, `test`, `test-unit`, `test-browser`, `lint`, `web`, `kill-web`, `doc`, `kill-doc`, `clean`
2025-12-20 23:24:30 -08:00
Bas Nijholt
f71e5cffd6 feat(web): add service commands to command palette with fuzzy matching (#95)
- Add service-level commands to the command palette when viewing a stack detail page
- Services are extracted from the compose file and exposed via a `data-services` attribute
- Commands are grouped by action (all Logs together, all Pull together, etc.) with services sorted alphabetically
- Service commands appear with a teal indicator to distinguish from stack-level commands (green)
- Implement word-boundary fuzzy matching for better filtering UX:
  - `rest plex` matches `Restart: plex-server`
  - `server` matches `plex-server` (hyphenated names split into words)
  - Query words must match the START of command words (prevents false positives like `r ba` matching `Logs: bazarr`)

Available service commands:
- `Restart: <service>` - Restart a specific service
- `Pull: <service>` - Pull image for a service
- `Logs: <service>` - View logs for a service
- `Stop: <service>` - Stop a service
- `Up: <service>` - Start a service
2025-12-20 23:23:53 -08:00
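
A sketch of the word-boundary matching rule from #95, written in Python for illustration (the web UI implements this in JavaScript):

```python
import re


def fuzzy_match(query: str, command: str) -> bool:
    # Split on whitespace, colons, and hyphens so "plex-server" becomes two words.
    command_words = re.split(r"[\s:\-]+", command.lower())
    query_words = query.lower().split()
    # Every query word must match the START of some command word.
    return all(any(word.startswith(q) for word in command_words) for q in query_words)


assert fuzzy_match("rest plex", "Restart: plex-server")
assert fuzzy_match("server", "Logs: plex-server")   # hyphenated name splits into words
assert not fuzzy_match("r ba", "Logs: bazarr")       # "r" starts no word, so no false positive
```
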
Bas Nijholt
0e32729763 fix(web): add tooltips to console page buttons (#98)
* fix(web): add tooltips to console page buttons

Add descriptive tooltips to Connect, Open, and Save buttons on the
console page, matching the tooltip style used on dashboard and stack
pages.

* fix(web): show platform-appropriate keyboard shortcuts

Detect Mac vs other platforms and display ⌘ or Ctrl accordingly for
keyboard shortcuts. The command palette FAB dynamically updates, and
tooltips use ⌘/Ctrl notation to cover both platforms.
2025-12-20 22:59:50 -08:00
Bas Nijholt
b0b501fa98 docs: update example services in documentation and tests (#96) 2025-12-20 22:45:13 -08:00
Bas Nijholt
7e00596046 docs: fix inaccuracies and add missing command documentation (#97)
- Add missing --service option docs for up, stop, restart, update, pull, ps, logs
- Add stop command to command overview table
- Add compose passthrough command documentation
- Add --all option and [STACKS] argument to refresh command
- Fix ServiceConfig reference to Host in architecture.md
- Update lifecycle.py description to include stop and compose commands
- Fix uv installation syntax in web-ui.md (--with web -> [web])
- Add missing cf ssh --help and cf web --help output blocks in README
2025-12-20 22:37:26 -08:00
Bas Nijholt
d1e4d9b05c docs: update documentation for new CLI features (#94) 2025-12-20 21:36:47 -08:00
Bas Nijholt
3fbae630f9 feat(cli): add compose passthrough command (#93)
Adds `cf compose <stack> <command> [args...]` to run any docker compose
command on a stack without needing dedicated wrappers. Useful for
commands like top, images, exec, run, config, etc.

Multi-host stacks require --host to specify which host to run on.
2025-12-20 21:26:05 -08:00
Bas Nijholt
3e3c919714 fix(web): service action buttons fixes and additions (#92)
* fix(web): use --service flag in service action endpoint

* feat(web): add Start button to service actions

* feat(web): add Pull button to service actions
2025-12-20 21:11:44 -08:00
Bas Nijholt
59b797a89d feat: add service-level commands with --service flag (#91)
Add support for targeting specific services within a stack:

CLI:
- New `stop` command for stopping services without removing containers
- Add `--service` / `-s` flag to: up, pull, restart, update, stop, logs, ps
- Service flag requires exactly one stack to be specified

Web API:
- Add `stop` to allowed stack commands
- New endpoint: POST /api/stack/{name}/service/{service}/{command}
- Supports: logs, pull, restart, up, stop

Web UI:
- Add action buttons to container rows: logs, restart, stop, shell
- Add rotate_ccw and scroll_text icons for new buttons
2025-12-20 20:56:48 -08:00
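
Conceptually, the `--service` flag narrows the underlying compose command to a single service; a rough sketch of the idea (not the project's actual command builder):

```python
def build_compose_command(action: str, service: str | None = None) -> list[str]:
    """Append the optional service name to the underlying compose command."""
    cmd = ["docker", "compose", action]
    if action == "up":
        cmd.append("-d")
    if service:
        cmd.append(service)
    return cmd


assert build_compose_command("logs", "plex") == ["docker", "compose", "logs", "plex"]
assert build_compose_command("up") == ["docker", "compose", "up", "-d"]
```
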
Bas Nijholt
7caf006e07 feat(web): add Rich logging for better error debugging (#90)
Add structured logging with Rich tracebacks to web UI components:
- Configure RichHandler in app.py for formatted output
- Log SSH/file operation failures in API routes with full tracebacks
- Log WebSocket exec/shell errors for connection issues
- Add warning logs for failed container state queries

Errors now show detailed tracebacks in container logs instead of
just returning 500 status codes.
2025-12-20 20:47:34 -08:00
Bas Nijholt
45040b75f1 feat(web): add Pull All and Update All buttons to dashboard (#89)
- Add "Pull All" and "Update All" buttons to dashboard for bulk operations
- Switch from native `title` attribute to DaisyUI tooltips for instant, styled tooltips
- Add tooltips to save buttons clarifying what they save
- Add tooltip to container shell button
- Fix tooltip z-index so they appear above sidebar
- Fix tooltip clipping by removing `overflow-y-auto` from main content
- Position container shell tooltip to the left to avoid clipping
2025-12-20 20:41:26 -08:00
Bas Nijholt
fa1c5c1044 docs: update theme to indigo with system preference support (#88)
Switch from teal to indigo primary color to match Zensical docs theme.
Add system preference detection and orange accent for dark mode.
2025-12-20 20:18:28 -08:00
Bas Nijholt
67e832f687 docs: clarify config file locations and update install URL (#86) 2025-12-20 20:12:06 -08:00
Bas Nijholt
da986fab6a fix: improve command palette theme filtering (#87)
- Normalize spaces after colons so "theme:dark" matches "theme: dark"
- Also handles multiple spaces like "theme:  dark"
2025-12-20 20:03:16 -08:00
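
A tiny sketch of the normalization described in #87 (hypothetical helper; the real filter lives in the web UI's JavaScript):

```python
import re


def normalize(text: str) -> str:
    # Collapse any whitespace after a colon to a single space.
    return re.sub(r":\s*", ": ", text.lower())


assert normalize("theme:dark") == normalize("Theme:  dark") == "theme: dark"
```
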
Bas Nijholt
5dd6e2ca05 fix: improve theme picker usability in command palette (#85) 2025-12-20 20:00:05 -08:00
Bas Nijholt
16435065de fix: video autoplay for Safari and Chrome with instant navigation (#84) 2025-12-20 19:49:05 -08:00
Bas Nijholt
5921b5e405 docs: update web-workflow demo recording (#83) 2025-12-20 18:09:24 -08:00
Bas Nijholt
f0cd85b5f5 fix: prevent terminal reconnection to wrong page after navigation (#81) 2025-12-20 16:41:28 -08:00
Bas Nijholt
fe95443733 fix: Safari video autoplay on first page load (#82) 2025-12-20 16:41:04 -08:00
Bas Nijholt
8df9288156 docs: add Quick Demo GIFs to README (#80)
* docs: add Quick Demo GIFs to README

Add the same CLI and Web UI demo GIFs that appear on the docs homepage.

* docs: add Table of Contents header
2025-12-20 16:20:42 -08:00
Bas Nijholt
124bde7575 docs: improve Web UI workflow demo with comprehensive showcase (#78) 2025-12-20 16:14:33 -08:00
Bas Nijholt
350947ad12 Rename services to stacks terminology (#79) 2025-12-20 16:00:41 -08:00
Bas Nijholt
bb019bcae6 feat: add ty type checker alongside mypy (#77)
Add Astral's ty type checker (written in Rust, 10-100x faster than mypy)
as a second type checking layer. Both run in pre-commit and CI.

Fixed type issues caught by ty:
- config.py: explicit Host constructor to avoid dict unpacking issues
- executor.py: wrap subprocess.run in closure for asyncio.to_thread
- api.py: use getattr for Jinja TemplateModule macro access
- test files: fix playwright driver_path tuple handling, pytest rootpath typing
2025-12-20 15:43:51 -08:00
Bas Nijholt
6d50f90344 ci: run docs build on PRs (#76)
- Add pull_request trigger with same path filters
- Skip Pages setup and artifact upload on PRs (only build)
- Skip deploy job entirely on PRs
- Update concurrency to include ref for parallel PR builds
2025-12-20 15:16:25 -08:00
Bas Nijholt
474b7ca044 docs: add early Web UI links to homepage (#75) 2025-12-20 15:08:04 -08:00
Bas Nijholt
7555d8443b fix(docs): add Web UI to sidebar and fix video paths (#74)
- Add Web UI page to navigation in zensical.toml
- Use absolute paths for video assets in web-ui.md
- Add web-workflow demo video to homepage Quick Demo section
2025-12-20 15:05:10 -08:00
Bas Nijholt
de46c3ff0f feat: add web UI demo recording system (#69) 2025-12-20 15:00:03 -08:00
Bas Nijholt
fff064cf03 Clarify single-host vs multi-host docs (#73) 2025-12-20 14:15:43 -08:00
Bas Nijholt
187f83b61d feat: add service arguments to refresh command (#70) 2025-12-20 13:14:09 -08:00
Bas Nijholt
d2b9113b9d feat(web): add documentation link to sidebar and command palette (#72)
Adds a docs icon next to GitHub in the header and a "Docs" command
in the command palette (⌘K) that opens https://compose-farm.nijho.lt/
in a new tab.
2025-12-20 13:13:20 -08:00
Bas Nijholt
be77eb7c75 fix(docs): use absolute paths for video assets (#71)
Relative paths like `assets/install.webm` resolved incorrectly on
subpages (e.g., /getting-started/assets/install.webm instead of
/assets/install.webm), causing 404 errors for videos on those pages.
2025-12-20 12:51:34 -08:00
152 changed files with 10659 additions and 2738 deletions

.envrc.example

@@ -0,0 +1,6 @@
# Run containers as current user (preserves file ownership on NFS mounts)
# Copy this file to .envrc and run: direnv allow
export CF_UID=$(id -u)
export CF_GID=$(id -g)
export CF_HOME=$HOME
export CF_USER=$USER


@@ -24,7 +24,7 @@ def get_all_commands(typer_app: typer.Typer, prefix: str = "cf") -> set[str]:
continue
name = command.name
if not name and command.callback:
name = command.callback.__name__
name = getattr(command.callback, "__name__", None)
if name:
commands.add(f"{prefix} {name}")


@@ -12,7 +12,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
os: [ubuntu-latest, macos-latest]
python-version: ["3.11", "3.12", "3.13"]
steps:
@@ -54,7 +54,7 @@ jobs:
run: uv run playwright install chromium --with-deps
- name: Run browser tests
run: uv run pytest -m browser -v --no-cov
run: uv run pytest -m browser -n auto -v
lint:
runs-on: ubuntu-latest


@@ -7,6 +7,11 @@ on:
- "docs/**"
- "zensical.toml"
- ".github/workflows/docs.yml"
pull_request:
paths:
- "docs/**"
- "zensical.toml"
- ".github/workflows/docs.yml"
workflow_dispatch:
permissions:
@@ -15,14 +20,14 @@ permissions:
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
group: "pages-${{ github.ref }}"
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
with:
lfs: true
@@ -39,14 +44,17 @@ jobs:
run: zensical build
- name: Setup Pages
if: github.event_name != 'pull_request'
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
if: github.event_name != 'pull_request'
uses: actions/upload-pages-artifact@v4
with:
path: "./site"
deploy:
if: github.event_name != 'pull_request'
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}

.gitignore

@@ -37,6 +37,7 @@ ENV/
.coverage
.pytest_cache/
htmlcov/
.code/
# Local config (don't commit real configs)
compose-farm.yaml
@@ -44,3 +45,5 @@ compose-farm.yaml
coverage.xml
.env
homepage/
site/
.playwright-mcp/


@@ -21,16 +21,22 @@ repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.9
hooks:
- id: ruff
- id: ruff-check
args: [--fix]
- id: ruff-format
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.14.0
- repo: local
hooks:
- id: mypy
additional_dependencies:
- pydantic>=2.0.0
- typer>=0.9.0
- asyncssh>=2.14.0
- types-PyYAML
name: mypy (type checker)
entry: uv run mypy src tests
language: system
types: [python]
pass_filenames: false
- id: ty
name: ty (type checker)
entry: uv run ty check
language: system
types: [python]
pass_filenames: false


@@ -1,94 +1,117 @@
Review all documentation in this repository for accuracy, completeness, and consistency. Cross-reference documentation against the actual codebase to identify issues.
Review documentation for accuracy, completeness, and consistency. Focus on things that require judgment—automated checks handle the rest.
## Scope
## What's Already Automated
Review all documentation files:
- docs/*.md (primary documentation)
- README.md (repository landing page)
- CLAUDE.md (development guidelines)
- examples/README.md (example configurations)
Don't waste time on these—CI and pre-commit hooks handle them:
## Review Checklist
- **README help output**: `markdown-code-runner` regenerates `cf --help` blocks in CI
- **README command table**: Pre-commit hook verifies commands are listed
- **Linting/formatting**: Handled by pre-commit
### 1. Command Documentation
## What This Review Is For
For each documented command, verify against the CLI source code:
Focus on things that require judgment:
- Command exists in codebase
- All options are documented with correct names, types, and defaults
- Short options (-x) match long options (--xxx)
- Examples would work as written
- Check for undocumented commands or options
1. **Accuracy**: Does the documentation match what the code actually does?
2. **Completeness**: Are there undocumented features, options, or behaviors?
3. **Clarity**: Would a new user understand this? Are examples realistic?
4. **Consistency**: Do different docs contradict each other?
5. **Freshness**: Has the code changed in ways the docs don't reflect?
Run `--help` for each command to verify.
## Review Process
### 2. Configuration Documentation
### 1. Check Recent Changes
Verify against Pydantic models in the config module:
```bash
# What changed recently that might need doc updates?
git log --oneline -20 | grep -iE "feat|fix|add|remove|change|option"
- All config keys are documented
- Types match Pydantic field types
- Required vs optional fields are correct
- Default values are accurate
- Config file search order matches code
- Example YAML is valid and uses current schema
# What code files changed?
git diff --name-only HEAD~20 | grep "\.py$"
```
### 3. Architecture Documentation
Look for new features, changed defaults, renamed options, or removed functionality.
Verify against actual directory structure:
### 2. Verify docs/commands.md Options Tables
- File paths match actual source code location
- All modules listed actually exist
- No modules are missing from the list
- Component descriptions match code functionality
- CLI module list includes all command files
The README auto-updates help output, but `docs/commands.md` has **manually maintained options tables**. These can drift.
### 4. State and Data Files
For each command's options table, compare against `cf <command> --help`:
- Are all options listed?
- Are short flags correct?
- Are defaults accurate?
- Are descriptions accurate?
Verify against state and path modules:
**Pay special attention to subcommands** (`cf config *`, `cf ssh *`)—these have their own options that are easy to miss.
- State file name and location are correct
- State file format matches actual structure
- Log file name and location are correct
- What triggers state/log updates is accurate
### 3. Verify docs/configuration.md
### 5. Installation Documentation
Compare against Pydantic models in the source:
Verify against pyproject.toml:
```bash
# Find the config models
grep -r "class.*BaseModel" src/ --include="*.py" -A 15
```
- Python version requirement matches requires-python
- Package name is correct
- Optional dependencies are documented
- CLI entry points are mentioned
- Installation methods work as documented
Check:
- All config keys documented
- Types and defaults match code
- Config file search order is accurate
- Example YAML would actually work
### 6. Feature Claims
### 4. Verify docs/architecture.md
For each claimed feature, verify it exists and works as described.
```bash
# What source files actually exist?
git ls-files "src/**/*.py"
```
### 7. Cross-Reference Consistency
Check:
- Listed files exist
- No files are missing from the list
- Descriptions match what the code does
Check for conflicts between documentation files:
### 5. Check Examples
- README vs docs/index.md (should be consistent)
- CLAUDE.md vs actual code structure
- Command tables match across files
- Config examples are consistent
For examples in any doc:
- Would the YAML/commands actually work?
- Are service names, paths, and options realistic?
- Do examples use current syntax (not deprecated options)?
### 6. Cross-Reference Consistency
The same info appears in multiple places. Check for conflicts:
- README.md vs docs/index.md
- docs/commands.md vs CLAUDE.md command tables
- Config examples across different docs
### 7. Self-Check This Prompt
This prompt can become outdated too. If you notice:
- New automated checks that should be listed above
- New doc files that need review guidelines
- Patterns that caused issues
Include prompt updates in your fixes.
## Output Format
Provide findings in these categories:
Categorize findings:
1. **Critical Issues**: Incorrect information that would cause user problems
2. **Inaccuracies**: Technical errors, wrong defaults, incorrect paths
3. **Missing Documentation**: Features/commands that exist but aren't documented
4. **Outdated Content**: Information that was once true but no longer is
5. **Inconsistencies**: Conflicts between different documentation files
6. **Minor Issues**: Typos, formatting, unclear wording
7. **Verified Accurate**: Sections confirmed to be correct
1. **Critical**: Wrong info that would break user workflows
2. **Inaccuracy**: Technical errors (wrong defaults, paths, types)
3. **Missing**: Undocumented features or options
4. **Outdated**: Was true, no longer is
5. **Inconsistency**: Docs contradict each other
6. **Minor**: Typos, unclear wording
For each issue, include:
- File path and line number (if applicable)
- What the documentation says
- What the code actually does
- Suggested fix
For each issue, provide a ready-to-apply fix:
```
### Issue: [Brief description]
- **File**: docs/commands.md:652
- **Problem**: `cf ssh setup` has `--config` option but it's not documented
- **Fix**: Add `--config, -c PATH` to the options table
- **Verify**: `cf ssh setup --help`
```


@@ -0,0 +1,79 @@
# Duplication audit and generalization prompt
You are a coding agent working inside a repository. Your job is to find duplicated
functionality (not just identical code) and propose a minimal, safe generalization.
Keep it simple and avoid adding features.
## First steps
- Read project-specific instructions (AGENTS.md, CONTRIBUTING.md, or similar) and follow them.
- If instructions mention tooling or style (e.g., preferred search tools), use those.
- Ask a brief clarification if the request is ambiguous (for example: report only vs refactor).
## Objective
Identify and consolidate duplicated functionality across the codebase. Duplication includes:
- Multiple functions that parse or validate the same data in slightly different ways
- Repeated file reads or config parsing
- Similar command building or subprocess execution paths
- Near-identical error handling or logging patterns
- Repeated data transforms that can become a shared helper
The goal is to propose a general, reusable abstraction that reduces duplication while
preserving behavior. Keep changes minimal and easy to review.
## Search strategy
1) Map the hot paths
- Scan entry points (CLI, web handlers, tasks, jobs) to see what they do repeatedly.
- Look for cross-module patterns: same steps, different files.
2) Find duplicate operations
- Use fast search tools (prefer `rg`) to find repeated keywords and patterns.
- Check for repeated YAML/JSON parsing, env interpolation, file IO, command building,
data validation, or response formatting.
3) Validate duplication is real
- Confirm the functional intent matches (not just similar code).
- Note any subtle differences that must be preserved.
4) Propose a minimal generalization
- Suggest a shared helper, utility, or wrapper.
- Avoid over-engineering. If only two call sites exist, keep the helper small.
- Prefer pure functions and centralized IO if that already exists.
## Deliverables
Provide a concise report with:
1) Findings
- List duplicated behaviors with file references and a short description of the
shared functionality.
- Explain why these are functionally the same (or nearly the same).
2) Proposed generalizations
- For each duplication, propose a shared helper and where it should live.
- Outline any behavior differences that need to be parameterized.
3) Impact and risk
- Note any behavior risks, test needs, or migration steps.
If the user asked you to implement changes:
- Make only the minimal edits needed to dedupe behavior.
- Keep the public API stable unless explicitly requested.
- Add small comments only when the logic is non-obvious.
- Summarize what changed and why.
## Output format
- Start with a short summary of the top 1-3 duplications.
- Then provide a list of findings, ordered by impact.
- Include a small proposed refactor plan (step-by-step, no more than 5 steps).
- End with any questions or assumptions.
## Guardrails
- Do not add new features or change behavior beyond deduplication.
- Avoid deep refactors without explicit request.
- Preserve existing style conventions and import rules.
- If a duplication is better left alone (e.g., clarity, single usage), say so.

.prompts/pr-review.md

@@ -0,0 +1,15 @@
Review the pull request for:
- **Code cleanliness**: Is the implementation clean and well-structured?
- **DRY principle**: Does it avoid duplication?
- **Code reuse**: Are there parts that should be reused from other places?
- **Organization**: Is everything in the right place?
- **Consistency**: Is it in the same style as other parts of the codebase?
- **Simplicity**: Is it not over-engineered? Remember KISS and YAGNI. No dead code paths and NO defensive programming.
- **User experience**: Does it provide a good user experience?
- **PR**: Is the PR description and title clear and informative?
- **Tests**: Are there tests, and do they cover the changes adequately? Are they testing something meaningful or are they just trivial?
- **Live tests**: Test the changes in a REAL live environment to ensure they work as expected, use the config in `/opt/stacks/compose-farm.yaml`.
- **Rules**: Does the code follow the project's coding standards and guidelines as laid out in @CLAUDE.md?
Look at `git diff origin/main..HEAD` for the changes made in this pull request.

.prompts/update-demos.md

@@ -0,0 +1,51 @@
Update demo recordings to match the current compose-farm.yaml configuration.
## Key Gotchas
1. **Never `git checkout` without asking** - check for uncommitted changes first
2. **Prefer `nas` stacks** - demos run locally on nas, SSH adds latency
3. **Terminal captures keyboard** - use `blur()` to release focus before command palette
4. **Clicking sidebar navigates away** - clicking h1 scrolls to top
5. **Buttons have icons, not text** - use `[data-tip="..."]` selectors
6. **`record.py` auto-restores config** - no manual cleanup needed after CLI demos
## Stacks Used in Demos
| Stack | CLI Demos | Web Demos | Notes |
|-------|-----------|-----------|-------|
| `audiobookshelf` | quickstart, migration, apply | - | Migrates nas→anton |
| `grocy` | update | navigation, stack, workflow, console | - |
| `immich` | logs, compose | shell | Multiple containers |
| `dozzle` | - | workflow | - |
## CLI Demos
**Files:** `docs/demos/cli/*.tape`
Check:
- `quickstart.tape`: `bat -r` line ranges match current config structure
- `migration.tape`: nvim keystrokes work, stack exists on nas
- `compose.tape`: exec commands produce meaningful output
Run: `python docs/demos/cli/record.py [demo]`
## Web Demos
**Files:** `docs/demos/web/demo_*.py`
Check:
- Stack names in demos still exist in config
- Selectors match current templates (grep for IDs in `templates/`)
- Shell demo uses command palette for ALL navigation
Run: `python docs/demos/web/record.py [demo]`
## Before Recording
```bash
# Check for uncommitted config changes
git -C /opt/stacks diff compose-farm.yaml
# Verify stacks are running
cf ps audiobookshelf grocy immich dozzle
```


@@ -15,7 +15,7 @@ src/compose_farm/
│ ├── app.py # Shared Typer app instance, version callback
│ ├── common.py # Shared helpers, options, progress bar utilities
│ ├── config.py # Config subcommand (init, show, path, validate, edit, symlink)
│ ├── lifecycle.py # up, down, pull, restart, update, apply commands
│ ├── lifecycle.py # up, down, stop, pull, restart, update, apply, compose commands
│ ├── management.py # refresh, check, init-network, traefik-file commands
│ ├── monitoring.py # logs, ps, stats commands
│ ├── ssh.py # SSH key management (setup, status, keygen)
@@ -25,7 +25,7 @@ src/compose_farm/
├── console.py # Shared Rich console instances
├── executor.py # SSH/local command execution, streaming output
├── operations.py # Business logic (up, migrate, discover, preflight checks)
├── state.py # Deployment state tracking (which service on which host)
├── state.py # Deployment state tracking (which stack on which host)
├── logs.py # Image digest snapshots (dockerfarm-log.toml)
├── paths.py # Path utilities, config file discovery
├── ssh_keys.py # SSH key path constants and utilities
@@ -46,34 +46,49 @@ Icons use [Lucide](https://lucide.dev/). Add new icons as macros in `web/templat
## Key Design Decisions
1. **Hybrid SSH approach**: asyncssh for parallel streaming with prefixes; native `ssh -t` for raw mode (progress bars)
2. **Parallel by default**: Multiple services run concurrently via `asyncio.gather`
3. **Streaming output**: Real-time stdout/stderr with `[service]` prefix using Rich
2. **Parallel by default**: Multiple stacks run concurrently via `asyncio.gather`
3. **Streaming output**: Real-time stdout/stderr with `[stack]` prefix using Rich
4. **SSH key auth only**: Uses ssh-agent, no password handling (YAGNI)
5. **NFS assumption**: Compose files at same path on all hosts
6. **Local IP auto-detection**: Skips SSH when target host matches local machine's IP
7. **State tracking**: Tracks where services are deployed for auto-migration
7. **State tracking**: Tracks where stacks are deployed for auto-migration
8. **Pre-flight checks**: Verifies NFS mounts and Docker networks exist before starting/migrating
## Code Style
- **Imports at top level**: Never add imports inside functions unless they are explicitly marked with `# noqa: PLC0415` and a comment explaining it speeds up CLI startup. Heavy modules like `pydantic`, `yaml`, and `rich.table` are lazily imported to keep `cf --help` fast.
## Development Commands
Use `just` for common tasks. Run `just` to list available commands:
| Command | Description |
|---------|-------------|
| `just install` | Install dev dependencies |
| `just test` | Run all tests |
| `just test-cli` | Run CLI tests (parallel) |
| `just test-web` | Run web UI tests (parallel) |
| `just lint` | Lint, format, and type check |
| `just web` | Start web UI (port 9001) |
| `just doc` | Build and serve docs (port 9002) |
| `just clean` | Clean build artifacts |
## Testing
Run tests with `uv run pytest`. Browser tests require Chromium (system-installed or via `playwright install chromium`):
Run tests with `just test` or `uv run pytest`. Browser tests require Chromium (system-installed or via `playwright install chromium`):
```bash
# Unit tests only (skip browser tests, can parallelize)
# Unit tests only (parallel)
uv run pytest -m "not browser" -n auto
# Browser tests only (run sequentially, no coverage)
uv run pytest -m browser --no-cov
# Browser tests only (parallel)
uv run pytest -m browser -n auto
# All tests
uv run pytest --no-cov
uv run pytest
```
Browser tests are marked with `@pytest.mark.browser`. They use Playwright to test HTMX behavior, JavaScript functionality (sidebar filter, command palette, terminals), and content stability during navigation. Run sequentially (no `-n`) to avoid resource contention.
Browser tests are marked with `@pytest.mark.browser`. They use Playwright to test HTMX behavior, JavaScript functionality (sidebar filter, command palette, terminals), and content stability during navigation.
## Communication Notes
@@ -114,19 +129,21 @@ CLI available as `cf` or `compose-farm`.
| Command | Description |
|---------|-------------|
| `up` | Start services (`docker compose up -d`), auto-migrates if host changed |
| `down` | Stop services (`docker compose down`). Use `--orphaned` to stop services removed from config |
| `up` | Start stacks (`docker compose up -d`), auto-migrates if host changed |
| `down` | Stop stacks (`docker compose down`). Use `--orphaned` to stop stacks removed from config |
| `stop` | Stop services without removing containers (`docker compose stop`) |
| `pull` | Pull latest images |
| `restart` | `down` + `up -d` |
| `update` | `pull` + `build` + `down` + `up -d` |
| `apply` | Make reality match config: migrate services + stop orphans. Use `--dry-run` to preview |
| `logs` | Show service logs |
| `ps` | Show status of all services |
| `stats` | Show overview (hosts, services, pending migrations; `--live` for container counts) |
| `refresh` | Update state from reality: discover running services, capture image digests |
| `apply` | Make reality match config: migrate stacks + stop orphans. Use `--dry-run` to preview |
| `compose` | Run any docker compose command on a stack (passthrough) |
| `logs` | Show stack logs |
| `ps` | Show status of all stacks |
| `stats` | Show overview (hosts, stacks, pending migrations; `--live` for container counts) |
| `refresh` | Update state from reality: discover running stacks, capture image digests |
| `check` | Validate config, traefik labels, mounts, networks; show host compatibility |
| `init-network` | Create Docker network on hosts with consistent subnet/gateway |
| `traefik-file` | Generate Traefik file-provider config from compose labels |
| `config` | Manage config files (init, show, path, validate, edit, symlink) |
| `config` | Manage config files (init, init-env, show, path, validate, edit, symlink) |
| `ssh` | Manage SSH keys (setup, status, keygen) |
| `web` | Start web UI server |


@@ -16,5 +16,13 @@ RUN apk add --no-cache openssh-client
COPY --from=builder /root/.local/share/uv/tools/compose-farm /root/.local/share/uv/tools/compose-farm
COPY --from=builder /usr/local/bin/cf /usr/local/bin/compose-farm /usr/local/bin/
ENTRYPOINT ["cf"]
# Allow non-root users to access the installed tool
# (required when running with user: "${CF_UID:-0}:${CF_GID:-0}")
RUN chmod 755 /root
# Allow non-root users to add passwd entries (required for SSH)
RUN chmod 666 /etc/passwd
# Entrypoint creates /etc/passwd entry for non-root UIDs (required for SSH)
ENTRYPOINT ["sh", "-c", "[ $(id -u) != 0 ] && echo ${USER:-u}:x:$(id -u):$(id -g)::${HOME:-/}:/bin/sh >> /etc/passwd; exec cf \"$@\"", "--"]
CMD ["--help"]

README.md

File diff suppressed because it is too large


@@ -5,7 +5,7 @@ compose_dir: /opt/compose
# Optional: Auto-regenerate Traefik file-provider config after up/down/restart/update
traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik # Skip services on same host (docker provider handles them)
traefik_stack: traefik # Skip stacks on same host (docker provider handles them)
hosts:
# Full form with all options
@@ -20,11 +20,11 @@ hosts:
# Local execution (no SSH)
local: localhost
services:
# Map service names to hosts
# Compose file expected at: {compose_dir}/{service}/compose.yaml
stacks:
# Map stack names to hosts
# Compose file expected at: {compose_dir}/{stack}/compose.yaml
traefik: server-1 # Traefik runs here
plex: server-2 # Services on other hosts get file-provider entries
plex: server-2 # Stacks on other hosts get file-provider entries
jellyfin: server-2
sonarr: server-1
radarr: local
grafana: server-1
nextcloud: local


@@ -1,38 +1,58 @@
services:
cf:
image: ghcr.io/basnijholt/compose-farm:latest
# Run as current user to preserve file ownership on mounted volumes
# Set CF_UID=$(id -u) CF_GID=$(id -g) in your environment or .env file
# Defaults to root (0:0) for backwards compatibility
user: "${CF_UID:-0}:${CF_GID:-0}"
volumes:
- ${SSH_AUTH_SOCK}:/ssh-agent:ro
# Compose directory (contains compose files AND compose-farm.yaml config)
- ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
# SSH keys for passwordless auth (generated by `cf ssh setup`)
# Choose ONE option below (use the same option for both cf and web services):
# Option 1: Host path (default) - keys at ~/.ssh/compose-farm/id_ed25519
- ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
- ${CF_SSH_DIR:-~/.ssh/compose-farm}:${CF_HOME:-/root}/.ssh/compose-farm
# Option 2: Named volume - managed by Docker, shared between services
# - cf-ssh:/root/.ssh
# - cf-ssh:${CF_HOME:-/root}/.ssh
# Option 3: SSH agent forwarding (uncomment if using ssh-agent)
# - ${SSH_AUTH_SOCK}:/ssh-agent:ro
environment:
- SSH_AUTH_SOCK=/ssh-agent
# Config file path (state stored alongside it)
- CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
# HOME must match the user running the container for SSH to find keys
- HOME=${CF_HOME:-/root}
# USER is required for SSH when running as non-root (UID not in /etc/passwd)
- USER=${CF_USER:-root}
web:
image: ghcr.io/basnijholt/compose-farm:latest
restart: unless-stopped
command: web --host 0.0.0.0 --port 9000
# Run as current user to preserve file ownership on mounted volumes
user: "${CF_UID:-0}:${CF_GID:-0}"
volumes:
- ${SSH_AUTH_SOCK}:/ssh-agent:ro
- ${CF_COMPOSE_DIR:-/opt/stacks}:${CF_COMPOSE_DIR:-/opt/stacks}
# SSH keys - use the SAME option as cf service above
# Option 1: Host path (default)
- ${CF_SSH_DIR:-~/.ssh/compose-farm}:/root/.ssh
- ${CF_SSH_DIR:-~/.ssh/compose-farm}:${CF_HOME:-/root}/.ssh/compose-farm
# Option 2: Named volume
# - cf-ssh:/root/.ssh
# - cf-ssh:${CF_HOME:-/root}/.ssh
# Option 3: SSH agent forwarding (uncomment if using ssh-agent)
# - ${SSH_AUTH_SOCK}:/ssh-agent:ro
# XDG config dir for backups and image digest logs (persists across restarts)
- ${CF_XDG_CONFIG:-~/.config/compose-farm}:${CF_HOME:-/root}/.config/compose-farm
environment:
- SSH_AUTH_SOCK=/ssh-agent
- CF_CONFIG=${CF_COMPOSE_DIR:-/opt/stacks}/compose-farm.yaml
# Used to detect self-updates and run via SSH to survive container restart
- CF_WEB_SERVICE=compose-farm
- CF_WEB_STACK=compose-farm
# Local host for Glances (use container name instead of IP to avoid Docker network issues)
- CF_LOCAL_HOST=${CF_LOCAL_HOST:-}
# HOME must match the user running the container for SSH to find keys
- HOME=${CF_HOME:-/root}
# USER is required for SSH when running as non-root (UID not in /etc/passwd)
- USER=${CF_USER:-root}
labels:
- traefik.enable=true
- traefik.http.routers.compose-farm.rule=Host(`compose-farm.${DOMAIN}`)


@@ -46,13 +46,12 @@ Compose Farm follows three core principles:
Pydantic models for YAML configuration:
- **Config** - Root configuration with compose_dir, hosts, services
- **HostConfig** - Host address and SSH user
- **ServiceConfig** - Service-to-host mappings
- **Config** - Root configuration with compose_dir, hosts, stacks
- **Host** - Host address, SSH user, and port
Key features:
- Validation with Pydantic
- Multi-host service expansion (`all` → list of hosts)
- Multi-host stack expansion (`all` → list of hosts)
- YAML loading with sensible defaults
### State Tracking (`src/compose_farm/state.py`)
@@ -62,22 +61,22 @@ Tracks deployment state in `compose-farm-state.yaml` (stored alongside the confi
```yaml
deployed:
plex: nuc
sonarr: nuc
grafana: nuc
```
Used for:
- Detecting migrations (service moved to different host)
- Identifying orphans (services removed from config)
- Detecting migrations (stack moved to different host)
- Identifying orphans (stacks removed from config)
- `cf ps` status display
### Operations (`src/compose_farm/operations.py`)
Business logic for service operations:
Business logic for stack operations:
- **up** - Start service, handle migration if needed
- **down** - Stop service
- **up** - Start stack, handle migration if needed
- **down** - Stop stack
- **preflight checks** - Verify mounts, networks exist before operations
- **discover** - Find running services on hosts
- **discover** - Find running stacks on hosts
- **migrate** - Down on old host, up on new host
### Executor (`src/compose_farm/executor.py`)
@@ -85,8 +84,8 @@ Business logic for service operations:
SSH and local command execution:
- **Hybrid SSH approach**: asyncssh for parallel streaming, native `ssh -t` for raw mode
- **Parallel by default**: Multiple services via `asyncio.gather`
- **Streaming output**: Real-time stdout/stderr with `[service]` prefix
- **Parallel by default**: Multiple stacks via `asyncio.gather`
- **Streaming output**: Real-time stdout/stderr with `[stack]` prefix
- **Local detection**: Skips SSH when target matches local machine IP
### CLI (`src/compose_farm/cli/`)
@@ -98,7 +97,7 @@ cli/
├── app.py # Shared Typer app, version callback
├── common.py # Shared helpers, options, progress utilities
├── config.py # config subcommand (init, show, path, validate, edit, symlink)
├── lifecycle.py # up, down, pull, restart, update, apply
├── lifecycle.py # up, down, stop, pull, restart, update, apply, compose
├── management.py # refresh, check, init-network, traefik-file
├── monitoring.py # logs, ps, stats
├── ssh.py # SSH key management (setup, status, keygen)
@@ -112,7 +111,7 @@ cli/
```
1. Load configuration
└─► Parse compose-farm.yaml
└─► Validate service exists
└─► Validate stack exists
2. Check state
└─► Load state.yaml
@@ -129,7 +128,7 @@ cli/
└─► SSH to old host
└─► Run: docker compose down
5. Start service
5. Start stack
└─► SSH to target host
└─► cd /opt/compose/plex
└─► Run: docker compose up -d
@@ -154,7 +153,7 @@ cli/
3. Stop orphans
└─► For each orphan: cf down
4. Migrate services
4. Migrate stacks
└─► For each migration: down old, up new
5. Start missing
@@ -176,7 +175,7 @@ async def run_command(host, command):
return result.stdout, result.stderr
```
Multiple services run concurrently via `asyncio.gather`.
Multiple stacks run concurrently via `asyncio.gather`.
### Raw Mode (native ssh)
@@ -208,7 +207,7 @@ Location: `compose-farm-state.yaml` (stored alongside the config file)
```yaml
deployed:
plex: nuc
sonarr: nuc
grafana: nuc
```
Image digests are stored separately in `dockerfarm-log.toml` (also in the config directory).
@@ -218,8 +217,8 @@ Image digests are stored separately in `dockerfarm-log.toml` (also in the config
```
Config Change State Change Action
─────────────────────────────────────────────────────
Add service Missing cf up
Remove service Orphaned cf down
Add stack Missing cf up
Remove stack Orphaned cf down
Change host Migration down old, up new
No change No change none (or refresh)
```
@@ -236,10 +235,10 @@ Updates state.yaml to match what's actually running.
## Compose File Discovery
For each service, Compose Farm looks for compose files in:
For each stack, Compose Farm looks for compose files in:
```
{compose_dir}/{service}/
{compose_dir}/{stack}/
├── compose.yaml # preferred
├── compose.yml
├── docker-compose.yml
@@ -255,7 +254,7 @@ First match wins.
Compose Farm parses Traefik labels from compose files:
```yaml
services:
stacks:
plex:
labels:
- traefik.enable=true
@@ -300,9 +299,9 @@ If checks fail, operation aborts with clear error.
### Partial Failures
When operating on multiple services:
- Each service is independent
- Failures are logged, but other services continue
When operating on multiple stacks:
- Each stack is independent
- Failures are logged, but other stacks continue
- Exit code reflects overall success/failure
## Performance Considerations
@@ -313,7 +312,7 @@ Services are started/stopped in parallel:
```python
await asyncio.gather(*[
up_service(service) for service in services
up_stack(stack) for stack in stacks
])
```


@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bb1372a59a4ed1ac74d3864d7a84dd5311fce4cb6c6a00bf3a574bc2f98d5595
size 895927
oid sha256:01dabdd8f62773823ba2b8dc74f9931f1a1b88215117e6a080004096025491b0
size 901456


@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f339a85f3d930db5a020c9f77e106edc5f44ea7dee6f68557106721493c24ef8
size 205907
oid sha256:134c903a6b3acfb933617b33755b0cdb9bac2a59e5e35b64236e248a141d396d
size 206883

docs/assets/compose.gif

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d8b3cdb3486ec79b3ddb2f7571c13d54ac9aed182edfe708eff76a966a90cfc7
size 1132310

docs/assets/compose.webm

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a3c4d4a62f062f717df4e6752efced3caea29004dc90fe97fd7633e7f0ded9db
size 341057


@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:388aa49a1269145698f9763452aaf6b9c6232ea9229abe1dae304df558e29695
size 403442
oid sha256:6c1bb48cc2f364681515a4d8bd0c586d133f5a32789b7bb64524ad7d9ed0a8e9
size 543135

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9b8bf4dcb8ee67270d4a88124b4dd4abe0dab518e73812ee73f7c66d77f146e2
size 228025
oid sha256:5f82d96137f039f21964c15c1550aa1b1f0bb2d52c04d012d253dbfbd6fad096
size 268086

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:16b9a28137dfae25488e2094de85766a039457f5dca20c2d84ac72e3967c10b9
size 164237
oid sha256:2a4045b00d90928f42c7764b3c24751576cfb68a34c6e84d12b4e282d2e67378
size 146467

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e0fbe697a1f8256ce3b9a6a64c7019d42769134df9b5b964e5abe98a29e918fd
size 68242
oid sha256:f1b94416ed3740853f863e19bf45f26241a203fb0d7d187160a537f79aa544fa
size 60353

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:629b8c80b98eb996b75439745676fd99a83f391ca25f778a71bd59173f814c2f
size 1194931
oid sha256:848d9c48fb7511da7996149277c038589fad1ee406ff2f30c28f777fc441d919
size 1183641

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:33fd46f2d8538cc43be4cb553b3af9d8b412f282ee354b6373e2793fe41c799b
size 405057
oid sha256:e747ee71bb38b19946005d5a4def4d423dadeaaade452dec875c4cb2d24a5b77
size 407373

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccd96e33faba5f297999917d89834b29d58bd2a8929eea8d62875e3d8830bd5c
size 3198466
oid sha256:d32c9a3eec06e57df085ad347e6bf61e323f8bd8322d0c540f0b9d4834196dfd
size 3589776

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:979a1a21303bbf284b3510981066ef05c41c1035b34392fecc7bee472116e6db
size 967564
oid sha256:6c54eda599389dac74c24c83527f95cd1399e653d7faf2972c2693d90e590597
size 1085344

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2067f4967a93b7ee3a8db7750c435f41b1fccd2919f3443da4b848c20cc54f23
size 124559
oid sha256:62f9b5ec71496197a3f1c3e3bca8967d603838804279ea7dbf00a70d3391ff6c
size 127123

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5471bd94e6d1b9d415547fa44de6021fdad2e1cc5b8b295680e217104aa749d6
size 98149
oid sha256:ac2b93d3630af87b44a135723c5d10e8287529bed17c28301b2802cd9593e9e8
size 98748

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b50a7e9836c496c0989363d1440fa0a6ccdaa38ee16aae92b389b3cf3c3732f
size 2385110

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccbb3d5366c7734377e12f98cca0b361028f5722124f1bb7efa231f6aeffc116
size 2208044

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4135888689a10c5ae2904825d98f2a6d215c174a4bd823e25761f619590f04ff
size 3990104

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87739cd6f6576a81100392d8d1e59d3e776fecc8f0721a31332df89e7fc8593d
size 5814274

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:269993b52721ce70674d3aab2a4cd8c58aa621d4ba0739afedae661c90965b26
size 3678371

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0098b55bb6a52fa39f807a01fa352ce112bcb446e2a2acb963fb02d21b28c934
size 3088813

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4bf9d8c247d278799d1daea784fc662a22f12b1bd7883f808ef30f35025ebca6
size 4166443

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02d5124217a94849bf2971d6d13d28da18c557195a81b9cca121fb7c07f0501b
size 3523244

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:412a0e68f8e52801cafbb9a703ca9577e7c14cc7c0e439160b9185961997f23c
size 4435697

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e600a1d3216b44497a889f91eac94d62ef7207b4ed0471465dcb72408caa28e
size 3764693

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c07a283f4f70c4ab205b0f0acb5d6f55e3ced4c12caa7a8d5914ffe3548233a
size 5768166

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:562228841de976d70ee80999b930eadf3866a13ff2867d900279993744c44671
size 6667918

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:845746ac1cb101c3077d420c4f3fda3ca372492582dc123ac8a031a68ae9b6b1
size 12943150

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:189259558b5760c02583885168d7b0b47cf476cba81c7c028ec770f9d6033129
size 12415357

View File

@@ -12,7 +12,7 @@ Tips, limitations, and recommendations for using Compose Farm effectively.
Compose Farm moves containers between hosts but **does not provide cross-host networking**. Docker's internal DNS and networks don't span hosts.
**What breaks when you move a service:**
**What breaks when you move a stack:**
| Feature | Works? | Why |
|---------|--------|-----|
@@ -29,7 +29,7 @@ Compose Farm moves containers between hosts but **does not provide cross-host ne
- No health checks or restart policies beyond Docker's
- No secrets management beyond Docker's
## Service Organization
## Stack Organization
### Keep Dependencies Together
@@ -53,16 +53,16 @@ services:
```yaml
# compose-farm.yaml
services:
stacks:
myapp: nuc # All three containers stay together
```
### Separate Standalone Services
### Separate Standalone Stacks
Services that don't talk to other containers can be anywhere:
Stacks whose services don't talk to other containers can be anywhere:
```yaml
services:
stacks:
# These can run on any host
plex: nuc
jellyfin: hp
@@ -92,14 +92,14 @@ services:
- "5432:5432"
```
## Multi-Host Services
## Multi-Host Stacks
### When to Use `all`
Use `all` for services that need local access to each host:
Use `all` for stacks that need local access to each host:
```yaml
services:
stacks:
# Need Docker socket
dozzle: all # Log viewer
portainer-agent: all # Portainer agents
@@ -112,10 +112,10 @@ services:
### Host-Specific Lists
For services on specific hosts only:
For stacks on specific hosts only:
```yaml
services:
stacks:
# Only on compute nodes
gitlab-runner: [nuc, hp]
@@ -146,7 +146,7 @@ Before migrating, Compose Farm verifies:
# 1. Preview changes
cf apply --dry-run
# 2. Verify target host can run the service
# 2. Verify target host can run the stack
cf check myservice
# 3. Apply changes
@@ -221,7 +221,7 @@ Keep config and data separate:
/opt/appdata/ # Local: per-host app data
├── plex/
└── sonarr/
└── grafana/
```
## Performance
@@ -235,7 +235,7 @@ Compose Farm runs operations in parallel. For large deployments:
cf up --all
# Avoid: sequential updates when possible
for svc in plex sonarr radarr; do
for svc in plex grafana nextcloud; do
cf update $svc
done
```
@@ -249,28 +249,28 @@ SSH connections are reused within a command. For many operations:
cf update --all
# Multiple commands, multiple connections (slower)
cf update plex && cf update sonarr && cf update radarr
cf update plex && cf update grafana && cf update nextcloud
```
## Traefik Setup
### Service Placement
### Stack Placement
Put Traefik on a reliable host:
```yaml
services:
stacks:
traefik: nuc # Primary host with good uptime
```
### Same-Host Services
### Same-Host Stacks
Services on the same host as Traefik use Docker provider:
Stacks on the same host as Traefik use Docker provider:
```yaml
traefik_service: traefik
traefik_stack: traefik
services:
stacks:
traefik: nuc
portainer: nuc # Docker provider handles this
plex: hp # File provider handles this
@@ -297,7 +297,7 @@ http:
|------|----------|--------|
| Compose Farm config | `~/.config/compose-farm/` | Git or copy |
| Compose files | `/opt/compose/` | Git |
| State file | `~/.config/compose-farm/state.yaml` | Optional (can refresh) |
| State file | `~/.config/compose-farm/compose-farm-state.yaml` | Optional (can refresh) |
| App data | `/opt/appdata/` | Backup solution |
### Disaster Recovery
@@ -317,7 +317,7 @@ cf apply
### Common Issues
**Service won't start:**
**Stack won't start:**
```bash
cf check myservice # Verify mounts/networks
cf logs myservice # Check container logs
@@ -341,15 +341,6 @@ cf ssh status # Check key status
cf ssh setup # Re-setup keys
```
### Debug Mode
For more verbose output:
```bash
# See exact commands being run
cf --verbose up myservice
```
## Security Considerations
### SSH Keys
@@ -374,7 +365,7 @@ cf --verbose up myservice
| Scenario | Solution |
|----------|----------|
| 2-10 hosts, static services | **Compose Farm** |
| 2-10 hosts, static stacks | **Compose Farm** |
| Cross-host container networking | Docker Swarm |
| Auto-scaling, self-healing | Kubernetes |
| Infrastructure as code | Ansible + Compose Farm |

View File

@@ -11,13 +11,15 @@ The Compose Farm CLI is available as both `compose-farm` and the shorter alias `
| Category | Command | Description |
|----------|---------|-------------|
| **Lifecycle** | `apply` | Make reality match config |
| | `up` | Start services |
| | `down` | Stop services |
| | `restart` | Restart services (down + up) |
| | `update` | Update services (pull + down + up) |
| | `up` | Start stacks |
| | `down` | Stop stacks |
| | `stop` | Stop services without removing containers |
| | `restart` | Restart stacks (down + up) |
| | `update` | Update stacks (pull + build + down + up) |
| | `pull` | Pull latest images |
| **Monitoring** | `ps` | Show service status |
| | `logs` | Show service logs |
| | `compose` | Run any docker compose command |
| **Monitoring** | `ps` | Show stack status |
| | `logs` | Show stack logs |
| | `stats` | Show overview statistics |
| **Configuration** | `check` | Validate config and mounts |
| | `refresh` | Sync state from reality |
@@ -43,7 +45,7 @@ cf --help, -h # Show help
Make reality match your configuration. The primary reconciliation command.
<video autoplay loop muted playsinline>
<source src="assets/apply.webm" type="video/webm">
<source src="/assets/apply.webm" type="video/webm">
</video>
```bash
@@ -55,15 +57,15 @@ cf apply [OPTIONS]
| Option | Description |
|--------|-------------|
| `--dry-run, -n` | Preview changes without executing |
| `--no-orphans` | Skip stopping orphaned services |
| `--full, -f` | Also refresh running services |
| `--no-orphans` | Skip stopping orphaned stacks |
| `--full, -f` | Also refresh running stacks |
| `--config, -c PATH` | Path to config file |
**What it does:**
1. Stops orphaned services (in state but removed from config)
2. Migrates services on wrong host
3. Starts missing services (in config but not running)
1. Stops orphaned stacks (in state but removed from config)
2. Migrates stacks on wrong host
3. Starts missing stacks (in config but not running)
**Examples:**
@@ -77,7 +79,7 @@ cf apply
# Only start/migrate, don't stop orphans
cf apply --no-orphans
# Also refresh all running services
# Also refresh all running stacks
cf apply --full
```
@@ -85,36 +87,40 @@ cf apply --full
### cf up
Start services. Auto-migrates if host assignment changed.
Start stacks. Auto-migrates if host assignment changed.
```bash
cf up [OPTIONS] [SERVICES]...
cf up [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Start all services |
| `--host, -H TEXT` | Filter to services on this host |
| `--all, -a` | Start all stacks |
| `--host, -H TEXT` | Filter to stacks on this host |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Start specific services
cf up plex sonarr
# Start specific stacks
cf up plex grafana
# Start all services
# Start all stacks
cf up --all
# Start all services on a specific host
# Start all stacks on a specific host
cf up --all --host nuc
# Start a specific service within a stack
cf up immich --service database
```
**Auto-migration:**
If you change a service's host in config and run `cf up`:
If you change a stack's host in config and run `cf up`:
1. Verifies mounts/networks exist on new host
2. Runs `down` on old host
@@ -125,52 +131,84 @@ If you change a service's host in config and run `cf up`:
### cf down
Stop services.
Stop stacks.
```bash
cf down [OPTIONS] [SERVICES]...
cf down [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Stop all services |
| `--orphaned` | Stop orphaned services only |
| `--host, -H TEXT` | Filter to services on this host |
| `--all, -a` | Stop all stacks |
| `--orphaned` | Stop orphaned stacks only |
| `--host, -H TEXT` | Filter to stacks on this host |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Stop specific services
# Stop specific stacks
cf down plex
# Stop all services
# Stop all stacks
cf down --all
# Stop services removed from config
# Stop stacks removed from config
cf down --orphaned
# Stop all services on a host
# Stop all stacks on a host
cf down --all --host nuc
```
---
### cf stop
Stop services without removing containers.
```bash
cf stop [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Stop all stacks |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Stop specific stacks
cf stop plex
# Stop all stacks
cf stop --all
# Stop a specific service within a stack
cf stop immich --service database
```
---
### cf restart
Restart services (down + up).
Restart stacks (down + up). With `--service`, restarts just that service.
```bash
cf restart [OPTIONS] [SERVICES]...
cf restart [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Restart all services |
| `--all, -a` | Restart all stacks |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
@@ -178,37 +216,44 @@ cf restart [OPTIONS] [SERVICES]...
```bash
cf restart plex
cf restart --all
# Restart a specific service
cf restart immich --service database
```
---
### cf update
Update services (pull + build + down + up).
Update stacks (pull + build + down + up). With `--service`, updates just that service.
<video autoplay loop muted playsinline>
<source src="assets/update.webm" type="video/webm">
<source src="/assets/update.webm" type="video/webm">
</video>
```bash
cf update [OPTIONS] [SERVICES]...
cf update [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Update all services |
| `--all, -a` | Update all stacks |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Update specific service
# Update specific stack
cf update plex
# Update all services
# Update all stacks
cf update --all
# Update a specific service
cf update immich --service database
```
---
@@ -218,14 +263,15 @@ cf update --all
Pull latest images.
```bash
cf pull [OPTIONS] [SERVICES]...
cf pull [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Pull for all services |
| `--all, -a` | Pull for all stacks |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
@@ -233,6 +279,60 @@ cf pull [OPTIONS] [SERVICES]...
```bash
cf pull plex
cf pull --all
# Pull a specific service
cf pull immich --service database
```
---
### cf compose
Run any docker compose command on a stack. This is a passthrough to docker compose for commands not wrapped by cf.
<video autoplay loop muted playsinline>
<source src="/assets/compose.webm" type="video/webm">
</video>
```bash
cf compose [OPTIONS] STACK COMMAND [ARGS]...
```
**Arguments:**
| Argument | Description |
|----------|-------------|
| `STACK` | Stack to operate on (use `.` for current dir) |
| `COMMAND` | Docker compose command to run |
| `ARGS` | Additional arguments passed to docker compose |
**Options:**
| Option | Description |
|--------|-------------|
| `--host, -H TEXT` | Host to run the command on (required for multi-host stacks) |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Show docker compose help
cf compose mystack --help
# View running processes
cf compose mystack top
# List images
cf compose mystack images
# Interactive shell
cf compose mystack exec web bash
# View parsed config
cf compose mystack config
# Use current directory as stack
cf compose . ps
```
---
@@ -241,53 +341,58 @@ cf pull --all
### cf ps
Show status of services.
Show status of stacks.
```bash
cf ps [OPTIONS] [SERVICES]...
cf ps [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Show all services (default) |
| `--host, -H TEXT` | Filter to services on this host |
| `--all, -a` | Show all stacks (default) |
| `--host, -H TEXT` | Filter to stacks on this host |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--config, -c PATH` | Path to config file |
**Examples:**
```bash
# Show all services
# Show all stacks
cf ps
# Show specific services
cf ps plex sonarr
# Show specific stacks
cf ps plex grafana
# Filter by host
cf ps --host nuc
# Show status of a specific service
cf ps immich --service database
```
---
### cf logs
Show service logs.
Show stack logs.
<video autoplay loop muted playsinline>
<source src="assets/logs.webm" type="video/webm">
<source src="/assets/logs.webm" type="video/webm">
</video>
```bash
cf logs [OPTIONS] [SERVICES]...
cf logs [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Show logs for all services |
| `--host, -H TEXT` | Filter to services on this host |
| `--all, -a` | Show logs for all stacks |
| `--host, -H TEXT` | Filter to stacks on this host |
| `--service, -s TEXT` | Target a specific service within the stack |
| `--follow, -f` | Follow logs (live stream) |
| `--tail, -n INTEGER` | Number of lines (default: 20 for --all, 100 otherwise) |
| `--config, -c PATH` | Path to config file |
@@ -301,11 +406,14 @@ cf logs plex
# Follow logs
cf logs -f plex
# Show last 50 lines of multiple services
cf logs -n 50 plex sonarr
# Show last 50 lines of multiple stacks
cf logs -n 50 plex grafana
# Show last 20 lines of all services
# Show last 20 lines of all stacks
cf logs --all
# Show logs for a specific service
cf logs immich --service database
```
---
@@ -344,7 +452,7 @@ cf stats --live
Validate configuration, mounts, and networks.
```bash
cf check [OPTIONS] [SERVICES]...
cf check [OPTIONS] [STACKS]...
```
**Options:**
@@ -363,7 +471,7 @@ cf check
# Fast local-only validation
cf check --local
# Check specific service and show host compatibility
# Check specific stack and show host compatibility
cf check jellyfin
```
@@ -371,28 +479,34 @@ cf check jellyfin
### cf refresh
Update local state from running services.
Update local state from running stacks.
```bash
cf refresh [OPTIONS]
cf refresh [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Refresh all stacks |
| `--dry-run, -n` | Show what would change |
| `--log-path, -l PATH` | Path to Dockerfarm TOML log |
| `--config, -c PATH` | Path to config file |
Without arguments, refreshes all stacks (same as `--all`). With stack names, refreshes only those stacks.
**Examples:**
```bash
# Sync state with reality
# Sync state with reality (all stacks)
cf refresh
# Preview changes
cf refresh --dry-run
# Refresh specific stacks only
cf refresh plex grafana
```
---
@@ -434,14 +548,14 @@ cf init-network -n production -s 10.0.0.0/16 -g 10.0.0.1
Generate Traefik file-provider config from compose labels.
```bash
cf traefik-file [OPTIONS] [SERVICES]...
cf traefik-file [OPTIONS] [STACKS]...
```
**Options:**
| Option | Description |
|--------|-------------|
| `--all, -a` | Generate for all services |
| `--all, -a` | Generate for all stacks |
| `--output, -o PATH` | Output file (stdout if omitted) |
| `--config, -c PATH` | Path to config file |
@@ -454,7 +568,7 @@ cf traefik-file --all
# Write to file
cf traefik-file --all -o /opt/traefik/dynamic.d/cf.yml
# Specific services
# Specific stacks
cf traefik-file plex jellyfin -o /opt/traefik/cf.yml
```
@@ -539,7 +653,20 @@ cf ssh COMMAND
| `status` | Show SSH key status and host connectivity |
| `keygen` | Generate key without distributing |
**Options for `cf ssh setup` and `cf ssh keygen`:**
**Options for `cf ssh setup`:**
| Option | Description |
|--------|-------------|
| `--config, -c PATH` | Path to config file |
| `--force, -f` | Regenerate key even if it exists |
**Options for `cf ssh status`:**
| Option | Description |
|--------|-------------|
| `--config, -c PATH` | Path to config file |
**Options for `cf ssh keygen`:**
| Option | Description |
|--------|-------------|
@@ -604,7 +731,7 @@ cf web --reload
cf ps
cf stats --live
# Update a specific service
# Update a specific stack
cf update plex
# View logs
@@ -614,7 +741,7 @@ cf logs -f plex
### Maintenance
```bash
# Update all services
# Update all stacks
cf update --all
# Refresh state after manual changes
@@ -627,7 +754,7 @@ cf refresh
# Preview what would change
cf apply --dry-run
# Move a service: edit config, then
# Move a stack: edit config, then
cf up plex # auto-migrates
# Or reconcile everything
@@ -641,7 +768,7 @@ cf apply
cf check --local
cf check
# Check specific service
# Check specific stack
cf check jellyfin
# Sync state

View File

@@ -4,7 +4,7 @@ icon: lucide/settings
# Configuration Reference
Compose Farm uses a YAML configuration file to define hosts and service assignments.
Compose Farm uses a YAML configuration file to define hosts and stack assignments.
## Config File Location
@@ -27,15 +27,34 @@ Or set the environment variable:
export CF_CONFIG=/path/to/config.yaml
```
## Full Example
## Examples
### Single host (local-only)
```yaml
# Required: directory containing compose files
compose_dir: /opt/stacks
# Define local host
hosts:
local: localhost
# Map stacks to the local host
stacks:
plex: local
grafana: local
nextcloud: local
```
### Multi-host (full example)
```yaml
# Required: directory containing compose files (same path on all hosts)
compose_dir: /opt/compose
# Optional: auto-regenerate Traefik config
traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik
traefik_stack: traefik
# Define Docker hosts
hosts:
@@ -45,17 +64,15 @@ hosts:
hp:
address: 192.168.1.11
user: admin
local: localhost
# Map services to hosts
services:
# Single-host services
# Map stacks to hosts
stacks:
# Single-host stacks
plex: nuc
sonarr: nuc
radarr: hp
jellyfin: local
grafana: nuc
nextcloud: hp
# Multi-host services
# Multi-host stacks
dozzle: all # Run on ALL hosts
node-exporter: [nuc, hp] # Run on specific hosts
```
@@ -64,7 +81,7 @@ services:
### compose_dir (required)
Directory containing your compose service folders. Must be the same path on all hosts.
Directory containing your compose stack folders. Must be the same path on all hosts.
```yaml
compose_dir: /opt/compose
@@ -77,7 +94,7 @@ compose_dir: /opt/compose
├── plex/
│ ├── docker-compose.yml # or compose.yaml
│ └── .env # optional environment file
├── sonarr/
├── grafana/
│ └── docker-compose.yml
└── ...
```
@@ -96,12 +113,12 @@ Path to auto-generated Traefik file-provider config. When set, Compose Farm rege
traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
```
### traefik_service
### traefik_stack
Service name running Traefik. Services on the same host are skipped in file-provider config (Traefik's docker provider handles them).
Name of the stack running Traefik. Stacks on the same host are skipped in the file-provider config (Traefik's docker provider handles them).
```yaml
traefik_service: traefik
traefik_stack: traefik
```
## Hosts Configuration
@@ -137,14 +154,14 @@ hosts:
### Localhost
For services running on the same machine where you invoke Compose Farm:
For stacks running on the same machine where you invoke Compose Farm:
```yaml
hosts:
local: localhost
```
No SSH is used for localhost services.
No SSH is used for localhost stacks.
### Multiple Hosts
@@ -161,23 +178,23 @@ hosts:
local: localhost
```
## Services Configuration
## Stacks Configuration
### Single-Host Service
### Single-Host Stack
```yaml
services:
stacks:
plex: nuc
sonarr: nuc
radarr: hp
grafana: nuc
nextcloud: hp
```
### Multi-Host Service
### Multi-Host Stack
For services that need to run on every host (e.g., log shippers, monitoring agents):
For stacks that need to run on every host (e.g., log shippers, monitoring agents):
```yaml
services:
stacks:
# Run on ALL configured hosts
dozzle: all
promtail: all
@@ -186,19 +203,19 @@ services:
node-exporter: [nuc, hp, truenas]
```
**Common multi-host services:**
**Common multi-host stacks:**
- **Dozzle** - Docker log viewer (needs local socket)
- **Promtail/Alloy** - Log shipping (needs local socket)
- **node-exporter** - Host metrics (needs /proc, /sys)
- **AutoKuma** - Uptime Kuma monitors (needs local socket)
### Service Names
### Stack Names
Service names must match directory names in `compose_dir`:
Stack names must match directory names in `compose_dir`:
```yaml
compose_dir: /opt/compose
services:
stacks:
plex: nuc # expects /opt/compose/plex/docker-compose.yml
my-app: hp # expects /opt/compose/my-app/docker-compose.yml
```
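Put together with the compose-file discovery order (compose.yaml preferred, then compose.yml, docker-compose.yml, ...), resolving a stack name to its compose file is roughly as follows (a sketch; the real helper and the exact candidate list aren't shown here):
```python
from pathlib import Path

# First match wins; the real candidate list may include more names.
CANDIDATES = ("compose.yaml", "compose.yml", "docker-compose.yml", "docker-compose.yaml")


def find_compose_file(compose_dir: Path, stack: str) -> Path | None:
    stack_dir = compose_dir / stack              # e.g. /opt/compose/plex
    for name in CANDIDATES:
        candidate = stack_dir / name
        if candidate.is_file():
            return candidate
    return None                                  # surfaced by `cf check` as a missing stack
```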
@@ -212,10 +229,10 @@ For example, if your config is at `~/.config/compose-farm/compose-farm.yaml`, th
```yaml
deployed:
plex: nuc
sonarr: nuc
grafana: nuc
```
This file records which services are deployed and on which host.
This file records which stacks are deployed and on which host.
**Don't edit manually.** Use `cf refresh` to sync state with reality.
@@ -237,7 +254,7 @@ Compose Farm runs `docker compose` which handles `.env` automatically.
When generating Traefik config, Compose Farm resolves `${VAR}` and `${VAR:-default}` from:
1. The service's `.env` file
1. The stack's `.env` file
2. Current environment
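A minimal sketch of that two-level lookup for the `${VAR}` and `${VAR:-default}` forms (illustrative only; the real resolver isn't part of this diff):
```python
import os
import re

_VAR = re.compile(r"\$\{(\w+)(?::-([^}]*))?\}")


def interpolate(value: str, env_file: dict[str, str]) -> str:
    """Resolve ${VAR} and ${VAR:-default}: .env first, then the current environment."""
    def _sub(match: re.Match[str]) -> str:
        name, default = match.group(1), match.group(2) or ""
        if name in env_file:
            return env_file[name]
        return os.environ.get(name, default)
    return _VAR.sub(_sub, value)
```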
## Config Commands
@@ -303,7 +320,7 @@ cf check --local
Checks:
- Config syntax
- Service-to-host mappings
- Stack-to-host mappings
- Compose file existence
### Full Validation
@@ -318,13 +335,13 @@ Additional SSH-based checks:
- Docker network existence
- Traefik label validation
### Service-Specific Check
### Stack-Specific Check
```bash
cf check jellyfin
```
Shows which hosts can run the service (have required mounts/networks).
Shows which hosts can run the stack (have required mounts/networks).
## Example Configurations
@@ -336,7 +353,7 @@ compose_dir: /opt/compose
hosts:
server: 192.168.1.10
services:
stacks:
myapp: server
```
@@ -353,11 +370,11 @@ hosts:
address: 192.168.1.100
user: admin
services:
stacks:
# Media
plex: nuc
sonarr: nuc
radarr: nuc
jellyfin: nuc
immich: nuc
# Infrastructure
traefik: nuc
@@ -371,9 +388,8 @@ services:
```yaml
compose_dir: /opt/compose
network: production
traefik_file: /opt/traefik/dynamic.d/cf.yml
traefik_service: traefik
traefik_stack: traefik
hosts:
web-1:
@@ -386,7 +402,7 @@ hosts:
address: 10.0.1.20
user: deploy
services:
stacks:
# Load balanced
api: [web-1, web-2]

View File

@@ -1,26 +1,17 @@
# Terminal Demos
# Demo Recordings
[VHS](https://github.com/charmbracelet/vhs) tape files for recording terminal demos.
Demo recording infrastructure for Compose Farm documentation.
## Demos
## Structure
| File | Shows |
|------|-------|
| `install.tape` | Installing with `uv tool install` |
| `quickstart.tape` | `cf ps`, `cf up`, `cf logs` |
| `logs.tape` | Viewing logs |
| `update.tape` | `cf update` |
| `migration.tape` | Service migration |
| `apply.tape` | `cf apply` |
## Recording
```bash
# Record all demos (outputs to docs/assets/)
./docs/demos/record.sh
# Single demo
cd /opt/stacks && vhs /path/to/docs/demos/quickstart.tape
```
docs/demos/
├── cli/ # VHS-based CLI terminal recordings
└── web/ # Playwright-based web UI recordings
```
Output files (GIF + WebM) are tracked with Git LFS.
## Output
All recordings output to `docs/assets/` as WebM (primary) and GIF (fallback).
See subdirectory READMEs for usage.

33
docs/demos/cli/README.md Normal file
View File

@@ -0,0 +1,33 @@
# CLI Demo Recordings
VHS-based terminal demo recordings for Compose Farm CLI.
## Requirements
- [VHS](https://github.com/charmbracelet/vhs): `go install github.com/charmbracelet/vhs@latest`
## Usage
```bash
# Record all demos
python docs/demos/cli/record.py
# Record specific demos
python docs/demos/cli/record.py quickstart migration
```
## Demos
| Tape | Description |
|------|-------------|
| `install.tape` | Installing with `uv tool install` |
| `quickstart.tape` | `cf ps`, `cf up`, `cf logs` |
| `logs.tape` | Viewing logs |
| `compose.tape` | `cf compose` passthrough (--help, images, exec) |
| `update.tape` | `cf update` |
| `migration.tape` | Service migration |
| `apply.tape` | `cf apply` |
## Output
GIF and WebM files saved to `docs/assets/`.

View File

@@ -0,0 +1,50 @@
# Compose Demo
# Shows that cf compose passes through ANY docker compose command
Output docs/assets/compose.gif
Output docs/assets/compose.webm
Set Shell "bash"
Set FontSize 14
Set Width 900
Set Height 550
Set Theme "Catppuccin Mocha"
Set TypingSpeed 50ms
Type "# cf compose runs ANY docker compose command on the right host"
Enter
Sleep 500ms
Type "# See ALL available compose commands"
Enter
Sleep 500ms
Type "cf compose immich --help"
Enter
Sleep 4s
Type "# Show images"
Enter
Sleep 500ms
Type "cf compose immich images"
Enter
Wait+Screen /immich/
Sleep 2s
Type "# Open shell in a container"
Enter
Sleep 500ms
Type "cf compose immich exec immich-machine-learning sh"
Enter
Wait+Screen /#/
Sleep 1s
Type "python3 --version"
Enter
Sleep 1s
Type "exit"
Enter
Sleep 500ms

View File

@@ -1,5 +1,5 @@
# Logs Demo
# Shows viewing service logs
# Shows viewing stack logs
Output docs/assets/logs.gif
Output docs/assets/logs.webm

View File

@@ -1,5 +1,5 @@
# Migration Demo
# Shows automatic service migration when host changes
# Shows automatic stack migration when host changes
Output docs/assets/migration.gif
Output docs/assets/migration.webm
@@ -25,7 +25,7 @@ Sleep 1s
Type "nvim /opt/stacks/compose-farm.yaml"
Enter
Wait+Screen /services:/
Wait+Screen /stacks:/
# Search for audiobookshelf
Type "/audiobookshelf"

View File

@@ -13,7 +13,7 @@ Set FontFamily "FiraCode Nerd Font"
Set TypingSpeed 50ms
Env BAT_PAGING "always"
Type "# Config is just: service  host"
Type "# Config is just: stack  host"
Enter
Sleep 500ms
@@ -21,23 +21,23 @@ Type "# First, define your hosts..."
Enter
Sleep 500ms
Type "bat -r 1:11 compose-farm.yaml"
Type "bat -r 1:16 compose-farm.yaml"
Enter
Sleep 3s
Type "q"
Sleep 500ms
Type "# Then map each service to a host"
Type "# Then map each stack to a host"
Enter
Sleep 500ms
Type "bat -r 13:30 compose-farm.yaml"
Type "bat -r 17:35 compose-farm.yaml"
Enter
Sleep 3s
Type "q"
Sleep 500ms
Type "# Check service status"
Type "# Check stack status"
Enter
Sleep 500ms
@@ -45,7 +45,7 @@ Type "cf ps immich"
Enter
Wait+Screen /PORTS/
Type "# Start a service"
Type "# Start a stack"
Enter
Sleep 500ms

134
docs/demos/cli/record.py Executable file
View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""Record CLI demos using VHS."""
import shutil
import subprocess
import sys
from pathlib import Path
from rich.console import Console
from compose_farm.config import load_config
from compose_farm.state import load_state
console = Console()
SCRIPT_DIR = Path(__file__).parent
STACKS_DIR = Path("/opt/stacks")
CONFIG_FILE = STACKS_DIR / "compose-farm.yaml"
OUTPUT_DIR = SCRIPT_DIR.parent.parent / "assets"
DEMOS = ["install", "quickstart", "logs", "compose", "update", "migration", "apply"]
def _run(cmd: list[str], **kw) -> bool:
return subprocess.run(cmd, check=False, **kw).returncode == 0
def _set_config(host: str) -> None:
"""Set audiobookshelf host in config file."""
_run(["sed", "-i", f"s/audiobookshelf: .*/audiobookshelf: {host}/", str(CONFIG_FILE)])
def _get_hosts() -> tuple[str | None, str | None]:
"""Return (config_host, state_host) for audiobookshelf."""
config = load_config()
state = load_state(config)
return config.stacks.get("audiobookshelf"), state.get("audiobookshelf")
def _setup_state(demo: str) -> bool:
"""Set up required state for demo. Returns False on failure."""
if demo not in ("migration", "apply"):
return True
config_host, state_host = _get_hosts()
if demo == "migration":
# Migration needs audiobookshelf on nas in BOTH config and state
if config_host != "nas":
console.print("[yellow]Setting up: config → nas[/yellow]")
_set_config("nas")
if state_host != "nas":
console.print("[yellow]Setting up: state → nas[/yellow]")
if not _run(["cf", "apply"], cwd=STACKS_DIR):
return False
elif demo == "apply":
# Apply needs config=nas, state=anton (so there's something to apply)
if config_host != "nas":
console.print("[yellow]Setting up: config → nas[/yellow]")
_set_config("nas")
if state_host == "nas":
console.print("[yellow]Setting up: state → anton[/yellow]")
_set_config("anton")
if not _run(["cf", "apply"], cwd=STACKS_DIR):
return False
_set_config("nas")
return True
def _record(name: str, index: int, total: int) -> bool:
"""Record a single demo."""
console.print(f"[cyan][{index}/{total}][/cyan] [green]Recording:[/green] {name}")
if _run(["vhs", str(SCRIPT_DIR / f"{name}.tape")], cwd=STACKS_DIR):
console.print("[green] ✓ Done[/green]")
return True
console.print("[red] ✗ Failed[/red]")
return False
def _reset_after(demo: str, next_demo: str | None) -> None:
"""Reset state after demos that modify audiobookshelf."""
if demo not in ("quickstart", "migration"):
return
_set_config("nas")
if next_demo != "apply": # Let apply demo show the migration
_run(["cf", "apply"], cwd=STACKS_DIR)
def _restore_config(original: str) -> None:
"""Restore original config and sync state."""
console.print("[yellow]Restoring original config...[/yellow]")
CONFIG_FILE.write_text(original)
_run(["cf", "apply"], cwd=STACKS_DIR)
def _main() -> int:
if not shutil.which("vhs"):
console.print("[red]VHS not found. Install: brew install vhs[/red]")
return 1
if not _run(["git", "-C", str(STACKS_DIR), "diff", "--quiet", "compose-farm.yaml"]):
console.print("[red]compose-farm.yaml has uncommitted changes[/red]")
return 1
requested = sys.argv[1:]
demos = [d for d in requested if d in DEMOS] if requested else DEMOS
if not demos:
console.print(f"[red]Unknown demo. Available: {', '.join(DEMOS)}[/red]")
return 1
# Save original config to restore after recording
original_config = CONFIG_FILE.read_text()
try:
for i, demo in enumerate(demos, 1):
if not _setup_state(demo):
return 1
if not _record(demo, i, len(demos)):
return 1
_reset_after(demo, demos[i] if i < len(demos) else None)
finally:
_restore_config(original_config)
# Move outputs
OUTPUT_DIR.mkdir(exist_ok=True)
for f in (STACKS_DIR / "docs/assets").glob("*.[gw]*"):
shutil.move(str(f), str(OUTPUT_DIR / f.name))
console.print(f"\n[green]Done![/green] Saved to {OUTPUT_DIR}")
return 0
if __name__ == "__main__":
sys.exit(_main())

View File

@@ -1,5 +1,5 @@
# Update Demo
# Shows updating services (pull + down + up)
# Shows updating stacks (pull + build + down + up)
Output docs/assets/update.gif
Output docs/assets/update.webm
@@ -11,7 +11,7 @@ Set Height 500
Set Theme "Catppuccin Mocha"
Set TypingSpeed 50ms
Type "# Update a single service"
Type "# Update a single stack"
Enter
Sleep 500ms

View File

@@ -1,88 +0,0 @@
#!/usr/bin/env bash
# Record all VHS demos
# Run this on a Docker host with compose-farm configured
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOCS_DIR="$(dirname "$SCRIPT_DIR")"
REPO_DIR="$(dirname "$DOCS_DIR")"
OUTPUT_DIR="$DOCS_DIR/assets"
# Colors
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color
# Check for VHS
if ! command -v vhs &> /dev/null; then
echo "VHS not found. Install with:"
echo " brew install vhs"
echo " # or"
echo " go install github.com/charmbracelet/vhs@latest"
exit 1
fi
# Ensure output directory exists
mkdir -p "$OUTPUT_DIR"
# Temp output dir (VHS runs from /opt/stacks, so relative paths go here)
TEMP_OUTPUT="/opt/stacks/docs/assets"
mkdir -p "$TEMP_OUTPUT"
# Change to /opt/stacks so cf commands use installed version (not editable install)
cd /opt/stacks
# Ensure compose-farm.yaml has no uncommitted changes (safety check)
if ! git diff --quiet compose-farm.yaml; then
echo -e "${RED}Error: compose-farm.yaml has uncommitted changes${NC}"
echo "Commit or stash your changes before recording demos"
exit 1
fi
echo -e "${BLUE}Recording VHS demos...${NC}"
echo "Output directory: $OUTPUT_DIR"
echo ""
# Function to record a tape
record_tape() {
local tape=$1
local name=$(basename "$tape" .tape)
echo -e "${GREEN}Recording:${NC} $name"
if vhs "$tape"; then
echo -e "${GREEN} ✓ Done${NC}"
else
echo -e "${RED} ✗ Failed${NC}"
return 1
fi
}
# Record demos in logical order
echo -e "${YELLOW}=== Phase 1: Basic demos ===${NC}"
record_tape "$SCRIPT_DIR/install.tape"
record_tape "$SCRIPT_DIR/quickstart.tape"
record_tape "$SCRIPT_DIR/logs.tape"
echo -e "${YELLOW}=== Phase 2: Update demo ===${NC}"
record_tape "$SCRIPT_DIR/update.tape"
echo -e "${YELLOW}=== Phase 3: Migration demo ===${NC}"
record_tape "$SCRIPT_DIR/migration.tape"
git -C /opt/stacks checkout compose-farm.yaml # Reset after migration
echo -e "${YELLOW}=== Phase 4: Apply demo ===${NC}"
record_tape "$SCRIPT_DIR/apply.tape"
# Move GIFs and WebMs from temp location to repo
echo ""
echo -e "${BLUE}Moving recordings to repo...${NC}"
mv "$TEMP_OUTPUT"/*.gif "$OUTPUT_DIR/" 2>/dev/null || true
mv "$TEMP_OUTPUT"/*.webm "$OUTPUT_DIR/" 2>/dev/null || true
rmdir "$TEMP_OUTPUT" 2>/dev/null || true
rmdir "$(dirname "$TEMP_OUTPUT")" 2>/dev/null || true
echo ""
echo -e "${GREEN}Done!${NC} Recordings saved to $OUTPUT_DIR/"
ls -la "$OUTPUT_DIR"/*.gif "$OUTPUT_DIR"/*.webm 2>/dev/null || echo "No recordings found (check for errors above)"

45
docs/demos/web/README.md Normal file
View File

@@ -0,0 +1,45 @@
# Web UI Demo Recordings
Playwright-based demo recording for Compose Farm web UI.
## Requirements
- Chromium: `playwright install chromium`
- ffmpeg: `apt install ffmpeg` or `brew install ffmpeg`
## Usage
```bash
# Record all demos
python docs/demos/web/record.py
# Record specific demo
python docs/demos/web/record.py navigation
```
## Demos
| Demo | Description |
|------|-------------|
| `navigation` | Command palette fuzzy search and navigation |
| `stack` | Stack restart/logs via command palette |
| `themes` | Theme switching with arrow key preview |
| `workflow` | Full workflow: filter, navigate, logs, themes |
| `console` | Console terminal running cf commands |
| `shell` | Container shell exec with top |
## Output
WebM and GIF files saved to `docs/assets/web-{demo}.{webm,gif}`.
## Files
- `record.py` - Orchestration script
- `conftest.py` - Playwright fixtures, helper functions
- `demo_*.py` - Individual demo scripts
## Notes
- Uses real config at `/opt/stacks/compose-farm.yaml`
- Adjust `pause(page, ms)` calls to control timing
- Viewport: 1280x720

View File

@@ -0,0 +1 @@
"""Web UI demo recording scripts."""

302
docs/demos/web/conftest.py Normal file
View File

@@ -0,0 +1,302 @@
"""Shared fixtures for web UI demo recordings.
Based on tests/web/test_htmx_browser.py patterns for consistency.
"""
from __future__ import annotations
import os
import re
import shutil
import socket
import threading
import time
import urllib.request
from pathlib import Path
from typing import TYPE_CHECKING, Any
from unittest.mock import patch
import pytest
import uvicorn
from compose_farm.config import Config as CFConfig
from compose_farm.config import load_config
from compose_farm.executor import (
get_container_compose_labels as _original_get_compose_labels,
)
from compose_farm.glances import ContainerStats
from compose_farm.glances import fetch_container_stats as _original_fetch_container_stats
from compose_farm.state import load_state as _original_load_state
from compose_farm.web.cdn import CDN_ASSETS, ensure_vendor_cache
# NOTE: Do NOT import create_app here - it must be imported AFTER patches are applied
# to ensure the patched get_config is used by all route modules
if TYPE_CHECKING:
from collections.abc import Generator
from playwright.sync_api import BrowserContext, Page, Route
# Substrings to exclude from demo recordings (case-insensitive)
DEMO_EXCLUDE_PATTERNS = {"arr", "vpn", "tash"}
def _should_exclude(name: str) -> bool:
"""Check if a stack/container name should be excluded from demo."""
name_lower = name.lower()
return any(pattern in name_lower for pattern in DEMO_EXCLUDE_PATTERNS)
def _get_filtered_config() -> CFConfig:
"""Load config but filter out excluded stacks."""
config = load_config()
filtered_stacks = {
name: host for name, host in config.stacks.items() if not _should_exclude(name)
}
return CFConfig(
compose_dir=config.compose_dir,
hosts=config.hosts,
stacks=filtered_stacks,
traefik_file=config.traefik_file,
traefik_stack=config.traefik_stack,
glances_stack=config.glances_stack,
config_path=config.config_path,
)
def _get_filtered_state(config: CFConfig) -> dict[str, str | list[str]]:
"""Load state but filter out excluded stacks."""
state = _original_load_state(config)
return {name: host for name, host in state.items() if not _should_exclude(name)}
async def _filtered_fetch_container_stats(
host_name: str,
host_address: str,
port: int = 61208,
request_timeout: float = 10.0,
) -> tuple[list[ContainerStats] | None, str | None]:
"""Fetch container stats but filter out excluded containers."""
containers, error = await _original_fetch_container_stats(
host_name, host_address, port, request_timeout
)
if containers:
# Filter by container name (stack is empty at this point)
containers = [c for c in containers if not _should_exclude(c.name)]
return containers, error
async def _filtered_get_compose_labels(
config: CFConfig,
host_name: str,
) -> dict[str, tuple[str, str]]:
"""Get compose labels but filter out excluded stacks."""
labels = await _original_get_compose_labels(config, host_name)
# Filter out containers whose stack (project) name should be excluded
return {
name: (stack, service)
for name, (stack, service) in labels.items()
if not _should_exclude(stack)
}
@pytest.fixture(scope="session")
def vendor_cache(request: pytest.FixtureRequest) -> Path:
"""Download CDN assets once and cache to disk for faster recordings."""
cache_dir = Path(str(request.config.rootdir)) / ".pytest_cache" / "vendor"
return ensure_vendor_cache(cache_dir)
@pytest.fixture(scope="session")
def browser_type_launch_args() -> dict[str, str]:
"""Configure Playwright to use system Chromium if available."""
for name in ["chromium", "chromium-browser", "google-chrome", "chrome"]:
path = shutil.which(name)
if path:
return {"executable_path": path}
return {}
# Path to real compose-farm config
REAL_CONFIG_PATH = Path("/opt/stacks/compose-farm.yaml")
@pytest.fixture(scope="module")
def server_url() -> Generator[str, None, None]:
"""Start demo server using real config (with filtered stacks) and return URL."""
os.environ["CF_CONFIG"] = str(REAL_CONFIG_PATH)
# Patch at source module level so all callers get filtered versions
patches = [
# Patch load_config at source - get_config() calls this internally
patch("compose_farm.config.load_config", _get_filtered_config),
# Patch load_state at source and where imported
patch("compose_farm.state.load_state", _get_filtered_state),
patch("compose_farm.web.routes.pages.load_state", _get_filtered_state),
# Patch container fetch to filter out excluded containers (Live Stats page)
patch("compose_farm.glances.fetch_container_stats", _filtered_fetch_container_stats),
# Patch compose labels to filter out excluded stacks
patch("compose_farm.executor.get_container_compose_labels", _filtered_get_compose_labels),
]
for p in patches:
p.start()
# Import create_app AFTER patches are started so route modules see patched get_config
from compose_farm.web.app import create_app # noqa: PLC0415
with socket.socket() as s:
s.bind(("127.0.0.1", 0))
port = s.getsockname()[1]
app = create_app()
uvicorn_config = uvicorn.Config(app, host="127.0.0.1", port=port, log_level="error")
server = uvicorn.Server(uvicorn_config)
thread = threading.Thread(target=server.run, daemon=True)
thread.start()
url = f"http://127.0.0.1:{port}"
server_ready = False
for _ in range(50):
try:
urllib.request.urlopen(url, timeout=0.5) # noqa: S310
server_ready = True
break
except Exception:
time.sleep(0.1)
if not server_ready:
msg = f"Demo server failed to start on {url}"
raise RuntimeError(msg)
yield url
server.should_exit = True
thread.join(timeout=2)
os.environ.pop("CF_CONFIG", None)
for p in patches:
p.stop()
@pytest.fixture(scope="module")
def recording_output_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
"""Directory for video recordings."""
return Path(tmp_path_factory.mktemp("recordings"))
@pytest.fixture
def recording_context(
browser: Any, # pytest-playwright's browser fixture
vendor_cache: Path,
recording_output_dir: Path,
) -> Generator[BrowserContext, None, None]:
"""Browser context with video recording enabled."""
context = browser.new_context(
viewport={"width": 1280, "height": 720},
record_video_dir=str(recording_output_dir),
record_video_size={"width": 1280, "height": 720},
)
# Set up CDN interception
cache = {url: (vendor_cache / f, ct) for url, (f, ct) in CDN_ASSETS.items()}
def handle_cdn(route: Route) -> None:
url = route.request.url
for url_prefix, (filepath, content_type) in cache.items():
if url.startswith(url_prefix):
route.fulfill(status=200, content_type=content_type, body=filepath.read_bytes())
return
print(f"UNCACHED CDN request: {url}")
route.abort("failed")
context.route(re.compile(r"https://(cdn\.jsdelivr\.net|unpkg\.com)/.*"), handle_cdn)
yield context
context.close()
@pytest.fixture
def recording_page(recording_context: BrowserContext) -> Generator[Page, None, None]:
"""Page with recording and slow motion enabled."""
page = recording_context.new_page()
yield page
page.close()
@pytest.fixture
def wide_recording_context(
browser: Any, # pytest-playwright's browser fixture
recording_output_dir: Path,
) -> Generator[BrowserContext, None, None]:
"""Browser context with wider viewport for demos needing more horizontal space.
NOTE: This fixture does NOT use CDN interception (unlike recording_context).
CDN interception was causing inline scripts from containers.html to be
removed from the DOM, likely due to Tailwind's browser plugin behavior.
"""
context = browser.new_context(
viewport={"width": 1920, "height": 1080},
record_video_dir=str(recording_output_dir),
record_video_size={"width": 1920, "height": 1080},
)
yield context
context.close()
@pytest.fixture
def wide_recording_page(wide_recording_context: BrowserContext) -> Generator[Page, None, None]:
"""Page with wider viewport for demos needing more horizontal space."""
page = wide_recording_context.new_page()
yield page
page.close()
# Demo helper functions
def pause(page: Page, ms: int = 500) -> None:
"""Pause for visibility in recording."""
page.wait_for_timeout(ms)
def slow_type(page: Page, selector: str, text: str, delay: int = 100) -> None:
"""Type with visible delay between keystrokes."""
page.type(selector, text, delay=delay)
def open_command_palette(page: Page) -> None:
"""Open command palette with Ctrl+K."""
page.keyboard.press("Control+k")
page.wait_for_selector("#cmd-palette[open]", timeout=2000)
pause(page, 300)
def close_command_palette(page: Page) -> None:
"""Close command palette with Escape."""
page.keyboard.press("Escape")
page.wait_for_selector("#cmd-palette:not([open])", timeout=2000)
pause(page, 200)
def wait_for_sidebar(page: Page) -> None:
"""Wait for sidebar to load with stacks."""
page.wait_for_selector("#sidebar-stacks", timeout=5000)
pause(page, 300)
def navigate_to_stack(page: Page, stack: str) -> None:
"""Navigate to a stack page via sidebar click."""
page.locator("#sidebar-stacks a", has_text=stack).click()
page.wait_for_url(f"**/stack/{stack}", timeout=5000)
pause(page, 500)
def select_command(page: Page, command: str) -> None:
"""Filter and select a command from the palette."""
page.locator("#cmd-input").fill(command)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 200)

View File

@@ -0,0 +1,77 @@
"""Demo: Console terminal.
Records a ~30 second demo showing:
- Navigating to Console page
- Running cf commands in the terminal
- Showing the Compose Farm config in Monaco editor
Run: pytest docs/demos/web/demo_console.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_console(recording_page: Page, server_url: str) -> None:
"""Record console terminal demo."""
page = recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 800)
# Navigate to Console page via sidebar menu
page.locator(".menu a", has_text="Console").click()
page.wait_for_url("**/console", timeout=5000)
pause(page, 1000)
# Wait for terminal to be ready (auto-connects)
page.wait_for_selector("#console-terminal .xterm", timeout=10000)
pause(page, 1500)
# Run fastfetch first
slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=80)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 2500) # Wait for output
# Type cf stats command
slow_type(page, "#console-terminal .xterm-helper-textarea", "cf stats", delay=80)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 3000) # Wait for output
# Type cf ps command
slow_type(page, "#console-terminal .xterm-helper-textarea", "cf ps grocy", delay=80)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 2500) # Wait for output
# Smoothly scroll down to show the Editor section with Compose Farm config
page.evaluate("""
const editor = document.getElementById('console-editor');
if (editor) {
editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
""")
pause(page, 1200) # Wait for smooth scroll animation
# Wait for Monaco editor to load with config content
page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
pause(page, 2500) # Let viewer see the Compose Farm config file
# Final pause
pause(page, 800)

View File

@@ -0,0 +1,85 @@
"""Demo: Live Stats page.
Records a ~20 second demo showing:
- Navigating to Live Stats via command palette
- Container table with real-time stats
- Filtering containers
- Sorting by different columns
- Auto-refresh countdown
Run: pytest docs/demos/web/demo_live_stats.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
open_command_palette,
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_live_stats(wide_recording_page: Page, server_url: str) -> None:
"""Record Live Stats page demo."""
page = wide_recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 1000)
# Navigate to Live Stats via command palette
open_command_palette(page)
pause(page, 400)
slow_type(page, "#cmd-input", "live", delay=100)
pause(page, 500)
page.keyboard.press("Enter")
page.wait_for_url("**/live-stats", timeout=5000)
# Wait for containers to load (may take ~10s on first load due to SSH)
page.wait_for_selector("#container-rows tr:not(:has(.loading))", timeout=30000)
pause(page, 2000) # Let viewer see the full table with timer
# Demonstrate filtering
slow_type(page, "#filter-input", "grocy", delay=100)
pause(page, 1500) # Show filtered results
# Clear filter
page.fill("#filter-input", "")
pause(page, 1000)
# Sort by memory (click header)
page.click("th:has-text('Mem')")
pause(page, 1500)
# Sort by CPU
page.click("th:has-text('CPU')")
pause(page, 1500)
# Sort by host
page.click("th:has-text('Host')")
pause(page, 1500)
# Watch auto-refresh timer count down
pause(page, 3500) # Wait for refresh to happen
# Hover on action menu to show pause behavior
action_btn = page.locator('button[onclick^="openActionMenu"]').first
action_btn.scroll_into_view_if_needed()
action_btn.hover()
pause(page, 2000) # Show paused state (timer shows ⏸) and action menu
# Move away to close menu and resume refresh
page.locator("h2").first.hover() # Move to header
pause(page, 3500) # Watch countdown resume and refresh happen
# Final pause
pause(page, 1000)

View File

@@ -0,0 +1,74 @@
"""Demo: Command palette navigation.
Records a ~15 second demo showing:
- Opening command palette with Ctrl+K
- Fuzzy search filtering
- Arrow key navigation
- Stack and page navigation
Run: pytest docs/demos/web/demo_navigation.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
open_command_palette,
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_navigation(recording_page: Page, server_url: str) -> None:
"""Record command palette navigation demo."""
page = recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 1000) # Let viewer see dashboard
# Open command palette with keyboard shortcut
open_command_palette(page)
pause(page, 500)
# Type partial stack name for fuzzy search
slow_type(page, "#cmd-input", "grocy", delay=120)
pause(page, 800)
# Arrow down to show selection movement
page.keyboard.press("ArrowDown")
pause(page, 400)
page.keyboard.press("ArrowUp")
pause(page, 400)
# Press Enter to navigate to stack
page.keyboard.press("Enter")
page.wait_for_url("**/stack/grocy", timeout=5000)
pause(page, 1500) # Show stack page
# Open palette again to navigate elsewhere
open_command_palette(page)
pause(page, 400)
# Navigate to another stack (immich) to show more navigation
slow_type(page, "#cmd-input", "imm", delay=120)
pause(page, 600)
page.keyboard.press("Enter")
page.wait_for_url("**/stack/immich", timeout=5000)
pause(page, 1200) # Show immich stack page
# Open palette one more time, navigate back to dashboard
open_command_palette(page)
slow_type(page, "#cmd-input", "dashb", delay=120)
pause(page, 500)
page.keyboard.press("Enter")
page.wait_for_url(server_url, timeout=5000)
pause(page, 1000) # Final dashboard view

View File

@@ -0,0 +1,106 @@
"""Demo: Container shell exec via command palette.
Records a ~35 second demo showing:
- Navigating to immich stack (multiple containers)
- Using command palette with fuzzy matching ("sh mach") to open shell
- Running a command
- Using command palette to switch to server container shell
- Running another command
Run: pytest docs/demos/web/demo_shell.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
open_command_palette,
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_shell(recording_page: Page, server_url: str) -> None:
"""Record container shell demo."""
page = recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 800)
# Navigate to immich via command palette (has multiple containers)
open_command_palette(page)
pause(page, 400)
slow_type(page, "#cmd-input", "immich", delay=100)
pause(page, 600)
page.keyboard.press("Enter")
page.wait_for_url("**/stack/immich", timeout=5000)
pause(page, 1500)
# Wait for containers list to load (so shell commands are available)
page.wait_for_selector("#containers-list button", timeout=10000)
pause(page, 800)
# Use command palette with fuzzy matching: "sh mach" -> "Shell: immich-machine-learning"
open_command_palette(page)
pause(page, 400)
slow_type(page, "#cmd-input", "sh mach", delay=100)
pause(page, 600)
page.keyboard.press("Enter")
pause(page, 1000)
# Wait for exec terminal to appear
page.wait_for_selector("#exec-terminal .xterm", timeout=10000)
# Smoothly scroll down to make the terminal visible
page.evaluate("""
const terminal = document.getElementById('exec-terminal');
if (terminal) {
terminal.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
""")
pause(page, 1200)
# Run python version command
slow_type(page, "#exec-terminal .xterm-helper-textarea", "python3 --version", delay=60)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 1500)
# Blur the terminal to release focus (won't scroll)
page.evaluate("document.activeElement?.blur()")
pause(page, 500)
# Use command palette to switch to server container: "sh serv" -> "Shell: immich-server"
open_command_palette(page)
pause(page, 400)
slow_type(page, "#cmd-input", "sh serv", delay=100)
pause(page, 600)
page.keyboard.press("Enter")
pause(page, 1000)
# Wait for new terminal
page.wait_for_selector("#exec-terminal .xterm", timeout=10000)
# Scroll to terminal
page.evaluate("""
const terminal = document.getElementById('exec-terminal');
if (terminal) {
terminal.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
""")
pause(page, 1200)
# Run ls command
slow_type(page, "#exec-terminal .xterm-helper-textarea", "ls /usr/src/app", delay=60)
pause(page, 300)
page.keyboard.press("Enter")
pause(page, 2000)


@@ -0,0 +1,101 @@
"""Demo: Stack actions.
Records a ~30 second demo showing:
- Navigating to a stack page
- Viewing compose file in Monaco editor
- Triggering Restart action via command palette
- Watching terminal output stream
- Triggering Logs action
Run: pytest docs/demos/web/demo_stack.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
open_command_palette,
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_stack(recording_page: Page, server_url: str) -> None:
"""Record stack actions demo."""
page = recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 800)
# Navigate to grocy via command palette
open_command_palette(page)
pause(page, 400)
slow_type(page, "#cmd-input", "grocy", delay=100)
pause(page, 500)
page.keyboard.press("Enter")
page.wait_for_url("**/stack/grocy", timeout=5000)
pause(page, 1000) # Show stack page
# Click on Compose File collapse to show the Monaco editor
# The collapse uses a checkbox input, click it via the parent collapse div
compose_collapse = page.locator(".collapse", has_text="Compose File").first
compose_collapse.locator("input[type=checkbox]").click(force=True)
pause(page, 500)
# Wait for Monaco editor to load and show content
page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
pause(page, 2000) # Let viewer see the compose file
# Smoothly scroll down to show more of the editor
page.evaluate("""
const editor = document.getElementById('compose-editor');
if (editor) {
editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
""")
pause(page, 1200) # Wait for smooth scroll animation
# Close the compose file section
compose_collapse.locator("input[type=checkbox]").click(force=True)
pause(page, 500)
# Open command palette for stack actions
open_command_palette(page)
pause(page, 400)
# Filter to Restart action
slow_type(page, "#cmd-input", "restart", delay=120)
pause(page, 600)
# Execute Restart
page.keyboard.press("Enter")
pause(page, 300)
# Wait for terminal to expand and show output
page.wait_for_selector("#terminal-output .xterm", timeout=5000)
pause(page, 2500) # Let viewer see terminal streaming
# Open palette again for Logs
open_command_palette(page)
pause(page, 400)
# Filter to Logs action
slow_type(page, "#cmd-input", "logs", delay=120)
pause(page, 600)
# Execute Logs
page.keyboard.press("Enter")
pause(page, 300)
# Show log output
page.wait_for_selector("#terminal-output .xterm", timeout=5000)
pause(page, 2500) # Final view of logs


@@ -0,0 +1,81 @@
"""Demo: Theme switching.
Records a ~15 second demo showing:
- Opening theme picker via theme button
- Live theme preview on arrow navigation
- Selecting different themes
- Theme persistence
Run: pytest docs/demos/web/demo_themes.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import (
pause,
slow_type,
wait_for_sidebar,
)
if TYPE_CHECKING:
from playwright.sync_api import Page
@pytest.mark.browser # type: ignore[misc]
def test_demo_themes(recording_page: Page, server_url: str) -> None:
"""Record theme switching demo."""
page = recording_page
# Start on dashboard
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 1000) # Show initial theme
# Click theme button to open theme picker
page.locator("#theme-btn").click()
page.wait_for_selector("#cmd-palette[open]", timeout=2000)
pause(page, 600)
# Arrow through many themes to show live preview effect
for _ in range(12):
page.keyboard.press("ArrowDown")
pause(page, 350) # Show each preview
# Go back up through a few (land on valentine, not cyberpunk)
for _ in range(4):
page.keyboard.press("ArrowUp")
pause(page, 350)
# Select current theme with Enter
page.keyboard.press("Enter")
pause(page, 1000)
# Close palette with Escape
page.keyboard.press("Escape")
pause(page, 800)
# Open again and use search to find specific theme
page.locator("#theme-btn").click()
page.wait_for_selector("#cmd-palette[open]", timeout=2000)
pause(page, 400)
# Type to filter to a light theme (theme button pre-populates "theme:")
slow_type(page, "#cmd-input", "cup", delay=100)
pause(page, 500)
page.keyboard.press("Enter")
pause(page, 1000)
# Close and return to dark
page.keyboard.press("Escape")
pause(page, 500)
page.locator("#theme-btn").click()
page.wait_for_selector("#cmd-palette[open]", timeout=2000)
pause(page, 300)
slow_type(page, "#cmd-input", "dark", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
pause(page, 800)


@@ -0,0 +1,189 @@
"""Demo: Full workflow.
Records a comprehensive demo (~60 seconds) combining all major features:
1. Console page: terminal with fastfetch, cf pull command
2. Editor showing Compose Farm YAML config
3. Command palette navigation to grocy stack
4. Stack actions: up, logs
5. Switch to dozzle stack via command palette, run update
6. Dashboard overview
7. Theme cycling via command palette
This demo is used on the homepage and Web UI page as the main showcase.
Run: pytest docs/demos/web/demo_workflow.py -v --no-cov
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import pytest
from conftest import open_command_palette, pause, slow_type, wait_for_sidebar
if TYPE_CHECKING:
from playwright.sync_api import Page
def _demo_console_terminal(page: Page, server_url: str) -> None:
"""Demo part 1: Console page with terminal and editor."""
# Start on dashboard briefly
page.goto(server_url)
wait_for_sidebar(page)
pause(page, 800)
# Navigate to Console page via command palette
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "cons", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
page.wait_for_url("**/console", timeout=5000)
pause(page, 800)
# Wait for terminal to be ready
page.wait_for_selector("#console-terminal .xterm", timeout=10000)
pause(page, 1000)
# Run fastfetch first
slow_type(page, "#console-terminal .xterm-helper-textarea", "fastfetch", delay=60)
pause(page, 200)
page.keyboard.press("Enter")
pause(page, 2000) # Wait for output
# Run cf pull on a stack to show Compose Farm in action
slow_type(page, "#console-terminal .xterm-helper-textarea", "cf pull grocy", delay=60)
pause(page, 200)
page.keyboard.press("Enter")
pause(page, 3000) # Wait for pull output
def _demo_config_editor(page: Page) -> None:
"""Demo part 2: Show the Compose Farm config in editor."""
# Smoothly scroll down to show the Editor section
# Use JavaScript for smooth scrolling animation
page.evaluate("""
const editor = document.getElementById('console-editor');
if (editor) {
editor.scrollIntoView({ behavior: 'smooth', block: 'center' });
}
""")
pause(page, 1200) # Wait for smooth scroll animation
# Wait for Monaco editor to load with config content
page.wait_for_selector("#console-editor .monaco-editor", timeout=10000)
pause(page, 2000) # Let viewer see the Compose Farm config file
def _demo_stack_actions(page: Page) -> None:
"""Demo part 3: Navigate to stack and run actions."""
# Click on sidebar to take focus away from terminal, then use command palette
page.locator("#sidebar-stacks").click()
pause(page, 300)
# Navigate to grocy via command palette
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "grocy", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
page.wait_for_url("**/stack/grocy", timeout=5000)
pause(page, 1000)
# Open Compose File editor to show the compose.yaml
compose_collapse = page.locator(".collapse", has_text="Compose File").first
compose_collapse.locator("input[type=checkbox]").click(force=True)
pause(page, 500)
# Wait for Monaco editor to load and show content
page.wait_for_selector("#compose-editor .monaco-editor", timeout=10000)
pause(page, 2000) # Let viewer see the compose file
# Close the compose file section
compose_collapse.locator("input[type=checkbox]").click(force=True)
pause(page, 500)
# Run Up action via command palette
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "up", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
pause(page, 200)
# Wait for terminal output
page.wait_for_selector("#terminal-output .xterm", timeout=5000)
pause(page, 2500)
# Show logs
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "logs", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
pause(page, 200)
page.wait_for_selector("#terminal-output .xterm", timeout=5000)
pause(page, 2500)
# Switch to dozzle via command palette (on nas for lower latency)
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "dozzle", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
page.wait_for_url("**/stack/dozzle", timeout=5000)
pause(page, 1000)
# Run update action
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "upda", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
pause(page, 200)
page.wait_for_selector("#terminal-output .xterm", timeout=5000)
pause(page, 2500)
def _demo_dashboard_and_themes(page: Page, server_url: str) -> None:
"""Demo part 4: Dashboard and theme cycling."""
# Navigate to dashboard via command palette
open_command_palette(page)
pause(page, 300)
slow_type(page, "#cmd-input", "dash", delay=100)
pause(page, 400)
page.keyboard.press("Enter")
page.wait_for_url(server_url, timeout=5000)
pause(page, 800)
# Scroll to top of page to ensure dashboard is fully visible
page.evaluate("window.scrollTo(0, 0)")
pause(page, 600)
# Open theme picker and arrow down to Dracula (shows live preview)
page.locator("#theme-btn").click()
page.wait_for_selector("#cmd-palette[open]", timeout=2000)
pause(page, 400)
# Arrow down through themes with live preview until we reach Dracula
for _ in range(19):
page.keyboard.press("ArrowDown")
pause(page, 180)
# Select Dracula theme and end on it
pause(page, 400)
page.keyboard.press("Enter")
pause(page, 1500)
@pytest.mark.browser # type: ignore[misc]
def test_demo_workflow(recording_page: Page, server_url: str) -> None:
"""Record full workflow demo."""
page = recording_page
_demo_console_terminal(page, server_url)
_demo_config_editor(page)
_demo_stack_actions(page)
_demo_dashboard_and_themes(page, server_url)

docs/demos/web/record.py Executable file

@@ -0,0 +1,259 @@
#!/usr/bin/env python3
"""Record all web UI demos.
This script orchestrates recording of web UI demos using Playwright,
then converts the WebM recordings to GIF format.
Usage:
python docs/demos/web/record.py # Record all demos
python docs/demos/web/record.py navigation # Record specific demo
Requirements:
- Playwright with Chromium: playwright install chromium
- ffmpeg for GIF conversion: apt install ffmpeg / brew install ffmpeg
"""
from __future__ import annotations
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
from rich.console import Console
console = Console()
SCRIPT_DIR = Path(__file__).parent
REPO_DIR = SCRIPT_DIR.parent.parent.parent
OUTPUT_DIR = REPO_DIR / "docs" / "assets"
DEMOS = [
"navigation",
"stack",
"themes",
"workflow",
"console",
"shell",
"live_stats",
]
# High-quality ffmpeg settings for VP8 encoding
# See: https://github.com/microsoft/playwright/issues/10855
# See: https://github.com/microsoft/playwright/issues/31424
#
# MAX_QUALITY: Lossless-like, largest files
# BALANCED_QUALITY: ~43% file size, nearly indistinguishable quality
MAX_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 0 -crf 0 -deadline best -speed 0 -b:v 0 -threads 0"
BALANCED_QUALITY_ARGS = "-c:v vp8 -qmin 0 -qmax 10 -crf 4 -deadline best -speed 0 -b:v 0 -threads 0"
# Choose which quality to use
VIDEO_QUALITY_ARGS = MAX_QUALITY_ARGS
def patch_playwright_video_quality() -> None:
"""Patch Playwright's videoRecorder.js to use high-quality encoding settings."""
from playwright._impl._driver import compute_driver_executable # noqa: PLC0415
# compute_driver_executable returns (node_path, cli_path)
result = compute_driver_executable()
node_path = result[0] if isinstance(result, tuple) else result
driver_path = Path(node_path).parent
video_recorder = driver_path / "package" / "lib" / "server" / "chromium" / "videoRecorder.js"
if not video_recorder.exists():
msg = f"videoRecorder.js not found at {video_recorder}"
raise FileNotFoundError(msg)
content = video_recorder.read_text()
# Check if already patched
if "deadline best" in content:
return # Already patched
# Pattern to match the ffmpeg args line
pattern = (
r"-c:v vp8 -qmin \d+ -qmax \d+ -crf \d+ -deadline \w+ -speed \d+ -b:v \w+ -threads \d+"
)
if not re.search(pattern, content):
msg = "Could not find ffmpeg args pattern in videoRecorder.js"
raise ValueError(msg)
# Replace with high-quality settings
new_content = re.sub(pattern, VIDEO_QUALITY_ARGS, content)
video_recorder.write_text(new_content)
console.print("[green]Patched Playwright for high-quality video recording[/green]")
def record_demo(name: str, index: int, total: int) -> Path | None:
"""Run a single demo and return the video path."""
console.print(f"[cyan][{index}/{total}][/cyan] [green]Recording:[/green] web-{name}")
demo_file = SCRIPT_DIR / f"demo_{name}.py"
if not demo_file.exists():
console.print(f"[red] Demo file not found: {demo_file}[/red]")
return None
# Create temp output dir for this recording
temp_dir = SCRIPT_DIR / ".recordings"
temp_dir.mkdir(exist_ok=True)
# Run pytest with video recording
# Set PYTHONPATH so conftest.py imports work
env = {**os.environ, "PYTHONPATH": str(SCRIPT_DIR)}
result = subprocess.run(
[
sys.executable,
"-m",
"pytest",
str(demo_file),
"-v",
"--no-cov",
"-x", # Stop on first failure
f"--basetemp={temp_dir}",
],
check=False,
cwd=REPO_DIR,
capture_output=True,
text=True,
env=env,
)
if result.returncode != 0:
console.print(f"[red] Failed to record {name}[/red]")
console.print(result.stdout)
console.print(result.stderr)
return None
# Find the recorded video
videos = list(temp_dir.rglob("*.webm"))
if not videos:
console.print(f"[red] No video found for {name}[/red]")
return None
# Use the most recent video
video = max(videos, key=lambda p: p.stat().st_mtime)
console.print(f"[green] Recorded: {video.name}[/green]")
return video
def convert_to_gif(webm_path: Path, output_name: str) -> Path:
"""Convert WebM to GIF using ffmpeg with palette optimization."""
gif_path = OUTPUT_DIR / f"{output_name}.gif"
palette_path = webm_path.parent / "palette.png"
# Two-pass approach for better quality
# Pass 1: Generate palette
subprocess.run(
[ # noqa: S607
"ffmpeg",
"-y",
"-i",
str(webm_path),
"-vf",
"fps=10,scale=1280:-1:flags=lanczos,palettegen=stats_mode=diff",
str(palette_path),
],
check=True,
capture_output=True,
)
# Pass 2: Generate GIF with palette
subprocess.run(
[ # noqa: S607
"ffmpeg",
"-y",
"-i",
str(webm_path),
"-i",
str(palette_path),
"-lavfi",
"fps=10,scale=1280:-1:flags=lanczos[x];[x][1:v]paletteuse=dither=bayer:bayer_scale=5:diff_mode=rectangle",
str(gif_path),
],
check=True,
capture_output=True,
)
palette_path.unlink(missing_ok=True)
return gif_path
def move_recording(video_path: Path, name: str) -> tuple[Path, Path]:
"""Move WebM and convert to GIF, returning both paths."""
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
output_name = f"web-{name}"
webm_dest = OUTPUT_DIR / f"{output_name}.webm"
shutil.copy2(video_path, webm_dest)
console.print(f"[blue] WebM: {webm_dest.relative_to(REPO_DIR)}[/blue]")
gif_path = convert_to_gif(video_path, output_name)
console.print(f"[blue] GIF: {gif_path.relative_to(REPO_DIR)}[/blue]")
return webm_dest, gif_path
def cleanup() -> None:
"""Clean up temporary recording files."""
temp_dir = SCRIPT_DIR / ".recordings"
if temp_dir.exists():
shutil.rmtree(temp_dir)
def main() -> int:
"""Record all web UI demos."""
console.print("[blue]Recording web UI demos...[/blue]")
console.print(f"Output directory: {OUTPUT_DIR}")
console.print()
# Patch Playwright for high-quality video recording
patch_playwright_video_quality()
# Determine which demos to record
if len(sys.argv) > 1:
demos_to_record = [d for d in sys.argv[1:] if d in DEMOS]
if not demos_to_record:
console.print(f"[red]Unknown demo(s). Available: {', '.join(DEMOS)}[/red]")
return 1
else:
demos_to_record = DEMOS
results: dict[str, tuple[Path | None, Path | None]] = {}
try:
for i, demo in enumerate(demos_to_record, 1):
video_path = record_demo(demo, i, len(demos_to_record))
if video_path:
webm, gif = move_recording(video_path, demo)
results[demo] = (webm, gif)
else:
results[demo] = (None, None)
console.print()
finally:
cleanup()
# Summary
console.print("[blue]=== Summary ===[/blue]")
success_count = sum(1 for w, _ in results.values() if w is not None)
console.print(f"Recorded: {success_count}/{len(demos_to_record)} demos")
console.print()
for demo, (webm, gif) in results.items(): # type: ignore[assignment]
status = "[green]OK[/green]" if webm else "[red]FAILED[/red]"
console.print(f" {demo}: {status}")
if webm:
console.print(f" {webm.relative_to(REPO_DIR)}")
if gif:
console.print(f" {gif.relative_to(REPO_DIR)}")
return 0 if success_count == len(demos_to_record) else 1
if __name__ == "__main__":
sys.exit(main())


@@ -1,90 +0,0 @@
# Docker Swarm Overlay Networks with Compose Farm
Notes from testing Docker Swarm's attachable overlay networks as a way to get cross-host container networking while still using `docker compose`.
## The Idea
Docker Swarm overlay networks can be made "attachable", allowing regular `docker compose` containers (not just swarm services) to join them. This would give us:
- Cross-host Docker DNS (containers find each other by name)
- No need to publish ports for inter-container communication
- Keep using `docker compose up` instead of `docker stack deploy`
## Setup Steps
```bash
# On manager node
docker swarm init --advertise-addr <manager-ip>
# On worker nodes (use token from init output)
docker swarm join --token <token> <manager-ip>:2377
# Create attachable overlay network (on manager)
docker network create --driver overlay --attachable my-network
# In compose files, add the network
networks:
my-network:
external: true
```
## Required Ports
Docker Swarm requires these ports open **bidirectionally** between all nodes:
| Port | Protocol | Purpose |
|------|----------|---------|
| 2377 | TCP | Cluster management |
| 7946 | TCP + UDP | Node communication |
| 4789 | UDP | Overlay network traffic (VXLAN) |
## Test Results (2024-12-13)
- docker-debian (192.168.1.66) as manager
- dev-lxc (192.168.1.167) as worker
### What worked
- Swarm init and join
- Overlay network creation
- Nodes showed as Ready
### What failed
- Container on dev-lxc couldn't attach to overlay network
- Error: `attaching to network failed... context deadline exceeded`
- Cause: Port 7946 blocked from docker-debian → dev-lxc
### Root cause
Firewall on dev-lxc wasn't configured to allow swarm ports. Opening these ports requires sudo access on each node.
## Conclusion
Docker Swarm overlay networks are **not plug-and-play**. Requirements:
1. Swarm init/join on all nodes
2. Firewall rules on all nodes (needs sudo/root)
3. All nodes must have bidirectional connectivity on 3 ports
For a simpler alternative, consider:
- **Tailscale**: VPN mesh, containers use host's Tailscale IP
- **Host networking + published ports**: What compose-farm does today
- **Keep dependent services together**: Avoid cross-host networking entirely
## Future Work
If we decide to support overlay networks:
1. Add a `compose-farm network create` command that:
- Initializes swarm if needed
- Creates attachable overlay network
- Documents required firewall rules
2. Add network config to compose-farm.yaml:
```yaml
overlay_network: compose-farm-net
```
3. Auto-inject network into compose files (or document manual setup)


@@ -1,128 +0,0 @@
# Future Improvements
Low-priority improvements identified during code review. These are not currently causing issues but could be addressed if they become pain points.
## 1. State Module Efficiency (LOW)
**Current:** Every state operation reads and writes the entire file.
```python
def set_service_host(config, service, host):
state = load_state(config) # Read file
state[service] = host
save_state(config, state) # Write file
```
**Impact:** With 87 services, this is fine. With 1000+, it would be slow.
**Potential fix:** Add batch operations:
```python
def update_state(config, updates: dict[str, str | None]) -> None:
"""Batch update: set services to hosts, None means remove."""
state = load_state(config)
for service, host in updates.items():
if host is None:
state.pop(service, None)
else:
state[service] = host
save_state(config, state)
```
**When to do:** Only if state operations become noticeably slow.
---
## 2. Remote-Aware Compose Path Resolution (LOW)
**Current:** `config.get_compose_path()` checks if files exist on the local filesystem:
```python
def get_compose_path(self, service: str) -> Path:
for filename in ("compose.yaml", "compose.yml", ...):
candidate = service_dir / filename
if candidate.exists(): # Local check!
return candidate
```
**Why this works:** NFS/shared storage means local = remote.
**Why it could break:** If running compose-farm from a machine without the NFS mount, it returns `compose.yaml` (the default) even if `docker-compose.yml` exists on the remote host.
**Potential fix:** Query the remote host for file existence, or accept this limitation and document it.
**When to do:** Only if users need to run compose-farm from non-NFS machines.
---
## 3. Add Integration Tests for CLI Commands (MEDIUM)
**Current:** No integration tests for the actual CLI commands. Tests cover the underlying functions but not the Typer commands themselves.
**Potential fix:** Add integration tests using `CliRunner` from Typer:
```python
from typer.testing import CliRunner
from compose_farm.cli import app
runner = CliRunner()
def test_check_command_validates_config():
result = runner.invoke(app, ["check", "--local"])
assert result.exit_code == 0
```
**When to do:** When CLI behavior becomes complex enough to warrant dedicated testing.
---
## 4. Add Tests for operations.py (MEDIUM)
**Current:** Operations module has 30% coverage. Most logic is tested indirectly through test_sync.py.
**Potential fix:** Add dedicated tests for:
- `up_services()` with migration scenarios
- `preflight_check()`
- `check_host_compatibility()`
**When to do:** When adding new operations or modifying migration logic.
---
## 5. Consider Structured Logging (LOW)
**Current:** Operations print directly to console using Rich. This couples the operations module to the Rich library.
**Potential fix:** Use Python's logging module with a custom Rich handler:
```python
import logging
logger = logging.getLogger(__name__)
# In operations:
logger.info("Migrating %s from %s to %s", service, old_host, new_host)
# In cli.py - configure Rich handler:
from rich.logging import RichHandler
logging.basicConfig(handlers=[RichHandler()])
```
**Benefits:**
- Operations become testable without capturing stdout
- Logs can be redirected to files
- Log levels provide filtering
**When to do:** Only if console output coupling becomes a problem for testing or extensibility.
---
## Design Decisions to Keep
These patterns are working well and should be preserved:
1. **asyncio + asyncssh** - Solid async foundation
2. **Pydantic models** - Clean validation
3. **Rich for output** - Good UX
4. **Test structure** - Good coverage
5. **Module separation** - cli/operations/executor/compose pattern
6. **KISS principle** - Don't over-engineer

docs/docker-deployment.md Normal file

@@ -0,0 +1,116 @@
---
icon: lucide/container
---
# Docker Deployment
Run the Compose Farm web UI in Docker.
## Quick Start
**1. Get the compose file:**
```bash
curl -O https://raw.githubusercontent.com/basnijholt/compose-farm/main/docker-compose.yml
```
**2. Generate `.env` file:**
```bash
cf config init-env -o .env
```
This auto-detects settings from your `compose-farm.yaml`:
- `DOMAIN` from existing traefik labels
- `CF_COMPOSE_DIR` from config
- `CF_UID/GID/HOME/USER` from current user
- `CF_LOCAL_HOST` by matching local IPs to config hosts
Review the output and edit if needed.
**3. Set up SSH keys:**
```bash
docker compose run --rm cf ssh setup
```
**4. Start the web UI:**
```bash
docker compose up -d web
```
Open `http://localhost:9000` (or `https://compose-farm.example.com` if using Traefik).
---
## Configuration
The `cf config init-env` command auto-detects most settings. After running it, review the generated `.env` file and edit if needed:
```bash
$EDITOR .env
```
### What init-env detects
| Variable | How it's detected |
|----------|-------------------|
| `DOMAIN` | Extracted from traefik labels in your stacks |
| `CF_COMPOSE_DIR` | From `compose_dir` in your config |
| `CF_UID/GID/HOME/USER` | From current user (for NFS compatibility) |
| `CF_LOCAL_HOST` | By matching local IPs to configured hosts |
If auto-detection fails for any value, edit the `.env` file manually.
### Glances Monitoring
To show host CPU/memory stats in the dashboard, deploy [Glances](https://nicolargo.github.io/glances/) on your hosts. If `CF_LOCAL_HOST` wasn't detected correctly, set it to your local hostname:
```bash
CF_LOCAL_HOST=nas # Replace with your local host name
```
See [Host Resource Monitoring](https://github.com/basnijholt/compose-farm#host-resource-monitoring-glances) in the README.
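As a rough sketch, a Glances stack deployed to each host might look like the following; the image options here are assumptions based on the upstream Glances documentation, so follow the linked README and the repository example for the supported setup:

```yaml
# Hypothetical glances/docker-compose.yml -- adapt to your environment
services:
  glances:
    image: nicolargo/glances:latest
    restart: unless-stopped
    pid: host            # let Glances see host processes
    network_mode: host   # serves the REST API on port 61208
    environment:
      - GLANCES_OPT=-w   # run the web server / REST API
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro  # container stats
```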
---
## Troubleshooting
### SSH "Permission denied" or "Host key verification failed"
Regenerate keys:
```bash
docker compose run --rm cf ssh setup
```
### Glances shows error for local host
Add your local hostname to `.env`:
```bash
echo "CF_LOCAL_HOST=nas" >> .env
docker compose restart web
```
### Files created as root
Add the non-root variables (`CF_UID`, `CF_GID`, `CF_HOME`, `CF_USER`) to your `.env` file and restart the web container.
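For example:

```bash
# One-time setup: record your user's identity, then restart the web UI
echo "CF_UID=$(id -u)" >> .env
echo "CF_GID=$(id -g)" >> .env
echo "CF_HOME=$HOME" >> .env
echo "CF_USER=$USER" >> .env
docker compose restart web
```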
---
## All Environment Variables
For advanced users, here's the complete reference:
| Variable | Description | Default |
|----------|-------------|---------|
| `DOMAIN` | Domain for Traefik labels | *(required)* |
| `CF_COMPOSE_DIR` | Compose files directory | `/opt/stacks` |
| `CF_UID` / `CF_GID` | User/group ID | `0` (root) |
| `CF_HOME` | Home directory | `/root` |
| `CF_USER` | Username for SSH | `root` |
| `CF_LOCAL_HOST` | Local hostname for Glances | *(auto-detect)* |
| `CF_SSH_DIR` | SSH keys directory | `~/.ssh/compose-farm` |
| `CF_XDG_CONFIG` | Config/backup directory | `~/.config/compose-farm` |
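Putting it together, a filled-in `.env` might look like this (all values are illustrative):

```bash
DOMAIN=example.com
CF_COMPOSE_DIR=/opt/stacks
CF_UID=1000
CF_GID=1000
CF_HOME=/home/alice
CF_USER=alice
CF_LOCAL_HOST=nas
# CF_SSH_DIR and CF_XDG_CONFIG usually keep their defaults
```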


@@ -18,13 +18,13 @@ Before you begin, ensure you have:
## Installation
<video autoplay loop muted playsinline>
<source src="assets/install.webm" type="video/webm">
<source src="/assets/install.webm" type="video/webm">
</video>
### One-liner (recommended)
```bash
curl -fsSL https://raw.githubusercontent.com/basnijholt/compose-farm/main/bootstrap.sh | sh
curl -fsSL https://compose-farm.nijho.lt/install | sh
```
This installs [uv](https://docs.astral.sh/uv/) if needed, then installs compose-farm.
@@ -54,6 +54,25 @@ docker run --rm \
ghcr.io/basnijholt/compose-farm up --all
```
**Running as non-root user** (recommended for NFS mounts):
By default, containers run as root. To preserve file ownership on mounted volumes, set these environment variables in your `.env` file:
```bash
# Add to .env file (one-time setup)
echo "CF_UID=$(id -u)" >> .env
echo "CF_GID=$(id -g)" >> .env
echo "CF_HOME=$HOME" >> .env
echo "CF_USER=$USER" >> .env
```
Or use [direnv](https://direnv.net/) to auto-set these variables when entering the directory:
```bash
cp .envrc.example .envrc && direnv allow
```
This ensures files like `compose-farm-state.yaml` and web UI edits are owned by your user instead of root. The `CF_USER` variable is required for SSH to work when running as a non-root user.
### Verify Installation
```bash
@@ -111,9 +130,9 @@ nas:/volume1/compose /opt/compose nfs defaults 0 0
/opt/compose/ # compose_dir in config
├── plex/
│ └── docker-compose.yml
├── sonarr/
├── grafana/
│ └── docker-compose.yml
├── radarr/
├── nextcloud/
│ └── docker-compose.yml
└── jellyfin/
└── docker-compose.yml
@@ -123,8 +142,38 @@ nas:/volume1/compose /opt/compose nfs defaults 0 0
### Create Config File
Create `~/.config/compose-farm/compose-farm.yaml`:
Create `compose-farm.yaml` in the directory where you'll run commands. For example, if your stacks are in `/opt/stacks`, place the config there too:
```bash
cd /opt/stacks
cf config init
```
Alternatively, use `~/.config/compose-farm/compose-farm.yaml` for a global config. You can also symlink a working directory config to the global location:
```bash
# Create config in your stacks directory, symlink to ~/.config
cf config symlink /opt/stacks/compose-farm.yaml
```
This way, `cf` commands work from anywhere while the config lives with your stacks.
#### Single host example
```yaml
# Where compose files are located (one folder per stack)
compose_dir: /opt/stacks
hosts:
local: localhost
stacks:
plex: local
grafana: local
nextcloud: local
```
#### Multi-host example
```yaml
# Where compose files are located (same path on all hosts)
compose_dir: /opt/compose
@@ -137,16 +186,17 @@ hosts:
hp:
address: 192.168.1.11
# user defaults to current user
local: localhost # Run locally without SSH
# Map services to hosts
services:
# Map stacks to hosts
stacks:
plex: nuc
sonarr: nuc
radarr: hp
jellyfin: local
grafana: nuc
nextcloud: hp
```
Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
For cross-host HTTP routing, add Traefik labels and configure `traefik_file` (see [Traefik Integration](traefik.md)).
### Validate Configuration
```bash
@@ -167,20 +217,20 @@ cf check
cf ps
```
Shows all configured services and their status.
Shows all configured stacks and their status.
### Start All Services
### Start All Stacks
```bash
cf up --all
```
Starts all services on their assigned hosts.
Starts all stacks on their assigned hosts.
### Start Specific Services
### Start Specific Stacks
```bash
cf up plex sonarr
cf up plex grafana
```
### Apply Configuration
@@ -193,13 +243,13 @@ cf apply # Execute changes
```
This will:
1. Start services in config but not running
2. Migrate services on wrong host
3. Stop services removed from config
1. Start stacks in config but not running
2. Migrate stacks on wrong host
3. Stop stacks removed from config
## Docker Network Setup
If your services use an external Docker network:
If your stacks use an external Docker network:
```bash
# Create network on all hosts
@@ -213,25 +263,28 @@ Default network: `mynetwork` with subnet `172.20.0.0/16`
## Example Workflow
### 1. Add a New Service
### 1. Add a New Stack
Create the compose file:
```bash
# On any host (shared storage)
mkdir -p /opt/compose/prowlarr
cat > /opt/compose/prowlarr/docker-compose.yml << 'EOF'
mkdir -p /opt/compose/gitea
cat > /opt/compose/gitea/docker-compose.yml << 'EOF'
services:
prowlarr:
image: lscr.io/linuxserver/prowlarr:latest
container_name: prowlarr
gitea:
image: docker.gitea.com/gitea:latest
container_name: gitea
environment:
- PUID=1000
- PGID=1000
- USER_UID=1000
- USER_GID=1000
volumes:
- /opt/config/prowlarr:/config
- /opt/config/gitea:/data
- /etc/timezone:/etc/timezone:ro
- /etc/localtime:/etc/localtime:ro
ports:
- "9696:9696"
- "3000:3000"
- "2222:22"
restart: unless-stopped
EOF
```
@@ -239,23 +292,23 @@ EOF
Add to config:
```yaml
services:
# ... existing services
prowlarr: nuc
stacks:
# ... existing stacks
gitea: nuc
```
Start the service:
Start the stack:
```bash
cf up prowlarr
cf up gitea
```
### 2. Move a Service to Another Host
### 2. Move a Stack to Another Host
Edit `compose-farm.yaml`:
```yaml
services:
stacks:
plex: hp # Changed from nuc
```
@@ -272,11 +325,11 @@ Or use apply to reconcile everything:
cf apply
```
### 3. Update All Services
### 3. Update All Stacks
```bash
cf update --all
# Runs: pull + down + up for each service
# Runs: pull + build + down + up for each stack
```
## Next Steps


@@ -8,14 +8,21 @@ A minimal CLI tool to run Docker Compose commands across multiple hosts via SSH.
## What is Compose Farm?
Compose Farm lets you manage Docker Compose services across multiple machines from a single command line. Think [Dockge](https://dockge.kuma.pet/) but with a CLI and web interface, designed for multi-host deployments.
Compose Farm lets you manage Docker Compose stacks across multiple machines from a single command line. Think [Dockge](https://dockge.kuma.pet/) but with a CLI and web interface, designed for multi-host deployments.
Define which services run where in one YAML file, then use `cf apply` to make reality match your configuration.
Define which stacks run where in one YAML file, then use `cf apply` to make reality match your configuration.
It also works great on a single host with one folder per stack; just map stacks to `localhost`.
## Quick Demo
**CLI:**
<video autoplay loop muted playsinline>
<source src="assets/quickstart.webm" type="video/webm">
<source src="/assets/quickstart.webm" type="video/webm">
</video>
**[Web UI](web-ui.md):**
<video autoplay loop muted playsinline>
<source src="/assets/web-workflow.webm" type="video/webm">
</video>
## Why Compose Farm?
@@ -31,6 +38,31 @@ Define which services run where in one YAML file, then use `cf apply` to make re
## Quick Start
### Single host
No SSH, shared storage, or Traefik file-provider required.
```yaml
# compose-farm.yaml
compose_dir: /opt/stacks
hosts:
local: localhost
stacks:
plex: local
jellyfin: local
traefik: local
```
```bash
cf apply # Start/stop stacks to match config
```
### Multi-host
Requires SSH plus a shared `compose_dir` path on all hosts (NFS or sync).
```yaml
# compose-farm.yaml
compose_dir: /opt/compose
@@ -41,16 +73,19 @@ hosts:
server-2:
address: 192.168.1.11
services:
stacks:
plex: server-1
jellyfin: server-2
sonarr: server-1
grafana: server-1
```
```bash
cf apply # Services start, migrate, or stop as needed
cf apply # Stacks start, migrate, or stop as needed
```
Each entry in `stacks:` maps to a folder under `compose_dir` that contains a compose file.
For cross-host HTTP routing, add Traefik labels and configure `traefik_file` to generate file-provider config.
### Installation
```bash
@@ -61,7 +96,7 @@ pip install compose-farm
### Configuration
Create `~/.config/compose-farm/compose-farm.yaml`:
Create `compose-farm.yaml` in the directory where you'll run commands (e.g., `/opt/stacks`), or in `~/.config/compose-farm/`:
```yaml
compose_dir: /opt/compose
@@ -73,20 +108,22 @@ hosts:
hp:
address: 192.168.1.11
services:
stacks:
plex: nuc
sonarr: nuc
radarr: hp
grafana: nuc
nextcloud: hp
```
See [Configuration](configuration.md) for all options and the full search order.
### Usage
```bash
# Make reality match config
cf apply
# Start specific services
cf up plex sonarr
# Start specific stacks
cf up plex grafana
# Check status
cf ps
@@ -98,13 +135,13 @@ cf logs -f plex
## Key Features
- **Declarative configuration**: One YAML defines where everything runs
- **Auto-migration**: Change a host assignment, run `cf up`, service moves automatically
- **Auto-migration**: Change a host assignment, run `cf up`, stack moves automatically
<video autoplay loop muted playsinline>
<source src="assets/migration.webm" type="video/webm">
<source src="/assets/migration.webm" type="video/webm">
</video>
- **Parallel execution**: Multiple services start/stop concurrently
- **State tracking**: Knows which services are running where
- **Parallel execution**: Multiple stacks start/stop concurrently
- **State tracking**: Knows which stacks are running where
- **Traefik integration**: Generate file-provider config for cross-host routing
- **Zero changes**: Your compose files work as-is
@@ -120,6 +157,7 @@ cf logs -f plex
- [Getting Started](getting-started.md) - Installation and first steps
- [Configuration](configuration.md) - All configuration options
- [Commands](commands.md) - CLI reference
- [Web UI](web-ui.md) - Browser-based management interface
- [Architecture](architecture.md) - How it works under the hood
- [Traefik Integration](traefik.md) - Multi-host routing setup
- [Best Practices](best-practices.md) - Tips and limitations

bootstrap.sh → docs/install Executable file → Normal file

@@ -1,6 +1,6 @@
#!/bin/sh
# Compose Farm bootstrap script
# Usage: curl -fsSL https://raw.githubusercontent.com/basnijholt/compose-farm/main/bootstrap.sh | sh
# Usage: curl -fsSL https://compose-farm.nijho.lt/install | sh
#
# This script installs uv (if needed) and then installs compose-farm as a uv tool.


@@ -0,0 +1,21 @@
// Fix Safari video autoplay issues
(function() {
function initVideos() {
document.querySelectorAll('video[autoplay]').forEach(function(video) {
video.load();
video.play().catch(function() {});
});
}
// For initial page load (needed for Chrome)
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', initVideos);
} else {
initVideos();
}
// For MkDocs instant navigation (needed for Safari)
if (typeof document$ !== 'undefined') {
document$.subscribe(initVideos);
}
})();


@@ -5,7 +5,7 @@
- I made a CLI to run Docker Compose across multiple hosts without Kubernetes or Swarm
---
I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread services across multiple hosts, but:
I've been running 100+ Docker Compose stacks on a single machine, and it kept running out of memory. I needed to spread stacks across multiple hosts, but:
- **Kubernetes** felt like overkill. I don't need pods, ingress controllers, or 10x more YAML.
- **Docker Swarm** is basically in maintenance mode.
@@ -15,7 +15,7 @@ So I built **Compose Farm**, a simple CLI that runs `docker compose` commands ov
## How it works
One YAML file maps services to hosts:
One YAML file maps stacks to hosts:
```yaml
compose_dir: /opt/stacks
@@ -24,11 +24,11 @@ hosts:
nuc: 192.168.1.10
hp: 192.168.1.11
services:
stacks:
plex: nuc
jellyfin: hp
sonarr: nuc
radarr: nuc
grafana: nuc
nextcloud: nuc
```
Then just:
@@ -43,7 +43,7 @@ cf ps # shows status across all hosts
## Auto-migration
Change a service's host in the config and run `cf up`. It stops the service on the old host and starts it on the new one. No manual SSH needed.
Change a stack's host in the config and run `cf up`. It stops the stack on the old host and starts it on the new one. No manual SSH needed.
```yaml
# Before
@@ -65,7 +65,7 @@ cf up plex # migrates automatically
## What it doesn't do
- No high availability (if a host goes down, services don't auto-migrate)
- No high availability (if a host goes down, stacks don't auto-migrate)
- No overlay networking (containers on different hosts can't talk via Docker DNS)
- No health checks or automatic restarts


@@ -8,7 +8,7 @@ Compose Farm can generate Traefik file-provider configuration for routing traffi
## The Problem
When you run Traefik on one host but services on others, Traefik's docker provider can't see remote containers. The file provider bridges this gap.
When you run Traefik on one host but stacks on others, Traefik's docker provider can't see remote containers. The file provider bridges this gap.
```
Internet
@@ -20,7 +20,7 @@ When you run Traefik on one host but services on others, Traefik's docker provid
│ ┌─────────┐ │
│ │ Traefik │◄─── Docker provider sees local containers │
│ │ │ │
│ │ │◄─── File provider sees remote services
│ │ │◄─── File provider sees remote stacks
│ └────┬────┘ (from compose-farm.yml) │
│ │ │
└───────┼─────────────────────────────────────────────────────┘
@@ -40,7 +40,7 @@ When you run Traefik on one host but services on others, Traefik's docker provid
1. Your compose files have standard Traefik labels
2. Compose Farm reads labels and generates file-provider config
3. Traefik watches the generated file
4. Traffic routes to remote services via host IP + published port
4. Traffic routes to remote stacks via host IP + published port
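As a sketch (router and service names, addresses, and the port are illustrative, not the exact generator output), the file-provider entry for a remote stack has roughly this shape:

```yaml
http:
  routers:
    plex:
      rule: Host(`plex.example.com`)
      service: plex
  services:
    plex:
      loadBalancer:
        servers:
          - url: http://192.168.1.11:32400   # host IP + published port
```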
## Setup
@@ -122,7 +122,7 @@ Configure automatic regeneration in `compose-farm.yaml`:
```yaml
compose_dir: /opt/compose
traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik
traefik_stack: traefik
hosts:
nuc:
@@ -130,10 +130,10 @@ hosts:
hp:
address: 192.168.1.11
services:
stacks:
traefik: nuc # Traefik runs here
plex: hp # Routed via file-provider
sonarr: hp
grafana: hp
```
With `traefik_file` set, these commands auto-regenerate the config:
@@ -143,13 +143,13 @@ With `traefik_file` set, these commands auto-regenerate the config:
- `cf update`
- `cf apply`
### traefik_service Option
### traefik_stack Option
When set, services on the **same host as Traefik** are skipped in file-provider output. Traefik's docker provider handles them directly.
When set, stacks on the **same host as Traefik** are skipped in file-provider output. Traefik's docker provider handles them directly.
```yaml
traefik_service: traefik # traefik runs on nuc
services:
traefik_stack: traefik # traefik runs on nuc
stacks:
traefik: nuc # NOT in file-provider (docker provider)
portainer: nuc # NOT in file-provider (docker provider)
plex: hp # IN file-provider (cross-host)
@@ -215,7 +215,7 @@ labels:
```
Compose Farm resolves variables from:
1. Service's `.env` file
1. Stack's `.env` file
2. Current environment
```bash
@@ -242,7 +242,7 @@ If no suitable port is found, a warning is shown.
```yaml
compose_dir: /opt/compose
traefik_file: /opt/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik
traefik_stack: traefik
hosts:
nuc:
@@ -252,12 +252,12 @@ hosts:
nas:
address: 192.168.1.100
services:
stacks:
traefik: nuc
plex: hp
jellyfin: nas
sonarr: nuc
radarr: nuc
grafana: nuc
nextcloud: nuc
```
### /opt/compose/plex/docker-compose.yml
@@ -309,7 +309,7 @@ http:
- url: http://192.168.1.100:8096
```
Note: `sonarr` and `radarr` are NOT in the file because they're on the same host as Traefik (`nuc`).
Note: `grafana` and `nextcloud` are NOT in the file because they're on the same host as Traefik (`nuc`).
## Combining with Existing Config
@@ -331,7 +331,7 @@ Traefik merges all YAML files in the directory.
## Troubleshooting
### Service Not Accessible
### Stack Not Accessible
1. **Check port is published:**
```yaml
@@ -341,12 +341,12 @@ Traefik merges all YAML files in the directory.
2. **Check label syntax:**
```bash
cf check myservice
cf check mystack
```
3. **Verify generated config:**
```bash
cf traefik-file myservice
cf traefik-file mystack
```
4. **Check Traefik logs:**

docs/web-ui.md Normal file

@@ -0,0 +1,154 @@
---
icon: lucide/layout-dashboard
---
# Web UI
Compose Farm includes a web interface for managing stacks from your browser. Start it with:
```bash
cf web
```
Then open [http://localhost:8000](http://localhost:8000).
## Features
### Full Workflow
Console terminal, config editor, stack navigation, actions (up, logs, update), dashboard overview, and theme switching - all in one flow.
<video autoplay loop muted playsinline>
<source src="/assets/web-workflow.webm" type="video/webm">
</video>
### Stack Actions
Navigate to any stack and use the command palette to trigger actions like restart, pull, update, or view logs. Output streams in real-time via WebSocket.
<video autoplay loop muted playsinline>
<source src="/assets/web-stack.webm" type="video/webm">
</video>
### Theme Switching
35 themes are available via the command palette. Type `theme:` to filter, then use arrow keys to preview themes live before selecting.
<video autoplay loop muted playsinline>
<source src="/assets/web-themes.webm" type="video/webm">
</video>
### Command Palette
Press `Ctrl+K` (or `Cmd+K` on macOS) to open the command palette. Use fuzzy search to quickly navigate, trigger actions, or change themes.
<video autoplay loop muted playsinline>
<source src="/assets/web-navigation.webm" type="video/webm">
</video>
## Pages
### Dashboard (`/`)
- Stack overview with status indicators
- Host statistics (CPU, memory, disk, load via Glances)
- Pending operations (migrations, orphaned stacks)
- Quick actions via command palette
### Live Stats (`/live-stats`)
Real-time container monitoring across all hosts, powered by [Glances](https://nicolargo.github.io/glances/).
- **Live metrics**: CPU, memory, network I/O for every container
- **Auto-refresh**: Updates every 3 seconds (pauses when dropdown menus are open)
- **Filtering**: Type to filter containers by name, stack, host, or image
- **Sorting**: Click column headers to sort by any metric
- **Update detection**: Shows when container images have updates available
<video autoplay loop muted playsinline>
<source src="/assets/web-live_stats.webm" type="video/webm">
</video>
#### Requirements
Live Stats requires Glances to be deployed on all hosts:
1. Add `glances_stack: glances` to your `compose-farm.yaml`
2. Deploy a Glances stack that runs on all hosts (see [example](https://github.com/basnijholt/compose-farm/tree/main/examples/glances))
3. Glances must expose its REST API on port 61208
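A minimal sketch of the config side, assuming the Glances compose file lives in a folder named `glances` under `compose_dir`:

```yaml
# compose-farm.yaml (relevant parts)
glances_stack: glances

stacks:
  glances: all   # run the Glances stack on every configured host
```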
### Stack Detail (`/stack/{name}`)
- Compose file editor (Monaco)
- Environment file editor
- Action buttons: Up, Down, Restart, Update, Pull, Logs
- Container shell access (exec into running containers)
- Terminal output for running commands
Files are automatically backed up before saving to `~/.config/compose-farm/backups/`.
### Console (`/console`)
- Full shell access to any host
- File editor for remote files
- Monaco editor with syntax highlighting
<video autoplay loop muted playsinline>
<source src="/assets/web-console.webm" type="video/webm">
</video>
### Container Shell
Click the Shell button on any running container to exec into it directly from the browser.
<video autoplay loop muted playsinline>
<source src="/assets/web-shell.webm" type="video/webm">
</video>
## Keyboard Shortcuts
| Shortcut | Action |
|----------|--------|
| `Ctrl+K` / `Cmd+K` | Open command palette |
| `Ctrl+S` / `Cmd+S` | Save editors |
| `Escape` | Close command palette |
| `Arrow keys` | Navigate command list |
| `Enter` | Execute selected command |
## Starting the Server
```bash
# Default: http://0.0.0.0:8000
cf web
# Custom port
cf web --port 3000
# Development mode with auto-reload
cf web --reload
# Bind to specific interface
cf web --host 127.0.0.1
```
## Requirements
The web UI requires additional dependencies:
```bash
# If installed via pip
pip install compose-farm[web]
# If installed via uv
uv tool install 'compose-farm[web]'
```
## Architecture
The web UI uses:
- **FastAPI** - Backend API and WebSocket handling
- **HTMX** - Dynamic page updates without full reloads
- **DaisyUI + Tailwind** - Theming and styling
- **Monaco Editor** - Code editing for compose/env files
- **xterm.js** - Terminal emulation for logs and shell access


@@ -2,9 +2,9 @@
Real-world examples demonstrating compose-farm patterns for multi-host Docker deployments.
## Services
## Stacks
| Service | Type | Demonstrates |
| Stack | Type | Demonstrates |
|---------|------|--------------|
| [traefik](traefik/) | Infrastructure | Reverse proxy, Let's Encrypt, file-provider |
| [mealie](mealie/) | Single container | Traefik labels, resource limits, environment vars |
@@ -16,7 +16,7 @@ Real-world examples demonstrating compose-farm patterns for multi-host Docker de
### External Network
All services connect to a shared external network for inter-service communication:
All stacks connect to a shared external network for inter-service communication:
```yaml
networks:
@@ -32,12 +32,12 @@ compose-farm init-network --network mynetwork --subnet 172.20.0.0/16
### Traefik Labels (Dual Routes)
Services expose two routes for different access patterns:
Stacks expose two routes for different access patterns:
1. **HTTPS route** (`websecure` entrypoint): For your custom domain with Let's Encrypt TLS
2. **HTTP route** (`web` entrypoint): For `.local` domains on your LAN (no TLS needed)
This pattern allows accessing services via:
This pattern allows accessing stacks via:
- `https://mealie.example.com` - from anywhere, with TLS
- `http://mealie.local` - from your local network, no TLS overhead
@@ -57,7 +57,7 @@ labels:
### Environment Variables
Each service has a `.env` file for secrets and domain configuration.
Each stack has a `.env` file for secrets and domain configuration.
Edit these files to set your domain and credentials:
```bash
@@ -76,15 +76,15 @@ volumes:
- /mnt/data/myapp:/app/data
```
This allows services to migrate between hosts without data loss.
This allows stacks to migrate between hosts without data loss.
### Multi-Host Services
### Multi-Host Stacks
Services that need to run on every host (e.g., monitoring agents):
Stacks that need to run on every host (e.g., monitoring agents):
```yaml
# In compose-farm.yaml
services:
stacks:
autokuma: all # Runs on every configured host
```
@@ -107,7 +107,7 @@ services:
### AutoKuma Labels (Optional)
The autokuma example demonstrates compose-farm's **multi-host feature** - running the same service on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.
The autokuma example demonstrates compose-farm's **multi-host feature** - running the same stack on all hosts using the `all` keyword. AutoKuma itself is not part of compose-farm; it's just a good example because it needs to run on every host to monitor local Docker containers.
[AutoKuma](https://github.com/BigBoot/AutoKuma) automatically creates Uptime Kuma monitors from Docker labels:
@@ -128,7 +128,7 @@ compose-farm init-network
# 2. Start Traefik first (the reverse proxy)
compose-farm up traefik
# 3. Start other services
# 3. Start other stacks
compose-farm up mealie uptime-kuma
# 4. Check status
@@ -148,24 +148,24 @@ compose-farm down --all
The `compose-farm.yaml` shows a multi-host setup:
- **primary** (192.168.1.10): Runs Traefik and heavy services
- **secondary** (192.168.1.11): Runs lighter services
- **primary** (192.168.1.10): Runs Traefik and heavy stacks
- **secondary** (192.168.1.11): Runs lighter stacks
- **autokuma**: Runs on ALL hosts to monitor local containers
When Traefik runs on `primary` and a service runs on `secondary`, compose-farm
When Traefik runs on `primary` and a stack runs on `secondary`, compose-farm
automatically generates file-provider config so Traefik can route to it.
## Traefik File-Provider
When services run on different hosts than Traefik, use `traefik-file` to generate routing config:
When stacks run on different hosts than Traefik, use `traefik-file` to generate routing config:
```bash
# Generate config for all services
# Generate config for all stacks
compose-farm traefik-file --all -o traefik/dynamic.d/compose-farm.yml
# Or configure auto-generation in compose-farm.yaml:
traefik_file: /opt/stacks/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik
traefik_stack: traefik
```
With `traefik_file` configured, compose-farm automatically regenerates the config after `up`, `down`, `restart`, and `update` commands.


@@ -7,34 +7,34 @@ compose_dir: /opt/stacks/compose-farm/examples
# Auto-regenerate Traefik file-provider config after up/down/restart/update
traefik_file: /opt/stacks/compose-farm/examples/traefik/dynamic.d/compose-farm.yml
traefik_service: traefik # Skip Traefik's host in file-provider (docker provider handles it)
traefik_stack: traefik # Skip Traefik's host in file-provider (docker provider handles it)
hosts:
# Primary server - runs Traefik and most services
# Primary server - runs Traefik and most stacks
# Full form with all options
primary:
address: 192.168.1.10
user: deploy
port: 22
# Secondary server - runs some services for load distribution
# Secondary server - runs some stacks for load distribution
# Short form (user defaults to current user, port defaults to 22)
secondary: 192.168.1.11
# Local execution (no SSH) - for testing or when running on the host itself
local: localhost
services:
stacks:
# Infrastructure (runs on primary where Traefik is)
traefik: primary
# Multi-host services (runs on ALL hosts)
# Multi-host stacks (runs on ALL hosts)
# AutoKuma monitors Docker containers on each host
autokuma: all
# Primary server services
# Primary server stacks
paperless-ngx: primary
# Secondary server services (distributed for performance)
# Secondary server stacks (distributed for performance)
mealie: secondary
uptime-kuma: secondary

justfile Normal file

@@ -0,0 +1,60 @@
# Compose Farm Development Commands
# Run `just` to see available commands
# Default: list available commands
default:
@just --list
# Install development dependencies
install:
uv sync --all-extras --dev
# Run all tests (parallel)
test:
uv run pytest -n auto
# Run CLI tests only (parallel, with coverage)
test-cli:
uv run pytest -m "not browser" -n auto
# Run web UI tests only (parallel)
test-web:
uv run pytest -m browser -n auto
# Lint, format, and type check
lint:
uv run ruff check --fix .
uv run ruff format .
uv run mypy src
uv run ty check src
# Start web UI in development mode with auto-reload
web:
uv run cf web --reload --port 9001
# Kill the web server
kill-web:
lsof -ti :9001 | xargs kill -9 2>/dev/null || true
# Build docs and serve locally
doc:
uvx zensical build
python -m http.server -d site 9002
# Kill the docs server
kill-doc:
lsof -ti :9002 | xargs kill -9 2>/dev/null || true
# Record CLI demos (all or specific: just record-cli quickstart)
record-cli *demos:
python docs/demos/cli/record.py {{demos}}
# Record web UI demos (all or specific: just record-web navigation)
record-web *demos:
python docs/demos/web/record.py {{demos}}
# Clean up build artifacts and caches
clean:
rm -rf .pytest_cache .mypy_cache .ruff_cache .coverage htmlcov dist build
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true


@@ -30,7 +30,8 @@ classifiers = [
"Intended Audience :: Developers",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Operating System :: MacOS",
"Operating System :: POSIX :: Linux",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
@@ -46,6 +47,7 @@ dependencies = [
"asyncssh>=2.14.0",
"pyyaml>=6.0",
"rich>=13.0.0",
"python-dotenv>=1.0.0",
]
[project.optional-dependencies]
@@ -53,6 +55,7 @@ web = [
"fastapi[standard]>=0.109.0",
"jinja2>=3.1.0",
"websockets>=12.0",
"humanize>=4.0.0",
]
[project.urls]
@@ -133,6 +136,10 @@ disallow_untyped_decorators = false
module = "compose_farm.web.*"
disallow_untyped_decorators = false
[[tool.mypy.overrides]]
module = "docs.demos.web.*"
disallow_untyped_decorators = false
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
@@ -160,9 +167,19 @@ exclude_lines = [
'if __name__ == "__main__":',
]
[tool.ty.environment]
python-version = "3.11"
[tool.ty.src]
exclude = [
"hatch_build.py", # Build-time only, hatchling not in dev deps
"docs/demos/**", # Demo scripts with local conftest imports
]
[dependency-groups]
dev = [
"mypy>=1.19.0",
"ty>=0.0.1a13",
"pre-commit>=4.5.0",
"pytest>=9.0.2",
"pytest-asyncio>=1.3.0",


@@ -23,6 +23,7 @@ app = typer.Typer(
help="Compose Farm - run docker compose commands across multiple hosts",
no_args_is_help=True,
context_settings={"help_option_names": ["-h", "--help"]},
rich_markup_mode="rich",
)


@@ -20,7 +20,7 @@ from rich.progress import (
from compose_farm.console import (
MSG_HOST_NOT_FOUND,
MSG_SERVICE_NOT_FOUND,
MSG_STACK_NOT_FOUND,
console,
print_error,
print_hint,
@@ -39,13 +39,13 @@ _R = TypeVar("_R")
# --- Shared CLI Options ---
ServicesArg = Annotated[
StacksArg = Annotated[
list[str] | None,
typer.Argument(help="Services to operate on"),
typer.Argument(help="Stacks to operate on"),
]
AllOption = Annotated[
bool,
typer.Option("--all", "-a", help="Run on all services"),
typer.Option("--all", "-a", help="Run on all stacks"),
]
ConfigOption = Annotated[
Path | None,
@@ -57,7 +57,11 @@ LogPathOption = Annotated[
]
HostOption = Annotated[
str | None,
typer.Option("--host", "-H", help="Filter to services on this host"),
typer.Option("--host", "-H", help="Filter to stacks on this host"),
]
ServiceOption = Annotated[
str | None,
typer.Option("--service", "-s", help="Target a specific service within the stack"),
]
# --- Constants (internal) ---
@@ -138,59 +142,62 @@ def load_config_or_exit(config_path: Path | None) -> Config:
except FileNotFoundError as e:
print_error(str(e))
raise typer.Exit(1) from e
except Exception as e:
print_error(f"Invalid config: {e}")
raise typer.Exit(1) from e
def get_services(
services: list[str],
all_services: bool,
def get_stacks(
stacks: list[str],
all_stacks: bool,
config_path: Path | None,
*,
host: str | None = None,
default_all: bool = False,
) -> tuple[list[str], Config]:
"""Resolve service list and load config.
"""Resolve stack list and load config.
Handles three mutually exclusive selection methods:
- Explicit service names
- Explicit stack names
- --all flag
- --host filter
Args:
services: Explicit service names
all_services: Whether --all was specified
stacks: Explicit stack names
all_stacks: Whether --all was specified
config_path: Path to config file
host: Filter to services on this host
default_all: If True, default to all services when nothing specified (for ps)
host: Filter to stacks on this host
default_all: If True, default to all stacks when nothing specified (for ps)
Supports "." as shorthand for the current directory name.
"""
validate_service_selection(services, all_services, host)
validate_stack_selection(stacks, all_stacks, host)
config = load_config_or_exit(config_path)
if host is not None:
validate_hosts(config, host)
svc_list = [s for s in config.services if host in config.get_hosts(s)]
if not svc_list:
print_warning(f"No services configured for host [magenta]{host}[/]")
stack_list = [s for s in config.stacks if host in config.get_hosts(s)]
if not stack_list:
print_warning(f"No stacks configured for host [magenta]{host}[/]")
raise typer.Exit(0)
return svc_list, config
return stack_list, config
if all_services:
return list(config.services.keys()), config
if all_stacks:
return list(config.stacks.keys()), config
if not services:
if not stacks:
if default_all:
return list(config.services.keys()), config
print_error("Specify services or use [bold]--all[/] / [bold]--host[/]")
return list(config.stacks.keys()), config
print_error("Specify stacks or use [bold]--all[/] / [bold]--host[/]")
raise typer.Exit(1)
# Resolve "." to current directory name
resolved = [Path.cwd().name if svc == "." else svc for svc in services]
resolved = [Path.cwd().name if stack == "." else stack for stack in stacks]
# Validate all services exist in config
validate_services(
config, resolved, hint="Add the service to compose-farm.yaml or use [bold]--all[/]"
# Validate all stacks exist in config
validate_stacks(
config, resolved, hint="Add the stack to compose-farm.yaml or use [bold]--all[/]"
)
return resolved, config
@@ -215,19 +222,19 @@ def report_results(results: list[CommandResult]) -> None:
console.print() # Blank line before summary
if failed:
for r in failed:
print_error(f"[cyan]{r.service}[/] failed with exit code {r.exit_code}")
print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")
console.print()
console.print(
f"[green]✓[/] {len(succeeded)}/{len(results)} services succeeded, "
f"[green]✓[/] {len(succeeded)}/{len(results)} stacks succeeded, "
f"[red]✗[/] {len(failed)} failed"
)
else:
print_success(f"All {len(results)} services succeeded")
print_success(f"All {len(results)} stacks succeeded")
elif failed:
# Single service failed
# Single stack failed
r = failed[0]
print_error(f"[cyan]{r.service}[/] failed with exit code {r.exit_code}")
print_error(f"[cyan]{r.stack}[/] failed with exit code {r.exit_code}")
if failed:
raise typer.Exit(1)
@@ -239,12 +246,12 @@ def maybe_regenerate_traefik(
) -> None:
"""Regenerate traefik config if traefik_file is configured.
If results are provided, skips regeneration if all services failed.
If results are provided, skips regeneration if all stacks failed.
"""
if cfg.traefik_file is None:
return
# Skip if all services failed
# Skip if all stacks failed
if results and not any(r.success for r in results):
return
@@ -255,7 +262,7 @@ def maybe_regenerate_traefik(
)
try:
dynamic, warnings = generate_traefik_config(cfg, list(cfg.services.keys()))
dynamic, warnings = generate_traefik_config(cfg, list(cfg.stacks.keys()))
new_content = render_traefik_config(dynamic)
# Check if content changed
@@ -275,12 +282,12 @@ def maybe_regenerate_traefik(
print_warning(f"Failed to update traefik config: {exc}")
def validate_services(cfg: Config, services: list[str], *, hint: str | None = None) -> None:
"""Validate that all services exist in config. Exits with error if any not found."""
invalid = [s for s in services if s not in cfg.services]
def validate_stacks(cfg: Config, stacks: list[str], *, hint: str | None = None) -> None:
"""Validate that all stacks exist in config. Exits with error if any not found."""
invalid = [s for s in stacks if s not in cfg.stacks]
if invalid:
for svc in invalid:
print_error(MSG_SERVICE_NOT_FOUND.format(name=svc))
print_error(MSG_STACK_NOT_FOUND.format(name=svc))
if hint:
print_hint(hint)
raise typer.Exit(1)
@@ -296,29 +303,29 @@ def validate_hosts(cfg: Config, hosts: str | list[str]) -> None:
raise typer.Exit(1)
def validate_host_for_service(cfg: Config, service: str, host: str) -> None:
"""Validate that a host is valid for a service."""
def validate_host_for_stack(cfg: Config, stack: str, host: str) -> None:
"""Validate that a host is valid for a stack."""
validate_hosts(cfg, host)
allowed_hosts = cfg.get_hosts(service)
allowed_hosts = cfg.get_hosts(stack)
if host not in allowed_hosts:
print_error(
f"Service [cyan]{service}[/] is not configured for host [magenta]{host}[/] "
f"Stack [cyan]{stack}[/] is not configured for host [magenta]{host}[/] "
f"(configured: {', '.join(allowed_hosts)})"
)
raise typer.Exit(1)
def validate_service_selection(
services: list[str] | None,
all_services: bool,
def validate_stack_selection(
stacks: list[str] | None,
all_stacks: bool,
host: str | None,
) -> None:
"""Validate that only one service selection method is used.
"""Validate that only one stack selection method is used.
The three selection methods (explicit services, --all, --host) are mutually
The three selection methods (explicit stacks, --all, --host) are mutually
exclusive. This ensures consistent behavior across all commands.
"""
methods = sum([bool(services), all_services, host is not None])
methods = sum([bool(stacks), all_stacks, host is not None])
if methods > 1:
print_error("Use only one of: service names, [bold]--all[/], or [bold]--host[/]")
print_error("Use only one of: stack names, [bold]--all[/], or [bold]--host[/]")
raise typer.Exit(1)

View File

@@ -3,13 +3,12 @@
from __future__ import annotations
import os
import platform
import shlex
import shutil
import subprocess
from importlib import resources
from pathlib import Path
from typing import Annotated
from typing import TYPE_CHECKING, Annotated
import typer
@@ -17,6 +16,9 @@ from compose_farm.cli.app import app
from compose_farm.console import MSG_CONFIG_NOT_FOUND, console, print_error, print_success
from compose_farm.paths import config_search_paths, default_config_path, find_config_path
if TYPE_CHECKING:
from compose_farm.config import Config
config_app = typer.Typer(
name="config",
help="Manage compose-farm configuration files.",
@@ -43,8 +45,6 @@ def _get_editor() -> str:
"""Get the user's preferred editor ($EDITOR > $VISUAL > platform default)."""
if editor := os.environ.get("EDITOR") or os.environ.get("VISUAL"):
return editor
if platform.system() == "Windows":
return "notepad"
return next((e for e in ("nano", "vim", "vi") if shutil.which(e)), "vi")
@@ -68,6 +68,22 @@ def _get_config_file(path: Path | None) -> Path | None:
return config_path.resolve() if config_path else None
def _load_config_with_path(path: Path | None) -> tuple[Path, Config]:
"""Load config and return both the resolved path and Config object.
Exits with error if config not found or invalid.
"""
from compose_farm.cli.common import load_config_or_exit # noqa: PLC0415
config_file = _get_config_file(path)
if config_file is None:
print_error(MSG_CONFIG_NOT_FOUND)
raise typer.Exit(1)
cfg = load_config_or_exit(config_file)
return config_file, cfg
def _report_missing_config(explicit_path: Path | None = None) -> None:
"""Report that a config file was not found."""
console.print("[yellow]Config file not found.[/yellow]")
@@ -135,7 +151,7 @@ def config_edit(
console.print(f"[dim]Opening {config_file} with {editor}...[/dim]")
try:
editor_cmd = shlex.split(editor, posix=os.name != "nt")
editor_cmd = shlex.split(editor)
except ValueError as e:
print_error("Invalid editor command. Check [bold]$EDITOR[/]/[bold]$VISUAL[/]")
raise typer.Exit(1) from e
@@ -207,27 +223,11 @@ def config_validate(
path: _PathOption = None,
) -> None:
"""Validate the config file syntax and schema."""
config_file = _get_config_file(path)
if config_file is None:
print_error(MSG_CONFIG_NOT_FOUND)
raise typer.Exit(1)
# Lazy import: pydantic adds ~50ms to startup, only load when actually needed
from compose_farm.config import load_config # noqa: PLC0415
try:
cfg = load_config(config_file)
except FileNotFoundError as e:
print_error(str(e))
raise typer.Exit(1) from e
except Exception as e:
print_error(f"Invalid config: {e}")
raise typer.Exit(1) from e
config_file, cfg = _load_config_with_path(path)
print_success(f"Valid config: {config_file}")
console.print(f" Hosts: {len(cfg.hosts)}")
console.print(f" Services: {len(cfg.services)}")
console.print(f" Stacks: {len(cfg.stacks)}")
@config_app.command("symlink")
@@ -293,5 +293,129 @@ def config_symlink(
console.print(f" -> {target_path}")
def _detect_domain(cfg: Config) -> str | None:
"""Try to detect DOMAIN from traefik Host() rules in existing stacks.
Uses extract_website_urls from traefik module to get interpolated
URLs, then extracts the domain from the first valid URL.
Skips local domains (.local, localhost, etc.).
"""
from urllib.parse import urlparse # noqa: PLC0415
from compose_farm.traefik import extract_website_urls # noqa: PLC0415
max_stacks_to_check = 10
min_domain_parts = 2
subdomain_parts = 4
skip_tlds = {"local", "localhost", "internal", "lan", "home"}
for stack_name in list(cfg.stacks.keys())[:max_stacks_to_check]:
urls = extract_website_urls(cfg, stack_name)
for url in urls:
host = urlparse(url).netloc
parts = host.split(".")
# Skip local/internal domains
if parts[-1].lower() in skip_tlds:
continue
if len(parts) >= subdomain_parts:
# e.g., "app.lab.nijho.lt" -> "lab.nijho.lt"
return ".".join(parts[-3:])
if len(parts) >= min_domain_parts:
# e.g., "app.example.com" -> "example.com"
return ".".join(parts[-2:])
return None
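A minimal illustration of the domain heuristic (the hostnames are the same examples used in the comments above): a four-part host keeps its last three labels, a shorter one keeps the last two.
parts = "app.lab.nijho.lt".split(".")
assert ".".join(parts[-3:]) == "lab.nijho.lt"
parts = "app.example.com".split(".")
assert ".".join(parts[-2:]) == "example.com"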
def _detect_local_host(cfg: Config) -> str | None:
"""Find which config host matches local machine's IPs."""
from compose_farm.executor import is_local # noqa: PLC0415
for name, host in cfg.hosts.items():
if is_local(host):
return name
return None
@config_app.command("init-env")
def config_init_env(
path: _PathOption = None,
output: Annotated[
Path | None,
typer.Option(
"--output", "-o", help="Output .env file path. Defaults to .env in config directory."
),
] = None,
force: _ForceOption = False,
) -> None:
"""Generate a .env file for Docker deployment.
Reads the compose-farm.yaml config and auto-detects settings:
- CF_COMPOSE_DIR from compose_dir
- CF_LOCAL_HOST by detecting which config host matches local IPs
- CF_UID/GID/HOME/USER from current user
- DOMAIN from traefik labels in stacks (if found)
Example::
cf config init-env # Create .env next to config
cf config init-env -o .env # Create .env in current directory
"""
config_file, cfg = _load_config_with_path(path)
# Determine output path
env_path = output.expanduser().resolve() if output else config_file.parent / ".env"
if env_path.exists() and not force:
console.print(f"[yellow].env file already exists:[/] {env_path}")
if not typer.confirm("Overwrite?"):
console.print("[dim]Aborted.[/dim]")
raise typer.Exit(0)
# Auto-detect values
uid = os.getuid()
gid = os.getgid()
home = os.environ.get("HOME", "/root")
user = os.environ.get("USER", "root")
compose_dir = str(cfg.compose_dir)
local_host = _detect_local_host(cfg)
domain = _detect_domain(cfg)
# Generate .env content
lines = [
"# Generated by: cf config init-env",
f"# From config: {config_file}",
"",
"# Domain for Traefik labels",
f"DOMAIN={domain or 'example.com'}",
"",
"# Compose files location",
f"CF_COMPOSE_DIR={compose_dir}",
"",
"# Run as current user (recommended for NFS)",
f"CF_UID={uid}",
f"CF_GID={gid}",
f"CF_HOME={home}",
f"CF_USER={user}",
"",
"# Local hostname for Glances integration",
f"CF_LOCAL_HOST={local_host or '# auto-detect failed - set manually'}",
"",
]
env_path.write_text("\n".join(lines), encoding="utf-8")
print_success(f"Created .env file: {env_path}")
console.print()
console.print("[dim]Detected settings:[/dim]")
console.print(f" DOMAIN: {domain or '[yellow]example.com[/] (edit this)'}")
console.print(f" CF_COMPOSE_DIR: {compose_dir}")
console.print(f" CF_UID/GID: {uid}:{gid}")
console.print(f" CF_LOCAL_HOST: {local_host or '[yellow]not detected[/] (set manually)'}")
console.print()
console.print("[dim]Review and edit as needed:[/dim]")
console.print(f" [cyan]$EDITOR {env_path}[/cyan]")
# Register config subcommand on the shared app
app.add_typer(config_app, name="config", rich_help_panel="Configuration")

View File

@@ -2,215 +2,327 @@
from __future__ import annotations
from typing import Annotated
from pathlib import Path
from typing import TYPE_CHECKING, Annotated
import typer
if TYPE_CHECKING:
from compose_farm.config import Config
from compose_farm.cli.app import app
from compose_farm.cli.common import (
AllOption,
ConfigOption,
HostOption,
ServicesArg,
ServiceOption,
StacksArg,
format_host,
get_services,
get_stacks,
load_config_or_exit,
maybe_regenerate_traefik,
report_results,
run_async,
validate_host_for_stack,
validate_stacks,
)
from compose_farm.cli.management import _discover_stacks_full
from compose_farm.console import MSG_DRY_RUN, console, print_error, print_success
from compose_farm.executor import run_on_services, run_sequential_on_services
from compose_farm.operations import stop_orphaned_services, up_services
from compose_farm.executor import run_compose_on_host, run_on_stacks, run_sequential_on_stacks
from compose_farm.operations import (
stop_orphaned_stacks,
stop_stray_stacks,
up_stacks,
)
from compose_farm.state import (
get_orphaned_services,
get_service_host,
get_services_needing_migration,
get_services_not_in_state,
remove_service,
get_orphaned_stacks,
get_stack_host,
get_stacks_needing_migration,
get_stacks_not_in_state,
remove_stack,
)
@app.command(rich_help_panel="Lifecycle")
def up(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
host: HostOption = None,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Start services (docker compose up -d). Auto-migrates if host changed."""
svc_list, cfg = get_services(services or [], all_services, config, host=host)
results = run_async(up_services(cfg, svc_list, raw=True))
"""Start stacks (docker compose up -d). Auto-migrates if host changed."""
stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
if service:
if len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
# For service-level up, use run_on_stacks directly (no migration logic)
results = run_async(run_on_stacks(cfg, stack_list, f"up -d {service}", raw=True))
else:
results = run_async(up_stacks(cfg, stack_list, raw=True))
maybe_regenerate_traefik(cfg, results)
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def down(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
orphaned: Annotated[
bool,
typer.Option(
"--orphaned", help="Stop orphaned services (in state but removed from config)"
),
typer.Option("--orphaned", help="Stop orphaned stacks (in state but removed from config)"),
] = False,
host: HostOption = None,
config: ConfigOption = None,
) -> None:
"""Stop services (docker compose down)."""
"""Stop stacks (docker compose down)."""
# Handle --orphaned flag (mutually exclusive with other selection methods)
if orphaned:
if services or all_services or host:
if stacks or all_stacks or host:
print_error(
"Cannot combine [bold]--orphaned[/] with services, [bold]--all[/], or [bold]--host[/]"
"Cannot combine [bold]--orphaned[/] with stacks, [bold]--all[/], or [bold]--host[/]"
)
raise typer.Exit(1)
cfg = load_config_or_exit(config)
orphaned_services = get_orphaned_services(cfg)
orphaned_stacks = get_orphaned_stacks(cfg)
if not orphaned_services:
print_success("No orphaned services to stop")
if not orphaned_stacks:
print_success("No orphaned stacks to stop")
return
console.print(
f"[yellow]Stopping {len(orphaned_services)} orphaned service(s):[/] "
f"{', '.join(orphaned_services.keys())}"
f"[yellow]Stopping {len(orphaned_stacks)} orphaned stack(s):[/] "
f"{', '.join(orphaned_stacks.keys())}"
)
results = run_async(stop_orphaned_services(cfg))
results = run_async(stop_orphaned_stacks(cfg))
report_results(results)
return
svc_list, cfg = get_services(services or [], all_services, config, host=host)
raw = len(svc_list) == 1
results = run_async(run_on_services(cfg, svc_list, "down", raw=raw))
stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
raw = len(stack_list) == 1
results = run_async(run_on_stacks(cfg, stack_list, "down", raw=raw))
# Remove from state on success
# For multi-host services, result.service is "svc@host", extract base name
removed_services: set[str] = set()
# For multi-host stacks, result.stack is "stack@host", extract base name
removed_stacks: set[str] = set()
for result in results:
if result.success:
base_service = result.service.split("@")[0]
if base_service not in removed_services:
remove_service(cfg, base_service)
removed_services.add(base_service)
base_stack = result.stack.split("@")[0]
if base_stack not in removed_stacks:
remove_stack(cfg, base_stack)
removed_stacks.add(base_stack)
maybe_regenerate_traefik(cfg, results)
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def stop(
stacks: StacksArg = None,
all_stacks: AllOption = False,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Stop services without removing containers (docker compose stop)."""
stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
if service and len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
cmd = f"stop {service}" if service else "stop"
raw = len(stack_list) == 1
results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=raw))
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def pull(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Pull latest images (docker compose pull)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(run_on_services(cfg, svc_list, "pull", raw=raw))
stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
if service and len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
cmd = f"pull --ignore-buildable {service}" if service else "pull --ignore-buildable"
raw = len(stack_list) == 1
results = run_async(run_on_stacks(cfg, stack_list, cmd, raw=raw))
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def restart(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Restart services (down + up)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(run_sequential_on_services(cfg, svc_list, ["down", "up -d"], raw=raw))
"""Restart stacks (down + up). With --service, restarts just that service."""
stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
if service:
if len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
# For service-level restart, use docker compose restart (more efficient)
raw = True
results = run_async(run_on_stacks(cfg, stack_list, f"restart {service}", raw=raw))
else:
raw = len(stack_list) == 1
results = run_async(run_sequential_on_stacks(cfg, stack_list, ["down", "up -d"], raw=raw))
maybe_regenerate_traefik(cfg, results)
report_results(results)
@app.command(rich_help_panel="Lifecycle")
def update(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Update services (pull + build + down + up)."""
svc_list, cfg = get_services(services or [], all_services, config)
raw = len(svc_list) == 1
results = run_async(
run_sequential_on_services(
cfg, svc_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
"""Update stacks (pull + build + down + up). With --service, updates just that service."""
stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
if service:
if len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
# For service-level update: pull + build + stop + up (stop instead of down)
raw = True
results = run_async(
run_sequential_on_stacks(
cfg,
stack_list,
[
f"pull --ignore-buildable {service}",
f"build {service}",
f"stop {service}",
f"up -d {service}",
],
raw=raw,
)
)
else:
raw = len(stack_list) == 1
results = run_async(
run_sequential_on_stacks(
cfg, stack_list, ["pull --ignore-buildable", "build", "down", "up -d"], raw=raw
)
)
)
maybe_regenerate_traefik(cfg, results)
report_results(results)
def _discover_strays(cfg: Config) -> dict[str, list[str]]:
"""Discover stacks running on unauthorized hosts by scanning all hosts."""
_, strays, duplicates = _discover_stacks_full(cfg)
# Merge duplicates into strays (for single-host stacks on multiple hosts,
# keep correct host and stop others)
for stack, running_hosts in duplicates.items():
configured = cfg.get_hosts(stack)[0]
stray_hosts = [h for h in running_hosts if h != configured]
if stray_hosts:
strays[stack] = stray_hosts
return strays
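A hedged illustration of how duplicates fold into strays ("plex", "nas", and "vm1" are made-up names): for a single-host stack found on several hosts, the configured host is kept and the extras are queued for stopping.
# configured: plex -> nas; observed running on nas and vm1
duplicates = {"plex": ["nas", "vm1"]}
# after the merge loop above: strays["plex"] == ["vm1"]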
@app.command(rich_help_panel="Lifecycle")
def apply( # noqa: PLR0912 (multi-phase reconciliation needs these branches)
def apply( # noqa: C901, PLR0912, PLR0915 (multi-phase reconciliation needs these branches)
dry_run: Annotated[
bool,
typer.Option("--dry-run", "-n", help="Show what would change without executing"),
] = False,
no_orphans: Annotated[
bool,
typer.Option("--no-orphans", help="Only migrate, don't stop orphaned services"),
typer.Option("--no-orphans", help="Only migrate, don't stop orphaned stacks"),
] = False,
no_strays: Annotated[
bool,
typer.Option("--no-strays", help="Don't stop stray stacks (running on wrong host)"),
] = False,
full: Annotated[
bool,
typer.Option("--full", "-f", help="Also run up on all services to apply config changes"),
typer.Option("--full", "-f", help="Also run up on all stacks to apply config changes"),
] = False,
config: ConfigOption = None,
) -> None:
"""Make reality match config (start, migrate, stop as needed).
"""Make reality match config (start, migrate, stop strays/orphans as needed).
This is the "reconcile" command that ensures running services match your
This is the "reconcile" command that ensures running stacks match your
config file. It will:
1. Stop orphaned services (in state but removed from config)
2. Migrate services on wrong host (host in state ≠ host in config)
3. Start missing services (in config but not in state)
1. Stop orphaned stacks (in state but removed from config)
2. Stop stray stacks (running on unauthorized hosts)
3. Migrate stacks on wrong host (host in state ≠ host in config)
4. Start missing stacks (in config but not in state)
Use --dry-run to preview changes before applying.
Use --no-orphans to only migrate/start without stopping orphaned services.
Use --full to also run 'up' on all services (picks up compose/env changes).
Use --no-orphans to skip stopping orphaned stacks.
Use --no-strays to skip stopping stray stacks.
Use --full to also run 'up' on all stacks (picks up compose/env changes).
"""
cfg = load_config_or_exit(config)
orphaned = get_orphaned_services(cfg)
migrations = get_services_needing_migration(cfg)
missing = get_services_not_in_state(cfg)
orphaned = get_orphaned_stacks(cfg)
migrations = get_stacks_needing_migration(cfg)
missing = get_stacks_not_in_state(cfg)
# For --full: refresh all services not already being started/migrated
strays: dict[str, list[str]] = {}
if not no_strays:
console.print("[dim]Scanning hosts for stray containers...[/]")
strays = _discover_strays(cfg)
# For --full: refresh all stacks not already being started/migrated
handled = set(migrations) | set(missing)
to_refresh = [svc for svc in cfg.services if svc not in handled] if full else []
to_refresh = [stack for stack in cfg.stacks if stack not in handled] if full else []
has_orphans = bool(orphaned) and not no_orphans
has_strays = bool(strays)
has_migrations = bool(migrations)
has_missing = bool(missing)
has_refresh = bool(to_refresh)
if not has_orphans and not has_migrations and not has_missing and not has_refresh:
if (
not has_orphans
and not has_strays
and not has_migrations
and not has_missing
and not has_refresh
):
print_success("Nothing to apply - reality matches config")
return
# Report what will be done
if has_orphans:
console.print(f"[yellow]Orphaned services to stop ({len(orphaned)}):[/]")
console.print(f"[yellow]Orphaned stacks to stop ({len(orphaned)}):[/]")
for svc, hosts in orphaned.items():
console.print(f" [cyan]{svc}[/] on [magenta]{format_host(hosts)}[/]")
if has_strays:
console.print(f"[red]Stray stacks to stop ({len(strays)}):[/]")
for stack, hosts in strays.items():
configured = cfg.get_hosts(stack)
console.print(
f" [cyan]{stack}[/] on [magenta]{', '.join(hosts)}[/] "
f"[dim](should be on {', '.join(configured)})[/]"
)
if has_migrations:
console.print(f"[cyan]Services to migrate ({len(migrations)}):[/]")
for svc in migrations:
current = get_service_host(cfg, svc)
target = cfg.get_hosts(svc)[0]
console.print(f" [cyan]{svc}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
console.print(f"[cyan]Stacks to migrate ({len(migrations)}):[/]")
for stack in migrations:
current = get_stack_host(cfg, stack)
target = cfg.get_hosts(stack)[0]
console.print(f" [cyan]{stack}[/]: [magenta]{current}[/] → [magenta]{target}[/]")
if has_missing:
console.print(f"[green]Services to start ({len(missing)}):[/]")
for svc in missing:
console.print(f" [cyan]{svc}[/] on [magenta]{format_host(cfg.get_hosts(svc))}[/]")
console.print(f"[green]Stacks to start ({len(missing)}):[/]")
for stack in missing:
console.print(f" [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")
if has_refresh:
console.print(f"[blue]Services to refresh ({len(to_refresh)}):[/]")
for svc in to_refresh:
console.print(f" [cyan]{svc}[/] on [magenta]{format_host(cfg.get_hosts(svc))}[/]")
console.print(f"[blue]Stacks to refresh ({len(to_refresh)}):[/]")
for stack in to_refresh:
console.print(f" [cyan]{stack}[/] on [magenta]{format_host(cfg.get_hosts(stack))}[/]")
if dry_run:
console.print(f"\n{MSG_DRY_RUN}")
@@ -220,34 +332,96 @@ def apply( # noqa: PLR0912 (multi-phase reconciliation needs these branches)
console.print()
all_results = []
# 1. Stop orphaned services first
# 1. Stop orphaned stacks first
if has_orphans:
console.print("[yellow]Stopping orphaned services...[/]")
all_results.extend(run_async(stop_orphaned_services(cfg)))
console.print("[yellow]Stopping orphaned stacks...[/]")
all_results.extend(run_async(stop_orphaned_stacks(cfg)))
# 2. Migrate services on wrong host
# 2. Stop stray stacks (running on unauthorized hosts)
if has_strays:
console.print("[red]Stopping stray stacks...[/]")
all_results.extend(run_async(stop_stray_stacks(cfg, strays)))
# 3. Migrate stacks on wrong host
if has_migrations:
console.print("[cyan]Migrating services...[/]")
migrate_results = run_async(up_services(cfg, migrations, raw=True))
console.print("[cyan]Migrating stacks...[/]")
migrate_results = run_async(up_stacks(cfg, migrations, raw=True))
all_results.extend(migrate_results)
maybe_regenerate_traefik(cfg, migrate_results)
# 3. Start missing services (reuse up_services which handles state updates)
# 4. Start missing stacks (reuse up_stacks which handles state updates)
if has_missing:
console.print("[green]Starting missing services...[/]")
start_results = run_async(up_services(cfg, missing, raw=True))
console.print("[green]Starting missing stacks...[/]")
start_results = run_async(up_stacks(cfg, missing, raw=True))
all_results.extend(start_results)
maybe_regenerate_traefik(cfg, start_results)
# 4. Refresh remaining services (--full: run up to apply config changes)
# 5. Refresh remaining stacks (--full: run up to apply config changes)
if has_refresh:
console.print("[blue]Refreshing services...[/]")
refresh_results = run_async(up_services(cfg, to_refresh, raw=True))
console.print("[blue]Refreshing stacks...[/]")
refresh_results = run_async(up_stacks(cfg, to_refresh, raw=True))
all_results.extend(refresh_results)
maybe_regenerate_traefik(cfg, refresh_results)
report_results(all_results)
@app.command(
rich_help_panel="Lifecycle",
context_settings={"allow_interspersed_args": False},
)
def compose(
stack: Annotated[str, typer.Argument(help="Stack to operate on (use '.' for current dir)")],
command: Annotated[str, typer.Argument(help="Docker compose command")],
args: Annotated[list[str] | None, typer.Argument(help="Additional arguments")] = None,
host: HostOption = None,
config: ConfigOption = None,
) -> None:
"""Run any docker compose command on a stack.
Passthrough to docker compose for commands not wrapped by cf.
Options after COMMAND are passed to docker compose, not cf.
Examples:
cf compose mystack --help          - show docker compose help
cf compose mystack top             - view running processes
cf compose mystack images          - list images
cf compose mystack exec web bash   - interactive shell
cf compose mystack config          - view parsed config
"""
cfg = load_config_or_exit(config)
# Resolve "." to current directory name
resolved_stack = Path.cwd().name if stack == "." else stack
validate_stacks(cfg, [resolved_stack])
# Handle multi-host stacks
hosts = cfg.get_hosts(resolved_stack)
if len(hosts) > 1:
if host is None:
print_error(
f"Stack [cyan]{resolved_stack}[/] runs on multiple hosts: {', '.join(hosts)}\n"
f"Use [bold]--host[/] to specify which host"
)
raise typer.Exit(1)
validate_host_for_stack(cfg, resolved_stack, host)
target_host = host
else:
target_host = hosts[0]
# Build the full compose command
full_cmd = command
if args:
full_cmd += " " + " ".join(args)
# Run with raw=True for proper TTY handling (progress bars, interactive)
result = run_async(run_compose_on_host(cfg, resolved_stack, target_host, full_cmd, raw=True))
print() # Ensure newline after raw output
if not result.success:
raise typer.Exit(result.exit_code)
# Alias: cf a = cf apply
app.command("a", hidden=True)(apply)

View File

@@ -15,14 +15,14 @@ from compose_farm.cli.common import (
AllOption,
ConfigOption,
LogPathOption,
ServicesArg,
StacksArg,
format_host,
get_services,
get_stacks,
load_config_or_exit,
run_async,
run_parallel_with_progress,
validate_hosts,
validate_services,
validate_stacks,
)
if TYPE_CHECKING:
@@ -37,60 +37,62 @@ from compose_farm.console import (
)
from compose_farm.executor import (
CommandResult,
get_running_stacks_on_host,
is_local,
run_command,
)
from compose_farm.logs import (
DEFAULT_LOG_PATH,
SnapshotEntry,
collect_service_entries,
collect_stacks_entries_on_host,
isoformat,
load_existing_entries,
merge_entries,
write_toml,
)
from compose_farm.operations import (
build_discovery_results,
check_host_compatibility,
check_service_requirements,
discover_service_host,
check_stack_requirements,
)
from compose_farm.state import get_orphaned_services, load_state, save_state
from compose_farm.traefik import generate_traefik_config, render_traefik_config
from compose_farm.state import get_orphaned_stacks, load_state, save_state
# --- Sync helpers ---
def _discover_services(cfg: Config) -> dict[str, str | list[str]]:
"""Discover running services with a progress bar."""
results = run_parallel_with_progress(
"Discovering",
list(cfg.services),
lambda s: discover_service_host(cfg, s),
)
return {svc: host for svc, host in results if host is not None}
def _snapshot_services(
def _snapshot_stacks(
cfg: Config,
services: list[str],
discovered: dict[str, str | list[str]],
log_path: Path | None,
) -> Path:
"""Capture image digests with a progress bar."""
"""Capture image digests using batched SSH calls (1 per host).
Args:
cfg: Configuration
discovered: Dict mapping stack -> host(s) where it's running
log_path: Optional path to write the log file
Returns:
Path to the written log file.
"""
effective_log_path = log_path or DEFAULT_LOG_PATH
now_dt = datetime.now(UTC)
now_iso = isoformat(now_dt)
async def collect_service(service: str) -> tuple[str, list[SnapshotEntry]]:
try:
return service, await collect_service_entries(cfg, service, now=now_dt)
except RuntimeError:
return service, []
# Group stacks by host for batched SSH calls
stacks_by_host: dict[str, set[str]] = {}
for stack, hosts in discovered.items():
# Use first host for multi-host stacks (they use the same images)
host = hosts[0] if isinstance(hosts, list) else hosts
stacks_by_host.setdefault(host, set()).add(stack)
results = run_parallel_with_progress(
"Capturing",
services,
collect_service,
)
# Collect entries with 1 SSH call per host (with progress bar)
async def collect_on_host(host: str) -> tuple[str, list[SnapshotEntry]]:
entries = await collect_stacks_entries_on_host(cfg, host, stacks_by_host[host], now=now_dt)
return host, entries
results = run_parallel_with_progress("Capturing", list(stacks_by_host.keys()), collect_on_host)
snapshot_entries = [entry for _, entries in results for entry in entries]
if not snapshot_entries:
@@ -104,6 +106,18 @@ def _snapshot_services(
return effective_log_path
def _merge_state(
current_state: dict[str, str | list[str]],
discovered: dict[str, str | list[str]],
removed: list[str],
) -> dict[str, str | list[str]]:
"""Merge discovered stacks into existing state for partial refresh."""
new_state = {**current_state, **discovered}
for svc in removed:
new_state.pop(svc, None)
return new_state
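A minimal sketch of the merge semantics with made-up stack names, showing that a partial refresh only touches the stacks that were queried:
current_state = {"plex": "nas", "grafana": "vm1"}
discovered = {"grafana": "vm2"}  # only grafana was refreshed
assert _merge_state(current_state, discovered, removed=[]) == {"plex": "nas", "grafana": "vm2"}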
def _report_sync_changes(
added: list[str],
removed: list[str],
@@ -113,25 +127,80 @@ def _report_sync_changes(
) -> None:
"""Report sync changes to the user."""
if added:
console.print(f"\nNew services found ({len(added)}):")
for service in sorted(added):
host_str = format_host(discovered[service])
console.print(f" [green]+[/] [cyan]{service}[/] on [magenta]{host_str}[/]")
console.print(f"\nNew stacks found ({len(added)}):")
for stack in sorted(added):
host_str = format_host(discovered[stack])
console.print(f" [green]+[/] [cyan]{stack}[/] on [magenta]{host_str}[/]")
if changed:
console.print(f"\nServices on different hosts ({len(changed)}):")
for service, old_host, new_host in sorted(changed):
console.print(f"\nStacks on different hosts ({len(changed)}):")
for stack, old_host, new_host in sorted(changed):
old_str = format_host(old_host)
new_str = format_host(new_host)
console.print(
f" [yellow]~[/] [cyan]{service}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
f" [yellow]~[/] [cyan]{stack}[/]: [magenta]{old_str}[/] → [magenta]{new_str}[/]"
)
if removed:
console.print(f"\nServices no longer running ({len(removed)}):")
for service in sorted(removed):
host_str = format_host(current_state[service])
console.print(f" [red]-[/] [cyan]{service}[/] (was on [magenta]{host_str}[/])")
console.print(f"\nStacks no longer running ({len(removed)}):")
for stack in sorted(removed):
host_str = format_host(current_state[stack])
console.print(f" [red]-[/] [cyan]{stack}[/] (was on [magenta]{host_str}[/])")
def _discover_stacks_full(
cfg: Config,
stacks: list[str] | None = None,
) -> tuple[dict[str, str | list[str]], dict[str, list[str]], dict[str, list[str]]]:
"""Discover running stacks with full host scanning for stray detection.
Queries each host once for all running stacks (with progress bar),
then delegates to build_discovery_results for categorization.
"""
all_hosts = list(cfg.hosts.keys())
# Query each host for running stacks (with progress bar)
async def get_stacks_on_host(host: str) -> tuple[str, set[str]]:
running = await get_running_stacks_on_host(cfg, host)
return host, running
host_results = run_parallel_with_progress("Discovering", all_hosts, get_stacks_on_host)
running_on_host: dict[str, set[str]] = dict(host_results)
return build_discovery_results(cfg, running_on_host, stacks)
def _report_stray_stacks(
strays: dict[str, list[str]],
cfg: Config,
) -> None:
"""Report stacks running on unauthorized hosts."""
if strays:
console.print(f"\n[red]Stray stacks[/] (running on wrong host, {len(strays)}):")
console.print("[dim]Run [bold]cf apply[/bold] to stop them.[/]")
for stack in sorted(strays):
stray_hosts = strays[stack]
configured = cfg.get_hosts(stack)
console.print(
f" [red]![/] [cyan]{stack}[/] on [magenta]{', '.join(stray_hosts)}[/] "
f"[dim](should be on {', '.join(configured)})[/]"
)
def _report_duplicate_stacks(duplicates: dict[str, list[str]], cfg: Config) -> None:
"""Report single-host stacks running on multiple hosts."""
if duplicates:
console.print(
f"\n[yellow]Duplicate stacks[/] (running on multiple hosts, {len(duplicates)}):"
)
console.print("[dim]Run [bold]cf apply[/bold] to stop extras.[/]")
for stack in sorted(duplicates):
hosts = duplicates[stack]
configured = cfg.get_hosts(stack)[0]
console.print(
f" [yellow]![/] [cyan]{stack}[/] on [magenta]{', '.join(hosts)}[/] "
f"[dim](should only be on {configured})[/]"
)
# --- Check helpers ---
@@ -166,44 +235,44 @@ def _check_ssh_connectivity(cfg: Config) -> list[str]:
return [host for host, success in results if not success]
def _check_service_requirements(
def _check_stack_requirements(
cfg: Config,
services: list[str],
stacks: list[str],
) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]], list[tuple[str, str, str]]]:
"""Check mounts, networks, and devices for all services with a progress bar.
"""Check mounts, networks, and devices for all stacks with a progress bar.
Returns (mount_errors, network_errors, device_errors) where each is a list of
(service, host, missing_item) tuples.
(stack, host, missing_item) tuples.
"""
async def check_service(
service: str,
async def check_stack(
stack: str,
) -> tuple[
str,
list[tuple[str, str, str]],
list[tuple[str, str, str]],
list[tuple[str, str, str]],
]:
"""Check requirements for a single service on all its hosts."""
host_names = cfg.get_hosts(service)
"""Check requirements for a single stack on all its hosts."""
host_names = cfg.get_hosts(stack)
mount_errors: list[tuple[str, str, str]] = []
network_errors: list[tuple[str, str, str]] = []
device_errors: list[tuple[str, str, str]] = []
for host_name in host_names:
missing_paths, missing_nets, missing_devs = await check_service_requirements(
cfg, service, host_name
missing_paths, missing_nets, missing_devs = await check_stack_requirements(
cfg, stack, host_name
)
mount_errors.extend((service, host_name, p) for p in missing_paths)
network_errors.extend((service, host_name, n) for n in missing_nets)
device_errors.extend((service, host_name, d) for d in missing_devs)
mount_errors.extend((stack, host_name, p) for p in missing_paths)
network_errors.extend((stack, host_name, n) for n in missing_nets)
device_errors.extend((stack, host_name, d) for d in missing_devs)
return service, mount_errors, network_errors, device_errors
return stack, mount_errors, network_errors, device_errors
results = run_parallel_with_progress(
"Checking requirements",
services,
check_service,
stacks,
check_stack,
)
all_mount_errors: list[tuple[str, str, str]] = []
@@ -219,7 +288,7 @@ def _check_service_requirements(
def _report_config_status(cfg: Config) -> bool:
"""Check and report config vs disk status. Returns True if errors found."""
configured = set(cfg.services.keys())
configured = set(cfg.stacks.keys())
on_disk = cfg.discover_compose_dirs()
unmanaged = sorted(on_disk - configured)
missing_from_disk = sorted(configured - on_disk)
@@ -240,12 +309,12 @@ def _report_config_status(cfg: Config) -> bool:
return bool(missing_from_disk)
def _report_orphaned_services(cfg: Config) -> bool:
"""Check for services in state but not in config. Returns True if orphans found."""
orphaned = get_orphaned_services(cfg)
def _report_orphaned_stacks(cfg: Config) -> bool:
"""Check for stacks in state but not in config. Returns True if orphans found."""
orphaned = get_orphaned_stacks(cfg)
if orphaned:
console.print("\n[yellow]Orphaned services[/] (in state but not in config):")
console.print("\n[yellow]Orphaned stacks[/] (in state but not in config):")
console.print(
"[dim]Run [bold]cf apply[/bold] to stop them, or [bold]cf down --orphaned[/bold] for just orphans.[/]"
)
@@ -256,10 +325,12 @@ def _report_orphaned_services(cfg: Config) -> bool:
return False
def _report_traefik_status(cfg: Config, services: list[str]) -> None:
def _report_traefik_status(cfg: Config, stacks: list[str]) -> None:
"""Check and report traefik label status."""
from compose_farm.traefik import generate_traefik_config # noqa: PLC0415
try:
_, warnings = generate_traefik_config(cfg, services, check_all=True)
_, warnings = generate_traefik_config(cfg, stacks, check_all=True)
except (FileNotFoundError, ValueError):
return
@@ -272,16 +343,16 @@ def _report_traefik_status(cfg: Config, services: list[str]) -> None:
def _report_requirement_errors(errors: list[tuple[str, str, str]], category: str) -> None:
"""Report requirement errors (mounts, networks, devices) grouped by service."""
by_service: dict[str, list[tuple[str, str]]] = {}
for svc, host, item in errors:
by_service.setdefault(svc, []).append((host, item))
"""Report requirement errors (mounts, networks, devices) grouped by stack."""
by_stack: dict[str, list[tuple[str, str]]] = {}
for stack, host, item in errors:
by_stack.setdefault(stack, []).append((host, item))
console.print(f"[red]Missing {category}[/] ({len(errors)}):")
for svc, items in sorted(by_service.items()):
for stack, items in sorted(by_stack.items()):
host = items[0][0]
missing = [i for _, i in items]
console.print(f" [cyan]{svc}[/] on [magenta]{host}[/]:")
console.print(f" [cyan]{stack}[/] on [magenta]{host}[/]:")
for item in missing:
console.print(f" [red]✗[/] {item}")
@@ -301,7 +372,7 @@ def _report_host_compatibility(
compat: dict[str, tuple[int, int, list[str]]],
assigned_hosts: list[str],
) -> None:
"""Report host compatibility for a service."""
"""Report host compatibility for a stack."""
for host_name, (found, total, missing) in sorted(compat.items()):
is_assigned = host_name in assigned_hosts
marker = " [dim](assigned)[/]" if is_assigned else ""
@@ -332,7 +403,7 @@ def _run_remote_checks(cfg: Config, svc_list: list[str], *, show_host_compat: bo
console.print() # Spacing before mounts/networks check
# Check mounts, networks, and devices
mount_errors, network_errors, device_errors = _check_service_requirements(cfg, svc_list)
mount_errors, network_errors, device_errors = _check_stack_requirements(cfg, svc_list)
if mount_errors:
_report_requirement_errors(mount_errors, "mounts")
@@ -347,10 +418,10 @@ def _run_remote_checks(cfg: Config, svc_list: list[str], *, show_host_compat: bo
print_success("All mounts, networks, and devices exist")
if show_host_compat:
for service in svc_list:
console.print(f"\n[bold]Host compatibility for[/] [cyan]{service}[/]:")
compat = run_async(check_host_compatibility(cfg, service))
assigned_hosts = cfg.get_hosts(service)
for stack in svc_list:
console.print(f"\n[bold]Host compatibility for[/] [cyan]{stack}[/]:")
compat = run_async(check_host_compatibility(cfg, stack))
assigned_hosts = cfg.get_hosts(stack)
_report_host_compatibility(compat, assigned_hosts)
return has_errors
@@ -364,8 +435,8 @@ _DEFAULT_NETWORK_GATEWAY = "172.20.0.1"
@app.command("traefik-file", rich_help_panel="Configuration")
def traefik_file(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
output: Annotated[
Path | None,
typer.Option(
@@ -377,9 +448,14 @@ def traefik_file(
config: ConfigOption = None,
) -> None:
"""Generate a Traefik file-provider fragment from compose Traefik labels."""
svc_list, cfg = get_services(services or [], all_services, config)
from compose_farm.traefik import ( # noqa: PLC0415
generate_traefik_config,
render_traefik_config,
)
stack_list, cfg = get_stacks(stacks or [], all_stacks, config)
try:
dynamic, warnings = generate_traefik_config(cfg, svc_list)
dynamic, warnings = generate_traefik_config(cfg, stack_list)
except (FileNotFoundError, ValueError) as exc:
print_error(str(exc))
raise typer.Exit(1) from exc
@@ -399,6 +475,8 @@ def traefik_file(
@app.command(rich_help_panel="Configuration")
def refresh(
stacks: StacksArg = None,
all_stacks: AllOption = False,
config: ConfigOption = None,
log_path: LogPathOption = None,
dry_run: Annotated[
@@ -406,22 +484,35 @@ def refresh(
typer.Option("--dry-run", "-n", help="Show what would change without writing"),
] = False,
) -> None:
"""Update local state from running services.
"""Update local state from running stacks.
Discovers which services are running on which hosts, updates the state
Discovers which stacks are running on which hosts, updates the state
file, and captures image digests. This is a read operation - it updates
your local state to match reality, not the other way around.
Without arguments: refreshes all stacks (same as --all).
With stack names: refreshes only those stacks.
Use 'cf apply' to make reality match your config (stop orphans, migrate).
"""
cfg = load_config_or_exit(config)
stack_list, cfg = get_stacks(stacks or [], all_stacks, config, default_all=True)
# Partial refresh merges with existing state; full refresh replaces it
# Partial = specific stacks provided (not --all, not default)
partial_refresh = bool(stacks) and not all_stacks
current_state = load_state(cfg)
discovered = _discover_services(cfg)
discovered, strays, duplicates = _discover_stacks_full(cfg, stack_list)
# Calculate changes
# Calculate changes (only for the stacks we're refreshing)
added = [s for s in discovered if s not in current_state]
removed = [s for s in current_state if s not in discovered]
# Only mark as "removed" if we're doing a full refresh
if partial_refresh:
# In partial refresh, a stack not running is just "not found"
removed = [s for s in stack_list if s in current_state and s not in discovered]
else:
removed = [s for s in current_state if s not in discovered]
changed = [
(s, current_state[s], discovered[s])
for s in discovered
@@ -435,19 +526,25 @@ def refresh(
else:
print_success("State is already in sync.")
_report_stray_stacks(strays, cfg)
_report_duplicate_stacks(duplicates, cfg)
if dry_run:
console.print(f"\n{MSG_DRY_RUN}")
return
# Update state file
if state_changed:
save_state(cfg, discovered)
print_success(f"State updated: {len(discovered)} services tracked.")
new_state = (
_merge_state(current_state, discovered, removed) if partial_refresh else discovered
)
save_state(cfg, new_state)
print_success(f"State updated: {len(new_state)} stacks tracked.")
# Capture image digests for running services
# Capture image digests for running stacks (1 SSH call per host)
if discovered:
try:
path = _snapshot_services(cfg, list(discovered.keys()), log_path)
path = _snapshot_stacks(cfg, discovered, log_path)
print_success(f"Digests written to {path}")
except RuntimeError as exc:
print_warning(str(exc))
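As a usage note (stack names are placeholders): without arguments the whole state file is replaced by what was discovered, while naming stacks performs a partial refresh merged via _merge_state.
cf refresh                 # full refresh, state replaced
cf refresh plex grafana    # partial refresh, merged into existing state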
@@ -455,7 +552,7 @@ def refresh(
@app.command(rich_help_panel="Configuration")
def check(
services: ServicesArg = None,
stacks: StacksArg = None,
local: Annotated[
bool,
typer.Option("--local", help="Skip SSH-based checks (faster)"),
@@ -464,31 +561,31 @@ def check(
) -> None:
"""Validate configuration, traefik labels, mounts, and networks.
Without arguments: validates all services against configured hosts.
With service arguments: validates specific services and shows host compatibility.
Without arguments: validates all stacks against configured hosts.
With stack arguments: validates specific stacks and shows host compatibility.
Use --local to skip SSH-based checks for faster validation.
"""
cfg = load_config_or_exit(config)
# Determine which services to check and whether to show host compatibility
if services:
svc_list = list(services)
validate_services(cfg, svc_list)
# Determine which stacks to check and whether to show host compatibility
if stacks:
stack_list = list(stacks)
validate_stacks(cfg, stack_list)
show_host_compat = True
else:
svc_list = list(cfg.services.keys())
stack_list = list(cfg.stacks.keys())
show_host_compat = False
# Run checks
has_errors = _report_config_status(cfg)
_report_traefik_status(cfg, svc_list)
_report_traefik_status(cfg, stack_list)
if not local and _run_remote_checks(cfg, svc_list, show_host_compat=show_host_compat):
if not local and _run_remote_checks(cfg, stack_list, show_host_compat=show_host_compat):
has_errors = True
# Check for orphaned services (in state but removed from config)
if _report_orphaned_services(cfg):
# Check for orphaned stacks (in state but removed from config)
if _report_orphaned_stacks(cfg):
has_errors = True
if has_errors:
@@ -517,7 +614,7 @@ def init_network(
) -> None:
"""Create Docker network on hosts with consistent settings.
Creates an external Docker network that services can use for cross-host
Creates an external Docker network that stacks can use for cross-host
communication. Uses the same subnet/gateway on all hosts to ensure
consistent networking.
"""
@@ -534,7 +631,7 @@ def init_network(
if check_result.success:
console.print(f"[cyan]\\[{host_name}][/] Network '{network}' already exists")
return CommandResult(service=host_name, exit_code=0, success=True)
return CommandResult(stack=host_name, exit_code=0, success=True)
# Create the network
create_cmd = (

View File

@@ -14,16 +14,17 @@ from compose_farm.cli.common import (
AllOption,
ConfigOption,
HostOption,
ServicesArg,
get_services,
ServiceOption,
StacksArg,
get_stacks,
load_config_or_exit,
report_results,
run_async,
run_parallel_with_progress,
)
from compose_farm.console import console
from compose_farm.executor import run_command, run_on_services
from compose_farm.state import get_services_needing_migration, group_services_by_host, load_state
from compose_farm.console import console, print_error
from compose_farm.executor import run_command, run_on_stacks
from compose_farm.state import get_stacks_needing_migration, group_stacks_by_host, load_state
if TYPE_CHECKING:
from compose_farm.config import Config
@@ -51,7 +52,7 @@ def _get_container_counts(cfg: Config) -> dict[str, int]:
def _build_host_table(
cfg: Config,
services_by_host: dict[str, list[str]],
stacks_by_host: dict[str, list[str]],
running_by_host: dict[str, list[str]],
container_counts: dict[str, int],
*,
@@ -68,7 +69,7 @@ def _build_host_table(
for host_name in sorted(cfg.hosts.keys()):
host = cfg.hosts[host_name]
configured = len(services_by_host[host_name])
configured = len(stacks_by_host[host_name])
running = len(running_by_host[host_name])
row = [
@@ -96,8 +97,8 @@ def _build_summary_table(
table.add_column("Value", style="bold")
table.add_row("Total hosts", str(len(cfg.hosts)))
table.add_row("Services (configured)", str(len(cfg.services)))
table.add_row("Services (tracked)", str(len(state)))
table.add_row("Stacks (configured)", str(len(cfg.stacks)))
table.add_row("Stacks (tracked)", str(len(state)))
table.add_row("Compose files on disk", str(len(on_disk)))
if pending:
@@ -115,9 +116,10 @@ def _build_summary_table(
@app.command(rich_help_panel="Monitoring")
def logs(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
host: HostOption = None,
service: ServiceOption = None,
follow: Annotated[bool, typer.Option("--follow", "-f", help="Follow logs")] = False,
tail: Annotated[
int | None,
@@ -125,34 +127,45 @@ def logs(
] = None,
config: ConfigOption = None,
) -> None:
"""Show service logs."""
svc_list, cfg = get_services(services or [], all_services, config, host=host)
"""Show stack logs. With --service, shows logs for just that service."""
stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host)
if service and len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
# Default to fewer lines when showing multiple services
many_services = all_services or host is not None or len(svc_list) > 1
effective_tail = tail if tail is not None else (20 if many_services else 100)
# Default to fewer lines when showing multiple stacks
many_stacks = all_stacks or host is not None or len(stack_list) > 1
effective_tail = tail if tail is not None else (20 if many_stacks else 100)
cmd = f"logs --tail {effective_tail}"
if follow:
cmd += " -f"
results = run_async(run_on_services(cfg, svc_list, cmd))
if service:
cmd += f" {service}"
results = run_async(run_on_stacks(cfg, stack_list, cmd))
report_results(results)
@app.command(rich_help_panel="Monitoring")
def ps(
services: ServicesArg = None,
all_services: AllOption = False,
stacks: StacksArg = None,
all_stacks: AllOption = False,
host: HostOption = None,
service: ServiceOption = None,
config: ConfigOption = None,
) -> None:
"""Show status of services.
"""Show status of stacks.
Without arguments: shows all services (same as --all).
With service names: shows only those services.
With --host: shows services on that host.
Without arguments: shows all stacks (same as --all).
With stack names: shows only those stacks.
With --host: shows stacks on that host.
With --service: filters to a specific service within the stack.
"""
svc_list, cfg = get_services(services or [], all_services, config, host=host, default_all=True)
results = run_async(run_on_services(cfg, svc_list, "ps"))
stack_list, cfg = get_stacks(stacks or [], all_stacks, config, host=host, default_all=True)
if service and len(stack_list) != 1:
print_error("--service requires exactly one stack")
raise typer.Exit(1)
cmd = f"ps {service}" if service else "ps"
results = run_async(run_on_stacks(cfg, stack_list, cmd))
report_results(results)
@@ -164,25 +177,25 @@ def stats(
] = False,
config: ConfigOption = None,
) -> None:
"""Show overview statistics for hosts and services.
"""Show overview statistics for hosts and stacks.
Without --live: Shows config/state info (hosts, services, pending migrations).
Without --live: Shows config/state info (hosts, stacks, pending migrations).
With --live: Also queries Docker on each host for container counts.
"""
cfg = load_config_or_exit(config)
state = load_state(cfg)
pending = get_services_needing_migration(cfg)
pending = get_stacks_needing_migration(cfg)
all_hosts = list(cfg.hosts.keys())
services_by_host = group_services_by_host(cfg.services, cfg.hosts, all_hosts)
running_by_host = group_services_by_host(state, cfg.hosts, all_hosts)
stacks_by_host = group_stacks_by_host(cfg.stacks, cfg.hosts, all_hosts)
running_by_host = group_stacks_by_host(state, cfg.hosts, all_hosts)
container_counts: dict[str, int] = {}
if live:
container_counts = _get_container_counts(cfg)
host_table = _build_host_table(
cfg, services_by_host, running_by_host, container_counts, show_containers=live
cfg, stacks_by_host, running_by_host, container_counts, show_containers=live
)
console.print(host_table)

View File

@@ -13,6 +13,7 @@ from pathlib import Path
from typing import TYPE_CHECKING, Any
import yaml
from dotenv import dotenv_values
if TYPE_CHECKING:
from .config import Config
@@ -40,25 +41,37 @@ def _load_env(compose_path: Path) -> dict[str, str]:
Reads from .env file in the same directory as compose file,
then overlays current environment variables.
"""
env: dict[str, str] = {}
env_path = compose_path.parent / ".env"
if env_path.exists():
for line in env_path.read_text().splitlines():
stripped = line.strip()
if not stripped or stripped.startswith("#") or "=" not in stripped:
continue
key, value = stripped.split("=", 1)
key = key.strip()
value = value.strip()
if (value.startswith('"') and value.endswith('"')) or (
value.startswith("'") and value.endswith("'")
):
value = value[1:-1]
env[key] = value
env: dict[str, str] = {k: v for k, v in dotenv_values(env_path).items() if v is not None}
env.update({k: v for k, v in os.environ.items() if isinstance(v, str)})
return env
def parse_compose_data(content: str) -> dict[str, Any]:
"""Parse compose YAML content into a dict."""
compose_data = yaml.safe_load(content) or {}
return compose_data if isinstance(compose_data, dict) else {}
def load_compose_data(compose_path: Path) -> dict[str, Any]:
"""Load compose YAML from a file path."""
return parse_compose_data(compose_path.read_text())
def load_compose_data_for_stack(config: Config, stack: str) -> tuple[Path, dict[str, Any]]:
"""Load compose YAML for a stack, returning (path, data)."""
compose_path = config.get_compose_path(stack)
if not compose_path.exists():
return compose_path, {}
return compose_path, load_compose_data(compose_path)
def extract_services(compose_data: dict[str, Any]) -> dict[str, Any]:
"""Extract services mapping from compose data."""
raw_services = compose_data.get("services", {})
return raw_services if isinstance(raw_services, dict) else {}
def _interpolate(value: str, env: dict[str, str]) -> str:
"""Perform ${VAR} and ${VAR:-default} interpolation."""
@@ -179,22 +192,21 @@ def _parse_volume_item(
return host_path
def parse_host_volumes(config: Config, service: str) -> list[str]:
"""Extract host bind mount paths from a service's compose file.
def parse_host_volumes(config: Config, stack: str) -> list[str]:
"""Extract host bind mount paths from a stack's compose file.
Returns a list of absolute host paths used as volume mounts.
Skips named volumes and resolves relative paths.
"""
compose_path = config.get_compose_path(service)
compose_path, compose_data = load_compose_data_for_stack(config, stack)
if not compose_path.exists():
return []
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
raw_services = extract_services(compose_data)
if not raw_services:
return []
env = _load_env(compose_path)
paths: list[str] = []
compose_dir = compose_path.parent
@@ -216,21 +228,20 @@ def parse_host_volumes(config: Config, service: str) -> list[str]:
return list(dict.fromkeys(paths))
def parse_devices(config: Config, service: str) -> list[str]:
"""Extract host device paths from a service's compose file.
def parse_devices(config: Config, stack: str) -> list[str]:
"""Extract host device paths from a stack's compose file.
Returns a list of host device paths (e.g., /dev/dri, /dev/dri/renderD128).
"""
compose_path = config.get_compose_path(service)
compose_path, compose_data = load_compose_data_for_stack(config, stack)
if not compose_path.exists():
return []
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
raw_services = extract_services(compose_data)
if not raw_services:
return []
env = _load_env(compose_path)
devices: list[str] = []
for definition in raw_services.values():
if not isinstance(definition, dict):
@@ -255,16 +266,15 @@ def parse_devices(config: Config, service: str) -> list[str]:
return list(dict.fromkeys(devices))
def parse_external_networks(config: Config, service: str) -> list[str]:
"""Extract external network names from a service's compose file.
def parse_external_networks(config: Config, stack: str) -> list[str]:
"""Extract external network names from a stack's compose file.
Returns a list of network names marked as external: true.
"""
compose_path = config.get_compose_path(service)
compose_path, compose_data = load_compose_data_for_stack(config, stack)
if not compose_path.exists():
return []
compose_data = yaml.safe_load(compose_path.read_text()) or {}
networks = compose_data.get("networks", {})
if not isinstance(networks, dict):
return []
@@ -285,15 +295,14 @@ def load_compose_services(
Returns (services_dict, env_dict, host_address).
"""
compose_path = config.get_compose_path(stack)
compose_path, compose_data = load_compose_data_for_stack(config, stack)
if not compose_path.exists():
message = f"[{stack}] Compose file not found: {compose_path}"
raise FileNotFoundError(message)
env = _load_env(compose_path)
compose_data = yaml.safe_load(compose_path.read_text()) or {}
raw_services = compose_data.get("services", {})
if not isinstance(raw_services, dict):
raw_services = extract_services(compose_data)
if not raw_services:
return {}, env, config.get_host(stack).address
return raw_services, env, config.get_host(stack).address
@@ -336,3 +345,18 @@ def get_ports_for_service(
if isinstance(ref_def, dict):
return _parse_ports(ref_def.get("ports"), env)
return _parse_ports(definition.get("ports"), env)
def get_container_name(
service_name: str,
service_def: dict[str, Any] | None,
project_name: str,
) -> str:
"""Get the container name for a service.
Uses container_name from compose if set, otherwise defaults to {project}-{service}-1.
This matches Docker Compose's default naming convention.
"""
if isinstance(service_def, dict) and service_def.get("container_name"):
return str(service_def["container_name"])
return f"{project_name}-{service_name}-1"
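Usage sketch for the helper above (service definitions invented): an explicit container_name wins, otherwise the Compose default {project}-{service}-1 is produced.
assert get_container_name("db", {"image": "postgres:16"}, "blog") == "blog-db-1"
assert get_container_name("db", {"container_name": "blog-postgres"}, "blog") == "blog-postgres"
assert get_container_name("web", None, "blog") == "blog-web-1"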

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import getpass
from pathlib import Path
from typing import Any
import yaml
from pydantic import BaseModel, Field, model_validator
@@ -14,7 +15,7 @@ from .paths import config_search_paths, find_config_path
COMPOSE_FILENAMES = ("compose.yaml", "compose.yml", "docker-compose.yml", "docker-compose.yaml")
class Host(BaseModel):
class Host(BaseModel, extra="forbid"):
"""SSH host configuration."""
address: str
@@ -22,14 +23,17 @@ class Host(BaseModel):
port: int = 22
class Config(BaseModel):
class Config(BaseModel, extra="forbid"):
"""Main configuration."""
compose_dir: Path = Path("/opt/compose")
hosts: dict[str, Host]
services: dict[str, str | list[str]] # service_name -> host_name or list of hosts
stacks: dict[str, str | list[str]] # stack_name -> host_name or list of hosts
traefik_file: Path | None = None # Auto-regenerate traefik config after up/down
traefik_service: str | None = None # Service name for Traefik (skip its host in file-provider)
traefik_stack: str | None = None # Stack name for Traefik (skip its host in file-provider)
glances_stack: str | None = (
None # Stack name for Glances (enables host resource stats in web UI)
)
config_path: Path = Path() # Set by load_config()
def get_state_path(self) -> Path:
@@ -37,70 +41,70 @@ class Config(BaseModel):
return self.config_path.parent / "compose-farm-state.yaml"
@model_validator(mode="after")
def validate_hosts_and_services(self) -> Config:
"""Validate host names and service configurations."""
def validate_hosts_and_stacks(self) -> Config:
"""Validate host names and stack configurations."""
# "all" is reserved keyword, cannot be used as host name
if "all" in self.hosts:
msg = "'all' is a reserved keyword and cannot be used as a host name"
raise ValueError(msg)
for service, host_value in self.services.items():
for stack, host_value in self.stacks.items():
# Validate list configurations
if isinstance(host_value, list):
if not host_value:
msg = f"Service '{service}' has empty host list"
msg = f"Stack '{stack}' has empty host list"
raise ValueError(msg)
if len(host_value) != len(set(host_value)):
msg = f"Service '{service}' has duplicate hosts in list"
msg = f"Stack '{stack}' has duplicate hosts in list"
raise ValueError(msg)
# Validate all referenced hosts exist
host_names = self.get_hosts(service)
host_names = self.get_hosts(stack)
for host_name in host_names:
if host_name not in self.hosts:
msg = f"Service '{service}' references unknown host '{host_name}'"
msg = f"Stack '{stack}' references unknown host '{host_name}'"
raise ValueError(msg)
return self
def get_hosts(self, service: str) -> list[str]:
"""Get list of host names for a service.
def get_hosts(self, stack: str) -> list[str]:
"""Get list of host names for a stack.
Supports:
- Single host: "truenas-debian" -> ["truenas-debian"]
- All hosts: "all" -> list of all configured hosts
- Explicit list: ["host1", "host2"] -> ["host1", "host2"]
"""
if service not in self.services:
msg = f"Unknown service: {service}"
if stack not in self.stacks:
msg = f"Unknown stack: {stack}"
raise ValueError(msg)
host_value = self.services[service]
host_value = self.stacks[stack]
if isinstance(host_value, list):
return host_value
if host_value == "all":
return list(self.hosts.keys())
return [host_value]
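The three accepted forms, restated as a standalone sketch using plain dicts instead of the pydantic models so it runs on its own (host and stack names are hypothetical):
hosts = {"server1": "10.0.0.1", "server2": "10.0.0.2"}  # stand-in for Host objects
stacks = {"nginx": "server1", "prometheus": ["server1", "server2"], "node-exporter": "all"}

def resolve_hosts(name: str) -> list[str]:
    value = stacks[name]
    if isinstance(value, list):      # explicit list
        return value
    if value == "all":               # reserved keyword: every configured host
        return list(hosts)
    return [value]                   # single host

assert resolve_hosts("nginx") == ["server1"]
assert resolve_hosts("prometheus") == ["server1", "server2"]
assert resolve_hosts("node-exporter") == ["server1", "server2"]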
def is_multi_host(self, service: str) -> bool:
"""Check if a service runs on multiple hosts."""
return len(self.get_hosts(service)) > 1
def is_multi_host(self, stack: str) -> bool:
"""Check if a stack runs on multiple hosts."""
return len(self.get_hosts(stack)) > 1
def get_host(self, service: str) -> Host:
"""Get host config for a service (first host if multi-host)."""
if service not in self.services:
msg = f"Unknown service: {service}"
def get_host(self, stack: str) -> Host:
"""Get host config for a stack (first host if multi-host)."""
if stack not in self.stacks:
msg = f"Unknown stack: {stack}"
raise ValueError(msg)
host_names = self.get_hosts(service)
host_names = self.get_hosts(stack)
return self.hosts[host_names[0]]
def get_compose_path(self, service: str) -> Path:
"""Get compose file path for a service (tries compose.yaml first)."""
service_dir = self.compose_dir / service
def get_compose_path(self, stack: str) -> Path:
"""Get compose file path for a stack (tries compose.yaml first)."""
stack_dir = self.compose_dir / stack
for filename in COMPOSE_FILENAMES:
candidate = service_dir / filename
candidate = stack_dir / filename
if candidate.exists():
return candidate
# Default to compose.yaml if none exist (will error later)
return service_dir / "compose.yaml"
return stack_dir / "compose.yaml"
def discover_compose_dirs(self) -> set[str]:
"""Find all directories in compose_dir that contain a compose file."""
@@ -113,7 +117,7 @@ class Config(BaseModel):
return found
def _parse_hosts(raw_hosts: dict[str, str | dict[str, str | int]]) -> dict[str, Host]:
def _parse_hosts(raw_hosts: dict[str, Any]) -> dict[str, Host]:
"""Parse hosts from config, handling both simple and full forms."""
hosts = {}
for name, value in raw_hosts.items():

View File

@@ -9,7 +9,7 @@ err_console = Console(stderr=True, highlight=False)
# --- Message Constants ---
# Standardized message templates for consistent user-facing output
MSG_SERVICE_NOT_FOUND = "Service [cyan]{name}[/] not found in config"
MSG_STACK_NOT_FOUND = "Stack [cyan]{name}[/] not found in config"
MSG_HOST_NOT_FOUND = "Host [magenta]{name}[/] not found in config"
MSG_CONFIG_NOT_FOUND = "Config file not found"
MSG_DRY_RUN = "[dim](dry-run: no changes made)[/]"

View File

@@ -1,7 +1,7 @@
# Compose Farm configuration
# Documentation: https://github.com/basnijholt/compose-farm
#
# This file configures compose-farm to manage Docker Compose services
# This file configures compose-farm to manage Docker Compose stacks
# across multiple hosts via SSH.
#
# Place this file at:
@@ -11,7 +11,7 @@
# - Or set CF_CONFIG environment variable
# ------------------------------------------------------------------------------
# compose_dir: Directory containing service subdirectories with compose files
# compose_dir: Directory containing stack subdirectories with compose files
# ------------------------------------------------------------------------------
# Each subdirectory should contain a compose.yaml (or docker-compose.yml).
# This path must be the same on all hosts (NFS mount recommended).
@@ -48,28 +48,28 @@ hosts:
port: 2222
# ------------------------------------------------------------------------------
# services: Map service names to their target host(s)
# stacks: Map stack names to their target host(s)
# ------------------------------------------------------------------------------
# Each service name must match a subdirectory in compose_dir.
# Each stack name must match a subdirectory in compose_dir.
#
# Single host:
# service-name: hostname
# stack-name: hostname
#
# Multiple hosts (explicit list):
# service-name: [host1, host2]
# stack-name: [host1, host2]
#
# All hosts:
# service-name: all
# stack-name: all
#
services:
# Example: service runs on a single host
stacks:
# Example: stack runs on a single host
nginx: server1
postgres: server2
# Example: service runs on multiple specific hosts
# Example: stack runs on multiple specific hosts
# prometheus: [server1, server2]
# Example: service runs on ALL hosts (e.g., monitoring agents)
# Example: stack runs on ALL hosts (e.g., monitoring agents)
# node-exporter: all
# ------------------------------------------------------------------------------
@@ -81,9 +81,9 @@ services:
# traefik_file: /opt/compose/traefik/dynamic.d/compose-farm.yml
# ------------------------------------------------------------------------------
# traefik_service: (optional) Service name running Traefik
# traefik_stack: (optional) Stack name running Traefik
# ------------------------------------------------------------------------------
# When generating traefik_file, services on the same host as Traefik are
# When generating traefik_file, stacks on the same host as Traefik are
# skipped (they're handled by Traefik's Docker provider directly).
#
# traefik_service: traefik
# traefik_stack: traefik

View File

@@ -5,6 +5,7 @@ from __future__ import annotations
import asyncio
import socket
import subprocess
import time
from dataclasses import dataclass
from functools import lru_cache
from typing import TYPE_CHECKING, Any
@@ -23,6 +24,38 @@ LOCAL_ADDRESSES = frozenset({"local", "localhost", "127.0.0.1", "::1"})
_DEFAULT_SSH_PORT = 22
class TTLCache:
"""Simple TTL cache for async function results."""
def __init__(self, ttl_seconds: float = 30.0) -> None:
"""Initialize cache with default TTL in seconds."""
# Cache stores: key -> (timestamp, value, item_ttl)
self._cache: dict[str, tuple[float, Any, float]] = {}
self._default_ttl = ttl_seconds
def get(self, key: str) -> Any | None:
"""Get value if exists and not expired."""
if key in self._cache:
timestamp, value, item_ttl = self._cache[key]
if time.monotonic() - timestamp < item_ttl:
return value
del self._cache[key]
return None
def set(self, key: str, value: Any, ttl_seconds: float | None = None) -> None:
"""Set value with current timestamp and optional custom TTL."""
ttl = ttl_seconds if ttl_seconds is not None else self._default_ttl
self._cache[key] = (time.monotonic(), value, ttl)
def clear(self) -> None:
"""Clear all cached values."""
self._cache.clear()
# Cache compose labels per host for 30 seconds
_compose_labels_cache = TTLCache(ttl_seconds=30.0)
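A small usage sketch of the cache (key and value invented): expired entries are dropped lazily on get(), and set() accepts a per-item TTL override.
cache = TTLCache(ttl_seconds=30.0)
cache.set("nas", {"plex-plex-1": ("plex", "plex")})   # cached for the default 30 s
cache.set("laptop", {}, ttl_seconds=5.0)              # per-item override
assert cache.get("nas") == {"plex-plex-1": ("plex", "plex")}
assert cache.get("unknown-host") is None              # miss (or expired)
cache.clear()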
def _print_compose_command(
host_name: str,
compose_dir: str,
@@ -50,7 +83,7 @@ async def _stream_output_lines(
*,
is_stderr: bool = False,
) -> None:
"""Stream lines from a reader to console with a service prefix.
"""Stream lines from a reader to console with a stack prefix.
Works with both asyncio.StreamReader (bytes) and asyncssh readers (str).
If prefix is empty, output is printed without a prefix.
@@ -126,7 +159,7 @@ def _get_local_ips() -> frozenset[str]:
class CommandResult:
"""Result of a command execution."""
service: str
stack: str
exit_code: int
success: bool
stdout: str = ""
@@ -158,21 +191,26 @@ def ssh_connect_kwargs(host: Host) -> dict[str, Any]:
"port": host.port,
"username": host.user,
"known_hosts": None,
"gss_auth": False, # Disable GSSAPI - causes multi-second delays
}
# Add SSH agent path (auto-detect forwarded agent if needed)
agent_path = get_ssh_auth_sock()
if agent_path:
kwargs["agent_path"] = agent_path
# Add key file fallback for when SSH agent is unavailable
# Add key file fallback (prioritized over agent if present)
key_path = get_key_path()
agent_path = get_ssh_auth_sock()
if key_path:
# If dedicated key exists, force use of it and ignore agent
# This avoids issues with stale/broken forwarded agents in Docker
kwargs["client_keys"] = [str(key_path)]
elif agent_path:
# Fallback to agent if no dedicated key
kwargs["agent_path"] = agent_path
return kwargs
async def _run_local_command(
command: str,
service: str,
stack: str,
*,
stream: bool = True,
raw: bool = False,
@@ -189,7 +227,7 @@ async def _run_local_command(
)
await proc.wait()
return CommandResult(
service=service,
stack=stack,
exit_code=proc.returncode or 0,
success=proc.returncode == 0,
)
@@ -214,21 +252,21 @@ async def _run_local_command(
await proc.wait()
return CommandResult(
service=service,
stack=stack,
exit_code=proc.returncode or 0,
success=proc.returncode == 0,
stdout=stdout_data.decode() if stdout_data else "",
stderr=stderr_data.decode() if stderr_data else "",
)
except OSError as e:
err_console.print(f"[cyan]\\[{service}][/] [red]Local error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
err_console.print(f"[cyan]\\[{stack}][/] [red]Local error:[/] {e}")
return CommandResult(stack=stack, exit_code=1, success=False)
async def _run_ssh_command(
host: Host,
command: str,
service: str,
stack: str,
*,
stream: bool = True,
raw: bool = False,
@@ -238,11 +276,15 @@ async def _run_ssh_command(
if raw:
# Use native ssh with TTY for proper progress bar rendering
ssh_args = build_ssh_command(host, command, tty=True)
def run_ssh() -> subprocess.CompletedProcess[bytes]:
return subprocess.run(ssh_args, check=False, env=get_ssh_env())
# Run in thread to avoid blocking the event loop
# Use get_ssh_env() to auto-detect SSH agent socket
result = await asyncio.to_thread(subprocess.run, ssh_args, check=False, env=get_ssh_env())
result = await asyncio.to_thread(run_ssh)
return CommandResult(
service=service,
stack=stack,
exit_code=result.returncode,
success=result.returncode == 0,
)
@@ -267,21 +309,21 @@ async def _run_ssh_command(
await proc.wait()
return CommandResult(
service=service,
stack=stack,
exit_code=proc.exit_status or 0,
success=proc.exit_status == 0,
stdout=stdout_data,
stderr=stderr_data,
)
except (OSError, asyncssh.Error) as e:
err_console.print(f"[cyan]\\[{service}][/] [red]SSH error:[/] {e}")
return CommandResult(service=service, exit_code=1, success=False)
err_console.print(f"[cyan]\\[{stack}][/] [red]SSH error:[/] {e}")
return CommandResult(stack=stack, exit_code=1, success=False)
async def run_command(
host: Host,
command: str,
service: str,
stack: str,
*,
stream: bool = True,
raw: bool = False,
@@ -292,45 +334,45 @@ async def run_command(
Args:
host: Host configuration
command: Command to run
service: Service name (stored in result)
stack: Stack name (stored in result)
stream: Whether to stream output (default True)
raw: Whether to use raw mode with TTY (default False)
prefix: Output prefix. None=use service name, ""=no prefix.
prefix: Output prefix. None=use stack name, ""=no prefix.
"""
output_prefix = service if prefix is None else prefix
output_prefix = stack if prefix is None else prefix
if is_local(host):
return await _run_local_command(
command, service, stream=stream, raw=raw, prefix=output_prefix
command, stack, stream=stream, raw=raw, prefix=output_prefix
)
return await _run_ssh_command(
host, command, service, stream=stream, raw=raw, prefix=output_prefix
host, command, stack, stream=stream, raw=raw, prefix=output_prefix
)
async def run_compose(
config: Config,
service: str,
stack: str,
compose_cmd: str,
*,
stream: bool = True,
raw: bool = False,
prefix: str | None = None,
) -> CommandResult:
"""Run a docker compose command for a service."""
host_name = config.get_hosts(service)[0]
"""Run a docker compose command for a stack."""
host_name = config.get_hosts(stack)[0]
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
compose_path = config.get_compose_path(stack)
_print_compose_command(host_name, str(config.compose_dir), str(compose_path), compose_cmd)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream, raw=raw, prefix=prefix)
return await run_command(host, command, stack, stream=stream, raw=raw, prefix=prefix)
async def run_compose_on_host(
config: Config,
service: str,
stack: str,
host_name: str,
compose_cmd: str,
*,
@@ -338,68 +380,68 @@ async def run_compose_on_host(
raw: bool = False,
prefix: str | None = None,
) -> CommandResult:
"""Run a docker compose command for a service on a specific host.
"""Run a docker compose command for a stack on a specific host.
Used for migration - running 'down' on the old host before 'up' on new host.
"""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
compose_path = config.get_compose_path(stack)
_print_compose_command(host_name, str(config.compose_dir), str(compose_path), compose_cmd)
command = f"docker compose -f {compose_path} {compose_cmd}"
return await run_command(host, command, service, stream=stream, raw=raw, prefix=prefix)
return await run_command(host, command, stack, stream=stream, raw=raw, prefix=prefix)
async def run_on_services(
async def run_on_stacks(
config: Config,
services: list[str],
stacks: list[str],
compose_cmd: str,
*,
stream: bool = True,
raw: bool = False,
) -> list[CommandResult]:
"""Run a docker compose command on multiple services in parallel.
"""Run a docker compose command on multiple stacks in parallel.
For multi-host services, runs on all configured hosts.
Note: raw=True only makes sense for single-service operations.
For multi-host stacks, runs on all configured hosts.
Note: raw=True only makes sense for single-stack operations.
"""
return await run_sequential_on_services(config, services, [compose_cmd], stream=stream, raw=raw)
return await run_sequential_on_stacks(config, stacks, [compose_cmd], stream=stream, raw=raw)
async def _run_sequential_commands(
async def _run_sequential_stack_commands(
config: Config,
service: str,
stack: str,
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
prefix: str | None = None,
) -> CommandResult:
"""Run multiple compose commands sequentially for a service."""
"""Run multiple compose commands sequentially for a stack."""
for cmd in commands:
result = await run_compose(config, service, cmd, stream=stream, raw=raw, prefix=prefix)
result = await run_compose(config, stack, cmd, stream=stream, raw=raw, prefix=prefix)
if not result.success:
return result
return CommandResult(service=service, exit_code=0, success=True)
return CommandResult(stack=stack, exit_code=0, success=True)
async def _run_sequential_commands_multi_host(
async def _run_sequential_stack_commands_multi_host(
config: Config,
service: str,
stack: str,
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
prefix: str | None = None,
) -> list[CommandResult]:
"""Run multiple compose commands sequentially for a multi-host service.
"""Run multiple compose commands sequentially for a multi-host stack.
Commands are run sequentially, but each command runs on all hosts in parallel.
For multi-host services, prefix defaults to service@host format.
For multi-host stacks, prefix defaults to stack@host format.
"""
host_names = config.get_hosts(service)
compose_path = config.get_compose_path(service)
host_names = config.get_hosts(stack)
compose_path = config.get_compose_path(stack)
final_results: list[CommandResult] = []
for cmd in commands:
@@ -408,10 +450,10 @@ async def _run_sequential_commands_multi_host(
for host_name in host_names:
_print_compose_command(host_name, str(config.compose_dir), str(compose_path), cmd)
host = config.hosts[host_name]
# For multi-host services, always use service@host prefix to distinguish output
label = f"{service}@{host_name}" if len(host_names) > 1 else service
# Multi-host services always need prefixes to distinguish output from different hosts
# (ignore empty prefix from single-service batches - we still need to distinguish hosts)
# For multi-host stacks, always use stack@host prefix to distinguish output
label = f"{stack}@{host_name}" if len(host_names) > 1 else stack
# Multi-host stacks always need prefixes to distinguish output from different hosts
# (ignore empty prefix from single-stack batches - we still need to distinguish hosts)
effective_prefix = label if len(host_names) > 1 else prefix
tasks.append(
run_command(host, command, label, stream=stream, raw=raw, prefix=effective_prefix)
@@ -427,37 +469,37 @@ async def _run_sequential_commands_multi_host(
return final_results
async def run_sequential_on_services(
async def run_sequential_on_stacks(
config: Config,
services: list[str],
stacks: list[str],
commands: list[str],
*,
stream: bool = True,
raw: bool = False,
) -> list[CommandResult]:
"""Run sequential commands on multiple services in parallel.
"""Run sequential commands on multiple stacks in parallel.
For multi-host services, runs on all configured hosts.
Note: raw=True only makes sense for single-service operations.
For multi-host stacks, runs on all configured hosts.
Note: raw=True only makes sense for single-stack operations.
"""
# Skip prefix for single-service operations (command line already shows context)
prefix: str | None = "" if len(services) == 1 else None
# Skip prefix for single-stack operations (command line already shows context)
prefix: str | None = "" if len(stacks) == 1 else None
# Separate multi-host and single-host services for type-safe gathering
# Separate multi-host and single-host stacks for type-safe gathering
multi_host_tasks = []
single_host_tasks = []
for service in services:
if config.is_multi_host(service):
for stack in stacks:
if config.is_multi_host(stack):
multi_host_tasks.append(
_run_sequential_commands_multi_host(
config, service, commands, stream=stream, raw=raw, prefix=prefix
_run_sequential_stack_commands_multi_host(
config, stack, commands, stream=stream, raw=raw, prefix=prefix
)
)
else:
single_host_tasks.append(
_run_sequential_commands(
config, service, commands, stream=stream, raw=raw, prefix=prefix
_run_sequential_stack_commands(
config, stack, commands, stream=stream, raw=raw, prefix=prefix
)
)
@@ -476,23 +518,89 @@ async def run_sequential_on_services(
return flat_results
async def check_service_running(
async def check_stack_running(
config: Config,
service: str,
stack: str,
host_name: str,
) -> bool:
"""Check if a service has running containers on a specific host."""
"""Check if a stack has running containers on a specific host."""
host = config.hosts[host_name]
compose_path = config.get_compose_path(service)
compose_path = config.get_compose_path(stack)
# Use ps --status running to check for running containers
command = f"docker compose -f {compose_path} ps --status running -q"
result = await run_command(host, command, service, stream=False)
result = await run_command(host, command, stack, stream=False)
# If command succeeded and has output, containers are running
return result.success and bool(result.stdout.strip())
async def get_running_stacks_on_host(
config: Config,
host_name: str,
) -> set[str]:
"""Get all running compose stacks on a host in a single SSH call.
Uses docker ps with the compose.project label to identify running stacks.
Much more efficient than checking each stack individually.
"""
host = config.hosts[host_name]
# Get unique project names from running containers
command = "docker ps --format '{{.Label \"com.docker.compose.project\"}}' | sort -u"
result = await run_command(host, command, stack=host_name, stream=False, prefix="")
if not result.success:
return set()
# Filter out empty lines and return as set
return {line.strip() for line in result.stdout.splitlines() if line.strip()}
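For illustration, the pipeline above emits one compose project name per line (sort -u already deduplicates; containers not started by compose yield a blank label), so parsing reduces to dropping empty lines. The sample output is invented.
sample_stdout = "immich\nplex\n\n"   # trailing blank line = container without a compose label
running = {line.strip() for line in sample_stdout.splitlines() if line.strip()}
assert running == {"immich", "plex"}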
async def get_container_compose_labels(
config: Config,
host_name: str,
) -> dict[str, tuple[str, str]]:
"""Get compose labels for all containers on a host.
Returns dict of container_name -> (project, service).
Includes all containers (-a flag) since Glances shows stopped containers too.
Falls back to empty dict on timeout/error (5s timeout).
Results are cached for 30 seconds to reduce SSH overhead.
"""
# Check cache first
cached: dict[str, tuple[str, str]] | None = _compose_labels_cache.get(host_name)
if cached is not None:
return cached
host = config.hosts[host_name]
cmd = (
"docker ps -a --format "
'\'{{.Names}}\t{{.Label "com.docker.compose.project"}}\t'
'{{.Label "com.docker.compose.service"}}\''
)
try:
async with asyncio.timeout(5.0):
result = await run_command(host, cmd, stack=host_name, stream=False, prefix="")
except TimeoutError:
return {}
except Exception:
return {}
labels: dict[str, tuple[str, str]] = {}
if result.success:
for line in result.stdout.splitlines():
parts = line.strip().split("\t")
if len(parts) >= 3: # noqa: PLR2004
name, project, service = parts[0], parts[1], parts[2]
labels[name] = (project or "", service or "")
# Cache the result
_compose_labels_cache.set(host_name, labels)
return labels
async def _batch_check_existence(
config: Config,
host_name: str,

src/compose_farm/glances.py (new file, 277 lines)
View File

@@ -0,0 +1,277 @@
"""Glances API client for host resource monitoring."""
from __future__ import annotations
import asyncio
import os
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any
from .executor import is_local
if TYPE_CHECKING:
from .config import Config, Host
# Default Glances REST API port
DEFAULT_GLANCES_PORT = 61208
def _get_glances_address(
host_name: str,
host: Host,
glances_container: str | None,
) -> str:
"""Get the address to use for Glances API requests.
When running in a Docker container (CF_WEB_STACK set), the local host's Glances
may not be reachable via its LAN IP due to Docker network isolation. In this case,
we use the Glances container name for the local host.
Set CF_LOCAL_HOST=<hostname> to explicitly specify which host is local.
"""
# Only use container name when running inside a Docker container
in_container = os.environ.get("CF_WEB_STACK") is not None
if not in_container or not glances_container:
return host.address
# CF_LOCAL_HOST explicitly tells us which host to reach via container name
explicit_local = os.environ.get("CF_LOCAL_HOST")
if explicit_local and host_name == explicit_local:
return glances_container
# Fall back to is_local detection (may not work in container)
if is_local(host):
return glances_container
return host.address
@dataclass
class HostStats:
"""Resource statistics for a host."""
host: str
cpu_percent: float
mem_percent: float
swap_percent: float
load: float
disk_percent: float
net_rx_rate: float = 0.0 # bytes/sec
net_tx_rate: float = 0.0 # bytes/sec
error: str | None = None
@classmethod
def from_error(cls, host: str, error: str) -> HostStats:
"""Create a HostStats with an error."""
return cls(
host=host,
cpu_percent=0,
mem_percent=0,
swap_percent=0,
load=0,
disk_percent=0,
net_rx_rate=0,
net_tx_rate=0,
error=error,
)
async def fetch_host_stats(
host_name: str,
host_address: str,
port: int = DEFAULT_GLANCES_PORT,
request_timeout: float = 10.0,
) -> HostStats:
"""Fetch stats from a single host's Glances API."""
import httpx # noqa: PLC0415
base_url = f"http://{host_address}:{port}/api/4"
try:
async with httpx.AsyncClient(timeout=request_timeout) as client:
# Fetch quicklook stats (CPU, mem, load)
response = await client.get(f"{base_url}/quicklook")
if not response.is_success:
return HostStats.from_error(host_name, f"HTTP {response.status_code}")
data = response.json()
# Fetch filesystem stats for disk usage (root fs or max across all)
disk_percent = 0.0
try:
fs_response = await client.get(f"{base_url}/fs")
if fs_response.is_success:
fs_data = fs_response.json()
root = next((fs for fs in fs_data if fs.get("mnt_point") == "/"), None)
disk_percent = (
root.get("percent", 0)
if root
else max((fs.get("percent", 0) for fs in fs_data), default=0)
)
except httpx.HTTPError:
pass # Disk stats are optional
# Fetch network stats for rate (sum across non-loopback interfaces)
net_rx_rate, net_tx_rate = 0.0, 0.0
try:
net_response = await client.get(f"{base_url}/network")
if net_response.is_success:
for iface in net_response.json():
if not iface.get("interface_name", "").startswith("lo"):
net_rx_rate += iface.get("bytes_recv_rate_per_sec") or 0
net_tx_rate += iface.get("bytes_sent_rate_per_sec") or 0
except httpx.HTTPError:
pass # Network stats are optional
return HostStats(
host=host_name,
cpu_percent=data.get("cpu", 0),
mem_percent=data.get("mem", 0),
swap_percent=data.get("swap", 0),
load=data.get("load", 0),
disk_percent=disk_percent,
net_rx_rate=net_rx_rate,
net_tx_rate=net_tx_rate,
)
except httpx.TimeoutException:
return HostStats.from_error(host_name, "timeout")
except httpx.HTTPError as e:
return HostStats.from_error(host_name, str(e))
except Exception as e:
return HostStats.from_error(host_name, str(e))
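Usage sketch (host name and address are assumptions): the coroutine reports failures via HostStats.error instead of raising, so callers can still render a partial dashboard when one host is down.
import asyncio

async def main() -> None:
    stats = await fetch_host_stats("nas", "192.168.1.10")  # hypothetical host
    if stats.error:
        print(f"{stats.host}: unavailable ({stats.error})")
    else:
        print(f"{stats.host}: cpu={stats.cpu_percent}% mem={stats.mem_percent}% load={stats.load}")

asyncio.run(main())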
async def fetch_all_host_stats(
config: Config,
port: int = DEFAULT_GLANCES_PORT,
) -> dict[str, HostStats]:
"""Fetch stats from all hosts in parallel."""
glances_container = config.glances_stack
tasks = [
fetch_host_stats(name, _get_glances_address(name, host, glances_container), port)
for name, host in config.hosts.items()
]
results = await asyncio.gather(*tasks)
return {stats.host: stats for stats in results}
@dataclass
class ContainerStats:
"""Container statistics from Glances."""
name: str
host: str
status: str
image: str
cpu_percent: float
memory_usage: int # bytes
memory_limit: int # bytes
memory_percent: float
network_rx: int # cumulative bytes received
network_tx: int # cumulative bytes sent
uptime: str
ports: str
engine: str # docker, podman, etc.
stack: str = "" # compose project name (from docker labels)
service: str = "" # compose service name (from docker labels)
def _parse_container(data: dict[str, Any], host_name: str) -> ContainerStats:
"""Parse container data from Glances API response."""
# Image can be a list or string
image = data.get("image", ["unknown"])
if isinstance(image, list):
image = image[0] if image else "unknown"
# Calculate memory percent
mem_usage = data.get("memory_usage", 0) or 0
mem_limit = data.get("memory_limit", 1) or 1 # Avoid division by zero
mem_percent = (mem_usage / mem_limit) * 100 if mem_limit > 0 else 0
# Network stats
network = data.get("network", {}) or {}
network_rx = network.get("cumulative_rx", 0) or 0
network_tx = network.get("cumulative_tx", 0) or 0
return ContainerStats(
name=data.get("name", "unknown"),
host=host_name,
status=data.get("status", "unknown"),
image=image,
cpu_percent=data.get("cpu_percent", 0) or 0,
memory_usage=mem_usage,
memory_limit=mem_limit,
memory_percent=mem_percent,
network_rx=network_rx,
network_tx=network_tx,
uptime=data.get("uptime", ""),
ports=data.get("ports", "") or "",
engine=data.get("engine", "docker"),
)
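A fabricated Glances payload run through the parser (only the fields it reads are shown); note memory_percent is derived here rather than taken from the API, and stack/service stay empty until the compose labels are merged in.
payload = {
    "name": "blog-db-1",
    "status": "running",
    "image": ["postgres:16"],
    "cpu_percent": 1.5,
    "memory_usage": 256 * 1024 * 1024,
    "memory_limit": 1024 * 1024 * 1024,
    "network": {"cumulative_rx": 1000, "cumulative_tx": 2000},
    "uptime": "2 days",
    "engine": "docker",
}
stats = _parse_container(payload, host_name="nas")
assert stats.image == "postgres:16"
assert round(stats.memory_percent) == 25
assert stats.stack == ""   # filled in later from the docker compose labels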
async def fetch_container_stats(
host_name: str,
host_address: str,
port: int = DEFAULT_GLANCES_PORT,
request_timeout: float = 10.0,
) -> tuple[list[ContainerStats] | None, str | None]:
"""Fetch container stats from a single host's Glances API.
Returns:
(containers, error_message)
- Success: ([...], None)
- Failure: (None, "error message")
"""
import httpx # noqa: PLC0415
url = f"http://{host_address}:{port}/api/4/containers"
try:
async with httpx.AsyncClient(timeout=request_timeout) as client:
response = await client.get(url)
if not response.is_success:
return None, f"HTTP {response.status_code}: {response.reason_phrase}"
data = response.json()
return [_parse_container(c, host_name) for c in data], None
except httpx.ConnectError:
return None, "Connection refused (Glances offline?)"
except httpx.TimeoutException:
return None, "Connection timed out"
except Exception as e:
return None, str(e)
async def fetch_all_container_stats(
config: Config,
port: int = DEFAULT_GLANCES_PORT,
) -> list[ContainerStats]:
"""Fetch container stats from all hosts in parallel, enriched with compose labels."""
from .executor import get_container_compose_labels # noqa: PLC0415
glances_container = config.glances_stack
async def fetch_host_data(
host_name: str,
host_address: str,
) -> list[ContainerStats]:
# Fetch Glances stats and compose labels in parallel
stats_task = fetch_container_stats(host_name, host_address, port)
labels_task = get_container_compose_labels(config, host_name)
(containers, _), labels = await asyncio.gather(stats_task, labels_task)
if containers is None:
# Skip failed hosts in aggregate view
return []
# Enrich containers with compose labels (mutate in place)
for c in containers:
c.stack, c.service = labels.get(c.name, ("", ""))
return containers
tasks = [
fetch_host_data(name, _get_glances_address(name, host, glances_container))
for name, host in config.hosts.items()
]
results = await asyncio.gather(*tasks)
# Flatten list of lists
return [container for host_containers in results for container in host_containers]

View File

@@ -6,28 +6,29 @@ import json
import tomllib
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING
from .executor import run_compose
from .executor import run_command
from .paths import xdg_config_home
if TYPE_CHECKING:
from collections.abc import Awaitable, Callable, Iterable
from collections.abc import Iterable
from pathlib import Path
from .config import Config
from .executor import CommandResult
# Separator used to split output sections
_SECTION_SEPARATOR = "---CF-SEP---"
DEFAULT_LOG_PATH = xdg_config_home() / "compose-farm" / "dockerfarm-log.toml"
_DIGEST_HEX_LENGTH = 64
@dataclass(frozen=True)
class SnapshotEntry:
"""Normalized image snapshot for a single service."""
"""Normalized image snapshot for a single stack."""
service: str
stack: str
host: str
compose_file: Path
image: str
@@ -37,7 +38,7 @@ class SnapshotEntry:
def as_dict(self, first_seen: str, last_seen: str) -> dict[str, str]:
"""Render snapshot as a TOML-friendly dict."""
return {
"service": self.service,
"stack": self.stack,
"host": self.host,
"compose_file": str(self.compose_file),
"image": self.image,
@@ -56,87 +57,97 @@ def _escape(value: str) -> str:
return value.replace("\\", "\\\\").replace('"', '\\"')
def _parse_images_output(raw: str) -> list[dict[str, Any]]:
"""Parse `docker compose images --format json` output.
Handles both a JSON array and newline-separated JSON objects for robustness.
"""
raw = raw.strip()
if not raw:
return []
def _parse_image_digests(image_json: str) -> dict[str, str]:
"""Parse docker image inspect JSON to build image tag -> digest map."""
if not image_json:
return {}
try:
parsed = json.loads(raw)
image_data = json.loads(image_json)
except json.JSONDecodeError:
objects = []
for line in raw.splitlines():
if not line.strip():
continue
objects.append(json.loads(line))
return objects
return {}
if isinstance(parsed, list):
return parsed
if isinstance(parsed, dict):
return [parsed]
return []
image_digests: dict[str, str] = {}
for img in image_data:
tags = img.get("RepoTags") or []
digests = img.get("RepoDigests") or []
digest = digests[0].split("@")[-1] if digests else img.get("Id", "")
for tag in tags:
image_digests[tag] = digest
if img.get("Id"):
image_digests[img["Id"]] = digest
return image_digests
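An illustrative input/output pair (IDs and digests fabricated and truncated): both the repo tag and the raw image ID map to the RepoDigests digest, so either form of {{.Image}} reported by docker ps can be resolved.
sample = """[
  {"Id": "sha256:aaa111",
   "RepoTags": ["nginx:1.27"],
   "RepoDigests": ["nginx@sha256:bbb222"]}
]"""
digests = _parse_image_digests(sample)
assert digests == {"nginx:1.27": "sha256:bbb222", "sha256:aaa111": "sha256:bbb222"}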
def _extract_image_fields(record: dict[str, Any]) -> tuple[str, str]:
"""Extract image name and digest with fallbacks."""
image = record.get("Image") or record.get("Repository") or record.get("Name") or ""
tag = record.get("Tag") or record.get("Version")
if tag and ":" not in image.rsplit("/", 1)[-1]:
image = f"{image}:{tag}"
digest = (
record.get("Digest")
or record.get("Image ID")
or record.get("ImageID")
or record.get("ID")
or ""
)
if digest and not digest.startswith("sha256:") and len(digest) == _DIGEST_HEX_LENGTH:
digest = f"sha256:{digest}"
return image, digest
async def collect_service_entries(
async def collect_stacks_entries_on_host(
config: Config,
service: str,
host_name: str,
stacks: set[str],
*,
now: datetime,
run_compose_fn: Callable[..., Awaitable[CommandResult]] = run_compose,
) -> list[SnapshotEntry]:
"""Run `docker compose images` for a service and normalize results."""
result = await run_compose_fn(config, service, "images --format json", stream=False)
"""Collect image entries for stacks on one host using 2 docker commands.
Uses `docker ps` to get running containers + their compose project labels,
then `docker image inspect` to get digests for all unique images.
Much faster than running N `docker compose images` commands.
"""
if not stacks:
return []
host = config.hosts[host_name]
# Single SSH call with 2 docker commands:
# 1. Get project|image pairs from running containers
# 2. Get image info (including digests) for all unique images
command = (
f"docker ps --format '{{{{.Label \"com.docker.compose.project\"}}}}|{{{{.Image}}}}' && "
f"echo '{_SECTION_SEPARATOR}' && "
"docker image inspect $(docker ps --format '{{.Image}}' | sort -u) 2>/dev/null || true"
)
result = await run_command(host, command, host_name, stream=False, prefix="")
if not result.success:
msg = result.stderr or f"compose images exited with {result.exit_code}"
error = f"[{service}] Unable to read images: {msg}"
raise RuntimeError(error)
return []
records = _parse_images_output(result.stdout)
# Use first host for snapshots (multi-host services use same images on all hosts)
host_name = config.get_hosts(service)[0]
compose_path = config.get_compose_path(service)
# Split output into two sections
parts = result.stdout.split(_SECTION_SEPARATOR)
if len(parts) != 2: # noqa: PLR2004
return []
entries: list[SnapshotEntry] = []
for record in records:
image, digest = _extract_image_fields(record)
if not digest:
container_lines, image_json = parts[0].strip(), parts[1].strip()
# Parse project|image pairs, filtering to only stacks we care about
stack_images: dict[str, set[str]] = {}
for line in container_lines.splitlines():
if "|" not in line:
continue
entries.append(
SnapshotEntry(
service=service,
host=host_name,
compose_file=compose_path,
image=image,
digest=digest,
captured_at=now,
)
)
project, image = line.split("|", 1)
if project in stacks:
stack_images.setdefault(project, set()).add(image)
if not stack_images:
return []
# Parse image inspect JSON to build image -> digest map
image_digests = _parse_image_digests(image_json)
# Build entries
entries: list[SnapshotEntry] = []
for stack, images in stack_images.items():
for image in images:
digest = image_digests.get(image, "")
if digest:
entries.append(
SnapshotEntry(
stack=stack,
host=host_name,
compose_file=config.get_compose_path(stack),
image=image,
digest=digest,
captured_at=now,
)
)
return entries
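A walk-through of the combined output the single SSH call produces (project|image lines, the separator, then docker image inspect JSON); names are fabricated and the inspect section is left empty for brevity.
raw = (
    "blog|nginx:1.27\n"
    "blog|postgres:16\n"
    "monitoring|grafana/grafana:11.2.0\n"
    "---CF-SEP---\n"
    "[]"
)
containers, image_json = raw.split("---CF-SEP---")
stack_images: dict[str, set[str]] = {}
for line in containers.strip().splitlines():
    project, image = line.split("|", 1)
    if project in {"blog"}:            # only stacks configured for this host
        stack_images.setdefault(project, set()).add(image)
assert stack_images == {"blog": {"nginx:1.27", "postgres:16"}}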
@@ -145,7 +156,14 @@ def load_existing_entries(log_path: Path) -> list[dict[str, str]]:
if not log_path.exists():
return []
data = tomllib.loads(log_path.read_text())
return list(data.get("entries", []))
entries = list(data.get("entries", []))
normalized: list[dict[str, str]] = []
for entry in entries:
normalized_entry = dict(entry)
if "stack" not in normalized_entry and "service" in normalized_entry:
normalized_entry["stack"] = normalized_entry.pop("service")
normalized.append(normalized_entry)
return normalized
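A sketch of the backward-compat rename applied when loading a log written before this change (entry content invented): entries keyed by "service" come back keyed by "stack", so merge_entries can index on the new field.
import tomllib

old_log = 'entries = [{service = "plex", host = "nas", digest = "sha256:abc"}]'
data = tomllib.loads(old_log)
normalized = []
for entry in data["entries"]:
    e = dict(entry)
    if "stack" not in e and "service" in e:
        e["stack"] = e.pop("service")
    normalized.append(e)
assert normalized[0]["stack"] == "plex" and "service" not in normalized[0]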
def merge_entries(
@@ -156,11 +174,11 @@ def merge_entries(
) -> list[dict[str, str]]:
"""Merge new snapshot entries with existing ones, preserving first_seen timestamps."""
merged: dict[tuple[str, str, str], dict[str, str]] = {
(e["service"], e["host"], e["digest"]): dict(e) for e in existing
(e["stack"], e["host"], e["digest"]): dict(e) for e in existing
}
for entry in new_entries:
key = (entry.service, entry.host, entry.digest)
key = (entry.stack, entry.host, entry.digest)
first_seen = merged.get(key, {}).get("first_seen", now_iso)
merged[key] = entry.as_dict(first_seen, now_iso)
@@ -175,10 +193,10 @@ def write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str,
if entries:
lines.append("")
for entry in sorted(entries, key=lambda e: (e["service"], e["host"], e["digest"])):
for entry in sorted(entries, key=lambda e: (e["stack"], e["host"], e["digest"])):
lines.append("[[entries]]")
for field in [
"service",
"stack",
"host",
"compose_file",
"image",

View File

@@ -15,17 +15,17 @@ from .executor import (
CommandResult,
check_networks_exist,
check_paths_exist,
check_service_running,
check_stack_running,
run_command,
run_compose,
run_compose_on_host,
)
from .state import (
get_orphaned_services,
get_service_host,
remove_service,
set_multi_host_service,
set_service_host,
get_orphaned_stacks,
get_stack_host,
remove_stack,
set_multi_host_stack,
set_stack_host,
)
if TYPE_CHECKING:
@@ -37,7 +37,7 @@ class OperationInterruptedError(Exception):
class PreflightResult(NamedTuple):
"""Result of pre-flight checks for a service on a host."""
"""Result of pre-flight checks for a stack on a host."""
missing_paths: list[str]
missing_networks: list[str]
@@ -51,7 +51,7 @@ class PreflightResult(NamedTuple):
async def _run_compose_step(
cfg: Config,
service: str,
stack: str,
command: str,
*,
raw: bool,
@@ -59,9 +59,9 @@ async def _run_compose_step(
) -> CommandResult:
"""Run a compose command, handle raw output newline, and check for interrupts."""
if host:
result = await run_compose_on_host(cfg, service, host, command, raw=raw)
result = await run_compose_on_host(cfg, stack, host, command, raw=raw)
else:
result = await run_compose(cfg, service, command, raw=raw)
result = await run_compose(cfg, stack, command, raw=raw)
if raw:
print() # Ensure newline after raw output
if result.interrupted:
@@ -69,63 +69,69 @@ async def _run_compose_step(
return result
def get_service_paths(cfg: Config, service: str) -> list[str]:
"""Get all required paths for a service (compose_dir + volumes)."""
def get_stack_paths(cfg: Config, stack: str) -> list[str]:
"""Get all required paths for a stack (compose_dir + volumes)."""
paths = [str(cfg.compose_dir)]
paths.extend(parse_host_volumes(cfg, service))
paths.extend(parse_host_volumes(cfg, stack))
return paths
async def discover_service_host(cfg: Config, service: str) -> tuple[str, str | list[str] | None]:
"""Discover where a service is running.
class StackDiscoveryResult(NamedTuple):
"""Result of discovering where a stack is running across all hosts."""
For multi-host services, checks all assigned hosts in parallel.
For single-host, checks assigned host first, then others.
stack: str
configured_hosts: list[str] # From config (where it SHOULD run)
running_hosts: list[str] # From reality (where it IS running)
Returns (service_name, host_or_hosts_or_none).
"""
assigned_hosts = cfg.get_hosts(service)
@property
def is_multi_host(self) -> bool:
"""Check if this is a multi-host stack."""
return len(self.configured_hosts) > 1
if cfg.is_multi_host(service):
# Check all assigned hosts in parallel
checks = await asyncio.gather(
*[check_service_running(cfg, service, h) for h in assigned_hosts]
)
running = [h for h, is_running in zip(assigned_hosts, checks, strict=True) if is_running]
return service, running if running else None
@property
def stray_hosts(self) -> list[str]:
"""Hosts where stack is running but shouldn't be."""
return [h for h in self.running_hosts if h not in self.configured_hosts]
# Single-host: check assigned host first, then others
if await check_service_running(cfg, service, assigned_hosts[0]):
return service, assigned_hosts[0]
for host in cfg.hosts:
if host != assigned_hosts[0] and await check_service_running(cfg, service, host):
return service, host
return service, None
@property
def missing_hosts(self) -> list[str]:
"""Hosts where stack should be running but isn't."""
return [h for h in self.configured_hosts if h not in self.running_hosts]
@property
def is_stray(self) -> bool:
"""Stack is running on unauthorized host(s)."""
return len(self.stray_hosts) > 0
@property
def is_duplicate(self) -> bool:
"""Single-host stack running on multiple hosts."""
return not self.is_multi_host and len(self.running_hosts) > 1
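An illustrative instance (stack and host names invented) showing how the derived properties classify a single-host stack that has drifted onto the wrong machine:
result = StackDiscoveryResult(
    stack="paperless",
    configured_hosts=["server1"],            # where the config says it should run
    running_hosts=["server2"],               # where docker ps actually found it
)
assert result.stray_hosts == ["server2"]     # running where it should not be
assert result.missing_hosts == ["server1"]   # not running where it should be
assert result.is_stray and not result.is_duplicate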
async def check_service_requirements(
async def check_stack_requirements(
cfg: Config,
service: str,
stack: str,
host_name: str,
) -> PreflightResult:
"""Check if a service can run on a specific host.
"""Check if a stack can run on a specific host.
Verifies that all required paths (volumes), networks, and devices exist.
"""
# Check mount paths
paths = get_service_paths(cfg, service)
paths = get_stack_paths(cfg, stack)
path_exists = await check_paths_exist(cfg, host_name, paths)
missing_paths = [p for p, found in path_exists.items() if not found]
# Check external networks
networks = parse_external_networks(cfg, service)
networks = parse_external_networks(cfg, stack)
missing_networks: list[str] = []
if networks:
net_exists = await check_networks_exist(cfg, host_name, networks)
missing_networks = [n for n, found in net_exists.items() if not found]
# Check devices
devices = parse_devices(cfg, service)
devices = parse_devices(cfg, stack)
missing_devices: list[str] = []
if devices:
dev_exists = await check_paths_exist(cfg, host_name, devices)
@@ -136,7 +142,7 @@ async def check_service_requirements(
async def _cleanup_and_rollback(
cfg: Config,
service: str,
stack: str,
target_host: str,
current_host: str,
prefix: str,
@@ -146,29 +152,29 @@ async def _cleanup_and_rollback(
) -> None:
"""Clean up failed start and attempt rollback to old host if it was running."""
print_warning(f"{prefix} Cleaning up failed start on [magenta]{target_host}[/]")
await run_compose(cfg, service, "down", raw=raw)
await run_compose(cfg, stack, "down", raw=raw)
if not was_running:
err_console.print(
f"{prefix} [dim]Service was not running on [magenta]{current_host}[/], skipping rollback[/]"
f"{prefix} [dim]Stack was not running on [magenta]{current_host}[/], skipping rollback[/]"
)
return
print_warning(f"{prefix} Rolling back to [magenta]{current_host}[/]...")
rollback_result = await run_compose_on_host(cfg, service, current_host, "up -d", raw=raw)
rollback_result = await run_compose_on_host(cfg, stack, current_host, "up -d", raw=raw)
if rollback_result.success:
print_success(f"{prefix} Rollback succeeded on [magenta]{current_host}[/]")
else:
print_error(f"{prefix} Rollback failed - service is down")
print_error(f"{prefix} Rollback failed - stack is down")
def _report_preflight_failures(
service: str,
stack: str,
target_host: str,
preflight: PreflightResult,
) -> None:
"""Report pre-flight check failures."""
print_error(f"[cyan]\\[{service}][/] Cannot start on [magenta]{target_host}[/]:")
print_error(f"[cyan]\\[{stack}][/] Cannot start on [magenta]{target_host}[/]:")
for path in preflight.missing_paths:
print_error(f" missing path: {path}")
for net in preflight.missing_networks:
@@ -179,27 +185,25 @@ def _report_preflight_failures(
print_error(f" missing device: {dev}")
async def _up_multi_host_service(
async def _up_multi_host_stack(
cfg: Config,
service: str,
stack: str,
prefix: str,
*,
raw: bool = False,
) -> list[CommandResult]:
"""Start a multi-host service on all configured hosts."""
host_names = cfg.get_hosts(service)
"""Start a multi-host stack on all configured hosts."""
host_names = cfg.get_hosts(stack)
results: list[CommandResult] = []
compose_path = cfg.get_compose_path(service)
compose_path = cfg.get_compose_path(stack)
command = f"docker compose -f {compose_path} up -d"
# Pre-flight checks on all hosts
for host_name in host_names:
preflight = await check_service_requirements(cfg, service, host_name)
preflight = await check_stack_requirements(cfg, stack, host_name)
if not preflight.ok:
_report_preflight_failures(service, host_name, preflight)
results.append(
CommandResult(service=f"{service}@{host_name}", exit_code=1, success=False)
)
_report_preflight_failures(stack, host_name, preflight)
results.append(CommandResult(stack=f"{stack}@{host_name}", exit_code=1, success=False))
return results
# Start on all hosts
@@ -209,7 +213,7 @@ async def _up_multi_host_service(
succeeded_hosts: list[str] = []
for host_name in host_names:
host = cfg.hosts[host_name]
label = f"{service}@{host_name}"
label = f"{stack}@{host_name}"
result = await run_command(host, command, label, stream=not raw, raw=raw)
if raw:
print() # Ensure newline after raw output
@@ -219,72 +223,70 @@ async def _up_multi_host_service(
# Update state with hosts that succeeded (partial success is tracked)
if succeeded_hosts:
set_multi_host_service(cfg, service, succeeded_hosts)
set_multi_host_stack(cfg, stack, succeeded_hosts)
return results
async def _migrate_service(
async def _migrate_stack(
cfg: Config,
service: str,
stack: str,
current_host: str,
target_host: str,
prefix: str,
*,
raw: bool = False,
) -> CommandResult | None:
"""Migrate a service from current_host to target_host.
"""Migrate a stack from current_host to target_host.
Pre-pulls/builds images on target, then stops service on current host.
Pre-pulls/builds images on target, then stops stack on current host.
Returns failure result if migration prep fails, None on success.
"""
console.print(
f"{prefix} Migrating from [magenta]{current_host}[/] → [magenta]{target_host}[/]..."
)
# Prepare images on target host before stopping old service to minimize downtime.
# Pull handles image-based services; build handles Dockerfile-based services.
# Prepare images on target host before stopping old stack to minimize downtime.
# Pull handles image-based compose services; build handles Dockerfile-based ones.
# --ignore-buildable makes pull skip images that have build: defined.
for cmd, label in [("pull --ignore-buildable", "Pull"), ("build", "Build")]:
result = await _run_compose_step(cfg, service, cmd, raw=raw)
result = await _run_compose_step(cfg, stack, cmd, raw=raw)
if not result.success:
print_error(
f"{prefix} {label} failed on [magenta]{target_host}[/], "
"leaving service on current host"
"leaving stack on current host"
)
return result
# Stop on current host
down_result = await _run_compose_step(cfg, service, "down", raw=raw, host=current_host)
down_result = await _run_compose_step(cfg, stack, "down", raw=raw, host=current_host)
return down_result if not down_result.success else None
async def _up_single_service(
async def _up_single_stack(
cfg: Config,
service: str,
stack: str,
prefix: str,
*,
raw: bool,
) -> CommandResult:
"""Start a single-host service with migration support."""
target_host = cfg.get_hosts(service)[0]
current_host = get_service_host(cfg, service)
"""Start a single-host stack with migration support."""
target_host = cfg.get_hosts(stack)[0]
current_host = get_stack_host(cfg, stack)
# Pre-flight check: verify paths, networks, and devices exist on target
preflight = await check_service_requirements(cfg, service, target_host)
preflight = await check_stack_requirements(cfg, stack, target_host)
if not preflight.ok:
_report_preflight_failures(service, target_host, preflight)
return CommandResult(service=service, exit_code=1, success=False)
_report_preflight_failures(stack, target_host, preflight)
return CommandResult(stack=stack, exit_code=1, success=False)
# If service is deployed elsewhere, migrate it
# If stack is deployed elsewhere, migrate it
did_migration = False
was_running = False
if current_host and current_host != target_host:
if current_host in cfg.hosts:
was_running = await check_service_running(cfg, service, current_host)
failure = await _migrate_service(
cfg, service, current_host, target_host, prefix, raw=raw
)
was_running = await check_stack_running(cfg, stack, current_host)
failure = await _migrate_stack(cfg, stack, current_host, target_host, prefix, raw=raw)
if failure:
return failure
did_migration = True
@@ -295,15 +297,15 @@ async def _up_single_service(
# Start on target host
console.print(f"{prefix} Starting on [magenta]{target_host}[/]...")
up_result = await _run_compose_step(cfg, service, "up -d", raw=raw)
up_result = await _run_compose_step(cfg, stack, "up -d", raw=raw)
# Update state on success, or rollback on failure
if up_result.success:
set_service_host(cfg, service, target_host)
set_stack_host(cfg, stack, target_host)
elif did_migration and current_host:
await _cleanup_and_rollback(
cfg,
service,
stack,
target_host,
current_host,
prefix,
@@ -314,24 +316,24 @@ async def _up_single_service(
return up_result
async def up_services(
async def up_stacks(
cfg: Config,
services: list[str],
stacks: list[str],
*,
raw: bool = False,
) -> list[CommandResult]:
"""Start services with automatic migration if host changed."""
"""Start stacks with automatic migration if host changed."""
results: list[CommandResult] = []
total = len(services)
total = len(stacks)
try:
for idx, service in enumerate(services, 1):
prefix = f"[dim][{idx}/{total}][/] [cyan]\\[{service}][/]"
for idx, stack in enumerate(stacks, 1):
prefix = f"[dim][{idx}/{total}][/] [cyan]\\[{stack}][/]"
if cfg.is_multi_host(service):
results.extend(await _up_multi_host_service(cfg, service, prefix, raw=raw))
if cfg.is_multi_host(stack):
results.extend(await _up_multi_host_stack(cfg, stack, prefix, raw=raw))
else:
results.append(await _up_single_service(cfg, service, prefix, raw=raw))
results.append(await _up_single_stack(cfg, stack, prefix, raw=raw))
except OperationInterruptedError:
raise KeyboardInterrupt from None
@@ -340,22 +342,22 @@ async def up_services(
async def check_host_compatibility(
cfg: Config,
service: str,
stack: str,
) -> dict[str, tuple[int, int, list[str]]]:
"""Check which hosts can run a service based on paths, networks, and devices.
"""Check which hosts can run a stack based on paths, networks, and devices.
Returns dict of host_name -> (found_count, total_count, missing_items).
"""
# Get total requirements count
paths = get_service_paths(cfg, service)
networks = parse_external_networks(cfg, service)
devices = parse_devices(cfg, service)
paths = get_stack_paths(cfg, stack)
networks = parse_external_networks(cfg, stack)
devices = parse_devices(cfg, stack)
total = len(paths) + len(networks) + len(devices)
results: dict[str, tuple[int, int, list[str]]] = {}
for host_name in cfg.hosts:
preflight = await check_service_requirements(cfg, service, host_name)
preflight = await check_stack_requirements(cfg, stack, host_name)
all_missing = (
preflight.missing_paths + preflight.missing_networks + preflight.missing_devices
)
@@ -365,70 +367,167 @@ async def check_host_compatibility(
return results
async def stop_orphaned_services(cfg: Config) -> list[CommandResult]:
"""Stop orphaned services (in state but not in config).
async def _stop_stacks_on_hosts(
cfg: Config,
stacks_to_hosts: dict[str, list[str]],
label: str = "",
) -> list[CommandResult]:
"""Stop stacks on specific hosts.
Runs docker compose down on each service on its tracked host(s).
Only removes from state on successful stop.
Shared helper for stop_orphaned_stacks and stop_stray_stacks.
Args:
cfg: Config object.
stacks_to_hosts: Dict mapping stack name to list of hosts to stop on.
label: Optional label for success message (e.g., "stray", "orphaned").
Returns:
List of CommandResults for each stack@host.
Returns list of CommandResults for each service@host.
"""
orphaned = get_orphaned_services(cfg)
if not orphaned:
if not stacks_to_hosts:
return []
results: list[CommandResult] = []
tasks: list[tuple[str, str, asyncio.Task[CommandResult]]] = []
suffix = f" ({label})" if label else ""
# Build list of (service, host, task) for all orphaned services
for service, hosts in orphaned.items():
host_list = hosts if isinstance(hosts, list) else [hosts]
for host in host_list:
# Skip hosts no longer in config
for stack, hosts in stacks_to_hosts.items():
for host in hosts:
if host not in cfg.hosts:
print_warning(f"{service}@{host}: host no longer in config, skipping")
print_warning(f"{stack}@{host}: host no longer in config, skipping")
results.append(
CommandResult(
service=f"{service}@{host}",
stack=f"{stack}@{host}",
exit_code=1,
success=False,
stderr="host no longer in config",
)
)
continue
coro = run_compose_on_host(cfg, service, host, "down")
tasks.append((service, host, asyncio.create_task(coro)))
coro = run_compose_on_host(cfg, stack, host, "down")
tasks.append((stack, host, asyncio.create_task(coro)))
# Run all down commands in parallel
if tasks:
for service, host, task in tasks:
try:
result = await task
results.append(result)
if result.success:
print_success(f"{service}@{host}: stopped")
else:
print_error(f"{service}@{host}: {result.stderr or 'failed'}")
except Exception as e:
print_error(f"{service}@{host}: {e}")
results.append(
CommandResult(
service=f"{service}@{host}",
exit_code=1,
success=False,
stderr=str(e),
)
for stack, host, task in tasks:
try:
result = await task
results.append(result)
if result.success:
print_success(f"{stack}@{host}: stopped{suffix}")
else:
print_error(f"{stack}@{host}: {result.stderr or 'failed'}")
except Exception as e:
print_error(f"{stack}@{host}: {e}")
results.append(
CommandResult(
stack=f"{stack}@{host}",
exit_code=1,
success=False,
stderr=str(e),
)
# Remove from state only for services where ALL hosts succeeded
for service, hosts in orphaned.items():
host_list = hosts if isinstance(hosts, list) else [hosts]
all_succeeded = all(
r.success
for r in results
if r.service.startswith(f"{service}@") or r.service == service
)
if all_succeeded:
remove_service(cfg, service)
)
return results
async def stop_orphaned_stacks(cfg: Config) -> list[CommandResult]:
"""Stop orphaned stacks (in state but not in config).
Runs docker compose down on each stack on its tracked host(s).
Only removes from state on successful stop.
Returns list of CommandResults for each stack@host.
"""
orphaned = get_orphaned_stacks(cfg)
if not orphaned:
return []
normalized: dict[str, list[str]] = {
stack: (hosts if isinstance(hosts, list) else [hosts]) for stack, hosts in orphaned.items()
}
results = await _stop_stacks_on_hosts(cfg, normalized)
# Remove from state only for stacks where ALL hosts succeeded
for stack in normalized:
all_succeeded = all(
r.success for r in results if r.stack.startswith(f"{stack}@") or r.stack == stack
)
if all_succeeded:
remove_stack(cfg, stack)
return results
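The normalization above just coerces single-host entries to lists so the shared helper can treat every stack uniformly; a tiny self-contained illustration (state values invented):

```python
orphaned = {"whoami": "nas", "grafana": ["beast", "nas"]}   # invented example state
normalized = {s: (h if isinstance(h, list) else [h]) for s, h in orphaned.items()}
print(normalized)   # {'whoami': ['nas'], 'grafana': ['beast', 'nas']}
```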
async def stop_stray_stacks(
cfg: Config,
strays: dict[str, list[str]],
) -> list[CommandResult]:
"""Stop stacks running on unauthorized hosts.
Args:
cfg: Config object.
strays: Dict mapping stack name to list of stray hosts.
Returns:
List of CommandResults for each stack@host stopped.
"""
return await _stop_stacks_on_hosts(cfg, strays, label="stray")
def build_discovery_results(
cfg: Config,
running_on_host: dict[str, set[str]],
stacks: list[str] | None = None,
) -> tuple[dict[str, str | list[str]], dict[str, list[str]], dict[str, list[str]]]:
"""Build discovery results from per-host running stacks.
Takes the raw data of which stacks are running on which hosts and
categorizes them into discovered (running correctly), strays (wrong host),
and duplicates (single-host stack on multiple hosts).
Args:
cfg: Config object.
running_on_host: Dict mapping host -> set of running stack names.
stacks: Optional list of stacks to check. Defaults to all configured stacks.
Returns:
Tuple of (discovered, strays, duplicates):
- discovered: stack -> host(s) where running correctly
- strays: stack -> list of unauthorized hosts
- duplicates: stack -> list of all hosts (single-host stacks running on more than one host)
"""
stack_list = stacks if stacks is not None else list(cfg.stacks)
all_hosts = list(running_on_host.keys())
# Build StackDiscoveryResult for each stack
results: list[StackDiscoveryResult] = [
StackDiscoveryResult(
stack=stack,
configured_hosts=cfg.get_hosts(stack),
running_hosts=[h for h in all_hosts if stack in running_on_host[h]],
)
for stack in stack_list
]
discovered: dict[str, str | list[str]] = {}
strays: dict[str, list[str]] = {}
duplicates: dict[str, list[str]] = {}
for result in results:
correct_hosts = [h for h in result.running_hosts if h in result.configured_hosts]
if correct_hosts:
if result.is_multi_host:
discovered[result.stack] = correct_hosts
else:
discovered[result.stack] = correct_hosts[0]
if result.is_stray:
strays[result.stack] = result.stray_hosts
if result.is_duplicate:
duplicates[result.stack] = result.running_hosts
return discovered, strays, duplicates
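To see how the three buckets relate, suppose a hypothetical config where `plex` is a single-host stack pinned to `beast` and `whoami` runs on every host; a per-host scan like the dict below would then be categorized roughly as the comments show (stack and host names invented; the real call takes a loaded Config):

```python
running_on_host = {                 # host -> compose projects reported by docker ps
    "beast": {"plex", "whoami"},
    "nas": {"plex", "whoami"},      # plex is not configured for nas
}
# discovered, strays, duplicates = build_discovery_results(cfg, running_on_host)
# Expected (roughly), given the config sketched above:
#   discovered == {"plex": "beast", "whoami": ["beast", "nas"]}
#   strays     == {"plex": ["nas"]}
#   duplicates == {"plex": ["beast", "nas"]}
# `await stop_stray_stacks(cfg, strays)` would then `docker compose down` plex on nas.
```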

View File

@@ -11,9 +11,19 @@ def xdg_config_home() -> Path:
return Path(os.environ.get("XDG_CONFIG_HOME", Path.home() / ".config"))
def config_dir() -> Path:
"""Get the compose-farm config directory."""
return xdg_config_home() / "compose-farm"
def default_config_path() -> Path:
"""Get the default user config path."""
return xdg_config_home() / "compose-farm" / "compose-farm.yaml"
return config_dir() / "compose-farm.yaml"
def backup_dir() -> Path:
"""Get the backup directory for file edits."""
return config_dir() / "backups"
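A quick demonstration of how those paths resolve, mirroring the helpers above rather than importing them (the XDG override is only for the demo):

```python
import os
from pathlib import Path

os.environ["XDG_CONFIG_HOME"] = "/tmp/xdg-demo"            # demo override
base = Path(os.environ["XDG_CONFIG_HOME"]) / "compose-farm"
print(base / "compose-farm.yaml")   # what default_config_path() would return
print(base / "backups")             # what backup_dir() would return
```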
def config_search_paths() -> list[Path]:

View File

@@ -0,0 +1,220 @@
"""Container registry API client for tag discovery."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import httpx
# Image reference pattern: [registry/][namespace/]name[:tag][@digest]
IMAGE_PATTERN = re.compile(
r"^(?:(?P<registry>[^/]+\.[^/]+)/)?(?:(?P<namespace>[^/:@]+)/)?(?P<name>[^/:@]+)(?::(?P<tag>[^@]+))?(?:@(?P<digest>.+))?$"
)
# Docker Hub aliases
DOCKER_HUB_ALIASES = frozenset(
{"docker.io", "index.docker.io", "registry.hub.docker.com", "registry-1.docker.io"}
)
# Token endpoints per registry: (url, extra_params)
TOKEN_ENDPOINTS: dict[str, tuple[str, dict[str, str]]] = {
"docker.io": ("https://auth.docker.io/token", {"service": "registry.docker.io"}),
"ghcr.io": ("https://ghcr.io/token", {}),
}
# Registry URL overrides (Docker Hub uses a different host for API)
REGISTRY_URLS: dict[str, str] = {
"docker.io": "https://registry-1.docker.io",
}
HTTP_OK = 200
MANIFEST_ACCEPT = (
"application/vnd.docker.distribution.manifest.v2+json, "
"application/vnd.oci.image.manifest.v1+json, "
"application/vnd.oci.image.index.v1+json"
)
@dataclass(frozen=True)
class ImageRef:
"""Parsed container image reference."""
registry: str
namespace: str
name: str
tag: str
digest: str | None = None
@property
def full_name(self) -> str:
"""Full image name with namespace."""
return f"{self.namespace}/{self.name}" if self.namespace else self.name
@property
def display_name(self) -> str:
"""Display name (omits docker.io/library for official images)."""
if self.registry in DOCKER_HUB_ALIASES:
if self.namespace == "library":
return self.name
return self.full_name
return f"{self.registry}/{self.full_name}"
@classmethod
def parse(cls, image: str) -> ImageRef:
"""Parse image string into components."""
match = IMAGE_PATTERN.match(image)
if not match:
return cls("docker.io", "library", image.split(":")[0].split("@")[0], "latest")
groups = match.groupdict()
registry = groups.get("registry") or "docker.io"
namespace = groups.get("namespace") or ""
name = groups.get("name") or image
tag = groups.get("tag") or "latest"
digest = groups.get("digest")
# Docker Hub official images have implicit "library" namespace
if registry in DOCKER_HUB_ALIASES and not namespace:
namespace = "library"
return cls(registry, namespace, name, tag, digest)
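Assuming the module is importable (the import path below is a guess), parsing follows the regex and the Docker Hub defaults shown above:

```python
from compose_farm.registry import ImageRef   # hypothetical import path

ref = ImageRef.parse("nginx:1.25")
print(ref.registry, ref.namespace, ref.name, ref.tag)   # docker.io library nginx 1.25
print(ref.display_name)                                 # nginx

ref = ImageRef.parse("ghcr.io/user/app:v2")
print(ref.display_name, ref.tag)                        # ghcr.io/user/app v2

print(ImageRef.parse("redis").tag)                      # latest
```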
@dataclass
class TagCheckResult:
"""Result of checking tags for an image."""
image: ImageRef
current_digest: str
available_updates: list[str] = field(default_factory=list)
error: str | None = None
class RegistryClient:
"""Unified OCI Distribution API client."""
def __init__(self, registry: str) -> None:
"""Initialize for a specific registry."""
self.registry = registry.lower()
# Normalize Docker Hub aliases
if self.registry in DOCKER_HUB_ALIASES:
self.registry = "docker.io"
self.registry_url = REGISTRY_URLS.get(self.registry, f"https://{self.registry}")
self._token_cache: dict[str, str] = {}
async def _get_token(self, image: ImageRef, client: httpx.AsyncClient) -> str | None:
"""Get auth token for the registry (cached per image)."""
cache_key = image.full_name
if cache_key in self._token_cache:
return self._token_cache[cache_key]
endpoint = TOKEN_ENDPOINTS.get(self.registry)
if not endpoint:
return None # No auth needed or unknown registry
url, extra_params = endpoint
params = {"scope": f"repository:{image.full_name}:pull", **extra_params}
resp = await client.get(url, params=params)
if resp.status_code == HTTP_OK:
token: str | None = resp.json().get("token")
if token:
self._token_cache[cache_key] = token
return token
return None
async def get_tags(self, image: ImageRef, client: httpx.AsyncClient) -> list[str]:
"""Fetch available tags for an image."""
headers = {}
token = await self._get_token(image, client)
if token:
headers["Authorization"] = f"Bearer {token}"
url = f"{self.registry_url}/v2/{image.full_name}/tags/list"
resp = await client.get(url, headers=headers)
if resp.status_code != HTTP_OK:
return []
tags: list[str] = resp.json().get("tags", [])
return tags
async def get_digest(self, image: ImageRef, tag: str, client: httpx.AsyncClient) -> str | None:
"""Get digest for a specific tag."""
headers = {"Accept": MANIFEST_ACCEPT}
token = await self._get_token(image, client)
if token:
headers["Authorization"] = f"Bearer {token}"
url = f"{self.registry_url}/v2/{image.full_name}/manifests/{tag}"
resp = await client.head(url, headers=headers)
if resp.status_code == HTTP_OK:
digest: str | None = resp.headers.get("docker-content-digest")
return digest
return None
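A minimal end-to-end use of the client against Docker Hub (import path is a guess; network access required):

```python
import asyncio
import httpx
from compose_farm.registry import ImageRef, RegistryClient   # hypothetical import path

async def main() -> None:
    image = ImageRef.parse("nginx:1.27")
    client = RegistryClient(image.registry)        # normalizes Docker Hub aliases
    async with httpx.AsyncClient(timeout=10) as http:
        tags = await client.get_tags(image, http)
        digest = await client.get_digest(image, image.tag, http)
        print(len(tags), "tags;", image.tag, "->", digest)

asyncio.run(main())
```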
def _parse_version(tag: str) -> tuple[int, ...] | None:
"""Parse version string into comparable tuple."""
tag = tag.lstrip("vV")
parts = tag.split(".")
try:
return tuple(int(p) for p in parts)
except ValueError:
return None
def _find_updates(current_tag: str, tags: list[str]) -> list[str]:
"""Find tags newer than current based on version comparison."""
current_version = _parse_version(current_tag)
if current_version is None:
return []
updates = []
for tag in tags:
tag_version = _parse_version(tag)
if tag_version and tag_version > current_version:
updates.append(tag)
updates.sort(key=lambda t: _parse_version(t) or (), reverse=True)
return updates
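The ordering is plain tuple comparison after stripping a leading v/V; non-numeric tags parse to None and are never offered as updates. Calling the private helpers directly:

```python
print(_parse_version("v1.25.3"))   # (1, 25, 3)
print(_parse_version("latest"))    # None
print(_find_updates("1.25", ["1.24", "1.25", "1.26", "2.0", "alpine"]))   # ['2.0', '1.26']
```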
async def check_image_updates(
image_str: str,
client: httpx.AsyncClient,
) -> TagCheckResult:
"""Check if newer versions are available for an image.
Args:
image_str: Image string like "nginx:1.25" or "ghcr.io/user/repo:tag"
client: httpx async client
Returns:
TagCheckResult with available updates
"""
image = ImageRef.parse(image_str)
registry_client = RegistryClient(image.registry)
try:
tags = await registry_client.get_tags(image, client)
updates = _find_updates(image.tag, tags)
current_digest = await registry_client.get_digest(image, image.tag, client) or ""
return TagCheckResult(
image=image,
current_digest=current_digest,
available_updates=updates,
)
except Exception as e:
return TagCheckResult(
image=image,
current_digest="",
error=str(e),
)
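Putting it together, a hedged usage sketch of the public entry point (hypothetical import path; network access required):

```python
import asyncio
import httpx
from compose_farm.registry import check_image_updates   # hypothetical import path

async def main() -> None:
    async with httpx.AsyncClient(timeout=10) as client:
        result = await check_image_updates("nginx:1.25", client)
        if result.error:
            print("check failed:", result.error)
        elif result.available_updates:
            print(result.image.display_name, "newer tags:", result.available_updates[:5])
        else:
            print(result.image.display_name, "has no newer semver tags")

asyncio.run(main())
```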

View File

@@ -1,4 +1,4 @@
"""State tracking for deployed services."""
"""State tracking for deployed stacks."""
from __future__ import annotations
@@ -13,44 +13,44 @@ if TYPE_CHECKING:
from .config import Config
def group_services_by_host(
services: dict[str, str | list[str]],
def group_stacks_by_host(
stacks: dict[str, str | list[str]],
hosts: Mapping[str, object],
all_hosts: list[str] | None = None,
) -> dict[str, list[str]]:
"""Group services by their assigned host(s).
"""Group stacks by their assigned host(s).
For multi-host services (list or "all"), the service appears in multiple host lists.
For multi-host stacks (list or "all"), the stack appears in multiple host lists.
"""
by_host: dict[str, list[str]] = {h: [] for h in hosts}
for service, host_value in services.items():
for stack, host_value in stacks.items():
if isinstance(host_value, list):
for host_name in host_value:
if host_name in by_host:
by_host[host_name].append(service)
by_host[host_name].append(stack)
elif host_value == "all" and all_hosts:
for host_name in all_hosts:
if host_name in by_host:
by_host[host_name].append(service)
by_host[host_name].append(stack)
elif host_value in by_host:
by_host[host_value].append(service)
by_host[host_value].append(stack)
return by_host
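A small illustration of the grouping rules, including the "all" sentinel and list values (names invented; import path is a guess):

```python
from compose_farm.state import group_stacks_by_host   # hypothetical import path

stacks = {"plex": "beast", "whoami": "all", "grafana": ["beast", "nas"]}
hosts = {"beast": object(), "nas": object()}           # only the keys are inspected
print(group_stacks_by_host(stacks, hosts, all_hosts=["beast", "nas"]))
# {'beast': ['plex', 'whoami', 'grafana'], 'nas': ['whoami', 'grafana']}
```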
def group_running_services_by_host(
def group_running_stacks_by_host(
state: dict[str, str | list[str]],
hosts: Mapping[str, object],
) -> dict[str, list[str]]:
"""Group running services by host, filtering out hosts with no services."""
by_host = group_services_by_host(state, hosts)
"""Group running stacks by host, filtering out hosts with no stacks."""
by_host = group_stacks_by_host(state, hosts)
return {h: svcs for h, svcs in by_host.items() if svcs}
def load_state(config: Config) -> dict[str, str | list[str]]:
"""Load the current deployment state.
Returns a dict mapping service names to host name(s).
Multi-host services store a list of hosts.
Returns a dict mapping stack names to host name(s).
Multi-host stacks store a list of hosts.
"""
state_path = config.get_state_path()
if not state_path.exists():
@@ -83,13 +83,13 @@ def _modify_state(config: Config) -> Generator[dict[str, str | list[str]], None,
save_state(config, state)
def get_service_host(config: Config, service: str) -> str | None:
"""Get the host where a service is currently deployed.
def get_stack_host(config: Config, stack: str) -> str | None:
"""Get the host where a stack is currently deployed.
For multi-host services, returns the first host or None.
For multi-host stacks, returns the first host or None.
"""
state = load_state(config)
value = state.get(service)
value = state.get(stack)
if value is None:
return None
if isinstance(value, list):
@@ -97,59 +97,59 @@ def get_service_host(config: Config, service: str) -> str | None:
return value
def set_service_host(config: Config, service: str, host: str) -> None:
"""Record that a service is deployed on a host."""
def set_stack_host(config: Config, stack: str, host: str) -> None:
"""Record that a stack is deployed on a host."""
with _modify_state(config) as state:
state[service] = host
state[stack] = host
def set_multi_host_service(config: Config, service: str, hosts: list[str]) -> None:
"""Record that a multi-host service is deployed on multiple hosts."""
def set_multi_host_stack(config: Config, stack: str, hosts: list[str]) -> None:
"""Record that a multi-host stack is deployed on multiple hosts."""
with _modify_state(config) as state:
state[service] = hosts
state[stack] = hosts
def remove_service(config: Config, service: str) -> None:
"""Remove a service from the state (after down)."""
def remove_stack(config: Config, stack: str) -> None:
"""Remove a stack from the state (after down)."""
with _modify_state(config) as state:
state.pop(service, None)
state.pop(stack, None)
def get_services_needing_migration(config: Config) -> list[str]:
"""Get services where current host differs from configured host.
def get_stacks_needing_migration(config: Config) -> list[str]:
"""Get stacks where current host differs from configured host.
Multi-host services are never considered for migration.
Multi-host stacks are never considered for migration.
"""
needs_migration = []
for service in config.services:
# Skip multi-host services
if config.is_multi_host(service):
for stack in config.stacks:
# Skip multi-host stacks
if config.is_multi_host(stack):
continue
configured_host = config.get_hosts(service)[0]
current_host = get_service_host(config, service)
configured_host = config.get_hosts(stack)[0]
current_host = get_stack_host(config, stack)
if current_host and current_host != configured_host:
needs_migration.append(service)
needs_migration.append(stack)
return needs_migration
def get_orphaned_services(config: Config) -> dict[str, str | list[str]]:
"""Get services that are in state but not in config.
def get_orphaned_stacks(config: Config) -> dict[str, str | list[str]]:
"""Get stacks that are in state but not in config.
These are services that were previously deployed but have been
These are stacks that were previously deployed but have been
removed from the config file (e.g., commented out).
Returns a dict mapping service name to host(s) where it's deployed.
Returns a dict mapping stack name to host(s) where it's deployed.
"""
state = load_state(config)
return {service: hosts for service, hosts in state.items() if service not in config.services}
return {stack: hosts for stack, hosts in state.items() if stack not in config.stacks}
def get_services_not_in_state(config: Config) -> list[str]:
"""Get services that are in config but not in state.
def get_stacks_not_in_state(config: Config) -> list[str]:
"""Get stacks that are in config but not in state.
These are services that should be running but aren't tracked
These are stacks that should be running but aren't tracked
(e.g., newly added to config, or previously stopped as orphans).
"""
state = load_state(config)
return [service for service in config.services if service not in state]
return [stack for stack in config.stacks if stack not in state]
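As a sketch of how these bookkeeping helpers fit together in a refresh/apply-style report (hypothetical import paths; `cfg` is an already-loaded Config):

```python
from compose_farm.state import (   # hypothetical import paths
    get_orphaned_stacks,
    get_stacks_needing_migration,
    get_stacks_not_in_state,
)

def report(cfg) -> None:
    """Summarize what apply/refresh would have to reconcile."""
    print("needs migration:", get_stacks_needing_migration(cfg))   # wrong host in state
    print("orphaned:", list(get_orphaned_stacks(cfg)))             # in state, not in config
    print("not in state:", get_stacks_not_in_state(cfg))           # in config, never tracked
```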

Some files were not shown because too many files have changed in this diff.