compose-farm/src/compose_farm/logs.py
Commit 5f2e081298 by Bas Nijholt (2025-12-22): perf: Batch snapshot collection to 1 SSH call per host (#130)
## Summary

Optimize `cf refresh` SSH calls from O(stacks) to O(hosts):
- Discovery: 1 SSH call per host (unchanged)
- Snapshots: 1 SSH call per host (was 1 per stack)

For 50 stacks across 4 hosts: 54 → 8 SSH calls (4 discovery + 50 snapshot calls before, 4 + 4 after).

## Changes

**Performance:**
- Use `docker ps` + `docker image inspect` instead of `docker compose images` per stack
- Batch snapshot collection by host in `collect_stacks_entries_on_host()`

**Architecture:**
- Add `build_discovery_results()` to `operations.py` (business logic)
- Keep progress bar wrapper in `cli/management.py` (presentation)
- Remove dead code: `discover_all_stacks_on_all_hosts()`, `collect_all_stacks_entries()`

"""Snapshot current compose images into a TOML log."""
from __future__ import annotations
import json
import tomllib
from dataclasses import dataclass
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from .executor import run_command
from .paths import xdg_config_home
if TYPE_CHECKING:
from collections.abc import Iterable
from pathlib import Path
from .config import Config
# Separator used to split output sections
_SECTION_SEPARATOR = "---CF-SEP---"
DEFAULT_LOG_PATH = xdg_config_home() / "compose-farm" / "dockerfarm-log.toml"
@dataclass(frozen=True)
class SnapshotEntry:
"""Normalized image snapshot for a single stack."""
stack: str
host: str
compose_file: Path
image: str
digest: str
captured_at: datetime
def as_dict(self, first_seen: str, last_seen: str) -> dict[str, str]:
"""Render snapshot as a TOML-friendly dict."""
return {
"stack": self.stack,
"host": self.host,
"compose_file": str(self.compose_file),
"image": self.image,
"digest": self.digest,
"first_seen": first_seen,
"last_seen": last_seen,
}
def isoformat(dt: datetime) -> str:
"""Format a datetime as an ISO 8601 string with Z suffix for UTC."""
return dt.astimezone(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def _escape(value: str) -> str:
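    """Escape backslashes and double quotes for TOML string values."""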
    return value.replace("\\", "\\\\").replace('"', '\\"')


def _parse_image_digests(image_json: str) -> dict[str, str]:
    """Parse docker image inspect JSON to build image tag -> digest map."""
    if not image_json:
        return {}
    try:
        image_data = json.loads(image_json)
    except json.JSONDecodeError:
        return {}
    image_digests: dict[str, str] = {}
    for img in image_data:
        tags = img.get("RepoTags") or []
        digests = img.get("RepoDigests") or []
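        # RepoDigests entries look like "repo@sha256:<hash>"; keep the digest part,
        # falling back to the local image ID when no registry digest exists.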
        digest = digests[0].split("@")[-1] if digests else img.get("Id", "")
        for tag in tags:
            image_digests[tag] = digest
        if img.get("Id"):
            image_digests[img["Id"]] = digest
    return image_digests


async def collect_stacks_entries_on_host(
    config: Config,
    host_name: str,
    stacks: set[str],
    *,
    now: datetime,
) -> list[SnapshotEntry]:
    """Collect image entries for stacks on one host using 2 docker commands.

    Uses `docker ps` to get running containers + their compose project labels,
    then `docker image inspect` to get digests for all unique images.
    Much faster than running N `docker compose images` commands.
    """
    if not stacks:
        return []
    host = config.hosts[host_name]
    # Single SSH call with 2 docker commands:
    # 1. Get project|image pairs from running containers
    # 2. Get image info (including digests) for all unique images
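    # The doubled braces in the f-strings below render as literal Go-template
    # placeholders; the trailing "|| true" keeps the combined command successful
    # even when there are no images to inspect.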
    command = (
        f"docker ps --format '{{{{.Label \"com.docker.compose.project\"}}}}|{{{{.Image}}}}' && "
        f"echo '{_SECTION_SEPARATOR}' && "
        "docker image inspect $(docker ps --format '{{.Image}}' | sort -u) 2>/dev/null || true"
    )
    result = await run_command(host, command, host_name, stream=False, prefix="")
    if not result.success:
        return []
    # Split output into two sections
    parts = result.stdout.split(_SECTION_SEPARATOR)
    if len(parts) != 2:  # noqa: PLR2004
        return []
    container_lines, image_json = parts[0].strip(), parts[1].strip()
    # Parse project|image pairs, filtering to only stacks we care about
    stack_images: dict[str, set[str]] = {}
    for line in container_lines.splitlines():
        if "|" not in line:
            continue
        project, image = line.split("|", 1)
        if project in stacks:
            stack_images.setdefault(project, set()).add(image)
    if not stack_images:
        return []
    # Parse image inspect JSON to build image -> digest map
    image_digests = _parse_image_digests(image_json)
    # Build entries
    entries: list[SnapshotEntry] = []
    for stack, images in stack_images.items():
        for image in images:
            digest = image_digests.get(image, "")
            if digest:
                entries.append(
                    SnapshotEntry(
                        stack=stack,
                        host=host_name,
                        compose_file=config.get_compose_path(stack),
                        image=image,
                        digest=digest,
                        captured_at=now,
                    )
                )
    return entries


def load_existing_entries(log_path: Path) -> list[dict[str, str]]:
    """Load existing snapshot entries from a TOML log file."""
    if not log_path.exists():
        return []
    data = tomllib.loads(log_path.read_text())
    entries = list(data.get("entries", []))
    normalized: list[dict[str, str]] = []
    for entry in entries:
        normalized_entry = dict(entry)
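        # Normalize legacy entries that recorded the key as "service" instead of "stack".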
if "stack" not in normalized_entry and "service" in normalized_entry:
normalized_entry["stack"] = normalized_entry.pop("service")
normalized.append(normalized_entry)
return normalized
def merge_entries(
existing: Iterable[dict[str, str]],
new_entries: Iterable[SnapshotEntry],
*,
now_iso: str,
) -> list[dict[str, str]]:
"""Merge new snapshot entries with existing ones, preserving first_seen timestamps."""
    merged: dict[tuple[str, str, str], dict[str, str]] = {
        (e["stack"], e["host"], e["digest"]): dict(e) for e in existing
    }
    for entry in new_entries:
        key = (entry.stack, entry.host, entry.digest)
        first_seen = merged.get(key, {}).get("first_seen", now_iso)
        merged[key] = entry.as_dict(first_seen, now_iso)
    return list(merged.values())


def write_toml(log_path: Path, *, meta: dict[str, str], entries: list[dict[str, str]]) -> None:
    """Write snapshot entries to a TOML log file."""
    lines: list[str] = ["[meta]"]
    lines.extend(f'{key} = "{_escape(meta[key])}"' for key in sorted(meta))
    if entries:
        lines.append("")
    for entry in sorted(entries, key=lambda e: (e["stack"], e["host"], e["digest"])):
        lines.append("[[entries]]")
        for field in [
            "stack",
            "host",
            "compose_file",
            "image",
            "digest",
            "first_seen",
            "last_seen",
        ]:
            value = entry[field]
            lines.append(f'{field} = "{_escape(str(value))}"')
        lines.append("")
    content = "\n".join(lines).rstrip() + "\n"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    log_path.write_text(content)
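
For orientation, here is a rough sketch of how these functions fit together during `cf refresh`. The real call site lives in `operations.py` and `cli/management.py`; the driver function, the `stacks_by_host` shape, and the meta keys below are hypothetical, shown only to illustrate the one-SSH-call-per-host batching.

```python
from datetime import UTC, datetime

from compose_farm import logs
from compose_farm.config import Config  # assumed import path for Config


async def refresh_snapshots(config: Config, stacks_by_host: dict[str, set[str]]) -> None:
    """Hypothetical driver: one snapshot call per host, then merge into the TOML log."""
    now = datetime.now(UTC)
    new_entries: list[logs.SnapshotEntry] = []
    for host_name, stacks in stacks_by_host.items():
        # One SSH round-trip per host, regardless of how many stacks run there.
        new_entries.extend(
            await logs.collect_stacks_entries_on_host(config, host_name, stacks, now=now)
        )
    existing = logs.load_existing_entries(logs.DEFAULT_LOG_PATH)
    merged = logs.merge_entries(existing, new_entries, now_iso=logs.isoformat(now))
    logs.write_toml(
        logs.DEFAULT_LOG_PATH,
        meta={"generated_at": logs.isoformat(now)},  # meta keys are illustrative
        entries=merged,
    )
```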