#!/usr/bin/env python3
"""Generate Starlight API reference pages from the dev_2.0 branch.

Reads each Python module under ``src/mscp/`` from the ``dev_2.0`` branch via
``git show``, parses it with ``ast``, and emits one Markdown page per module
into ``src/content/docs/api/``.

Run from the repository root::

    python3 scripts/gen_api_docs.py

The script has no third-party dependencies; standard library only.
"""

from __future__ import annotations

import ast
import re
import shutil
import subprocess
import sys
import textwrap
from dataclasses import dataclass, field
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parent.parent
SOURCE_BRANCH = "dev_2.0"
# Restrict generation to a single Python (sub)package within the source tree.
SOURCE_PREFIX = "src/mscp/"
# Dotted-name prefix corresponding to SOURCE_PREFIX (e.g. "mscp.classes.").
MODULE_PREFIX = (
    SOURCE_PREFIX.removeprefix("src/").rstrip("/").replace("/", ".") + "."
)
OUTPUT_DIR = REPO_ROOT / "src" / "content" / "docs" / "api"

# Paths (relative to repo root) to exclude from documentation entirely.
# These are internal implementation packages, not public library API.
SKIP_PATHS = {
    "src/mscp/generate",
    "src/mscp/admin_utils",
    "src/mscp/cli.py",
}


def run_git(*args: str) -> str:
    """Run ``git <args>`` in the repository root and return its stdout.

    Output is decoded as UTF-8 explicitly rather than with the locale's
    preferred encoding, because the content read through this helper is
    Python source and path listings.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    result = subprocess.run(
        ["git", *args],
        cwd=REPO_ROOT,
        check=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    return result.stdout


def list_python_files() -> list[str]:
    """Return the sorted ``.py`` paths under SOURCE_PREFIX on SOURCE_BRANCH.

    ``__main__.py`` files and anything listed in (or below) SKIP_PATHS are
    left out.
    """
    out = run_git("ls-tree", "-r", SOURCE_BRANCH, "--name-only")
    files: list[str] = []
    for line in out.splitlines():
        rel = line.strip()
        if not (rel.startswith(SOURCE_PREFIX) and rel.endswith(".py")):
            continue
        # Astro excludes underscore-prefixed slugs from routing, so skip
        # entry-point shims like __main__.py that have no API surface anyway.
        if Path(rel).name == "__main__.py":
            continue
        if any(rel == p or rel.startswith(p + "/") for p in SKIP_PATHS):
            continue
        files.append(rel)
    return sorted(files)


def read_file(path: str) -> str:
    """Return the contents of ``path`` as stored on SOURCE_BRANCH."""
    return run_git("show", f"{SOURCE_BRANCH}:{path}")


@dataclass
class FunctionDoc:
    """Documentation data extracted from one function or method definition."""

    name: str
    signature: str
    docstring: str | None
    decorators: list[str] = field(default_factory=list)
    is_async: bool = False


@dataclass
class ClassDoc:
    """Documentation data extracted from one class definition."""

    name: str
    bases: list[str]
    docstring: str | None
    methods: list[FunctionDoc] = field(default_factory=list)


@dataclass
class ModuleDoc:
    """Documentation data extracted from one module."""

    rel_path: str  # e.g. "macsecurityrule.py" (relative to SOURCE_PREFIX)
    module_dotted: str  # e.g. "mscp.classes.macsecurityrule"
    module_docstring: str | None
    functions: list[FunctionDoc]
    classes: list[ClassDoc]
    exports: list[str]  # __all__, if defined

    @property
    def is_init(self) -> bool:
        """Whether this module is a package ``__init__.py``."""
        return Path(self.rel_path).name == "__init__.py"


def is_public(name: str) -> bool:
    """Return True for ``__init__`` and for names without a leading underscore."""
    if name == "__init__":
        return True
    return not name.startswith("_")


def format_signature(func: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Return ``name(args)`` plus ``-> returns`` when a return annotation exists."""
    args = ast.unparse(func.args)
    if func.returns is not None:
        return f"{func.name}({args}) -> {ast.unparse(func.returns)}"
    return f"{func.name}({args})"


def extract_decorators(func: ast.FunctionDef | ast.AsyncFunctionDef) -> list[str]:
    """Return the source text of each decorator, without the leading ``@``."""
    return [ast.unparse(d) for d in func.decorator_list]


def parse_function(
    node: ast.FunctionDef | ast.AsyncFunctionDef,
) -> FunctionDoc | None:
    """Build a FunctionDoc for ``node``, or return None if its name is private."""
    if not is_public(node.name):
        return None
    return FunctionDoc(
        name=node.name,
        signature=format_signature(node),
        docstring=ast.get_docstring(node),
        decorators=extract_decorators(node),
        is_async=isinstance(node, ast.AsyncFunctionDef),
    )


def parse_class(node: ast.ClassDef) -> ClassDoc | None:
    """Build a ClassDoc for ``node``, or return None if its name is private.

    Only methods defined directly in the class body are collected, and only
    those that pass ``is_public``.
    """
    if not is_public(node.name):
        return None
    methods: list[FunctionDoc] = []
    for item in node.body:
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
            method = parse_function(item)
            if method is not None:
                methods.append(method)
    return ClassDoc(
        name=node.name,
        bases=[ast.unparse(b) for b in node.bases],
        docstring=ast.get_docstring(node),
        methods=methods,
    )


def extract_dunder_all(tree: ast.Module) -> list[str]:
    """Return the string entries of a top-level ``__all__`` list or tuple.

    Both plain (``__all__ = [...]``) and annotated
    (``__all__: list[str] = [...]``) assignments are recognised. Returns an
    empty list when no such assignment with a list/tuple literal is found.
    """
    for node in tree.body:
        if isinstance(node, ast.Assign):
            targets: list[ast.expr] = node.targets
        elif isinstance(node, ast.AnnAssign):
            targets = [node.target]
        else:
            continue
        assigns_all = any(
            isinstance(target, ast.Name) and target.id == "__all__"
            for target in targets
        )
        if assigns_all and isinstance(node.value, (ast.List, ast.Tuple)):
            return [
                elt.value
                for elt in node.value.elts
                if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
            ]
    return []


def parse_module(rel_path: str, source: str) -> ModuleDoc:
    """Parse ``source`` and collect its public top-level functions and classes.

    Args:
        rel_path: Repository-relative path of the module; expected to start
            with SOURCE_PREFIX, which is sliced off by length.
        source: The module's source text.

    Returns:
        A ModuleDoc. When the module defines a non-empty ``__all__``, only
        names listed there are included.

    Raises:
        SyntaxError: If ``source`` cannot be parsed.
    """
    tree = ast.parse(source)
    exports = extract_dunder_all(tree)
    exports_set = set(exports)  # non-empty only when __all__ is defined
    functions: list[FunctionDoc] = []
    classes: list[ClassDoc] = []
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            if exports_set and node.name not in exports_set:
                continue
            func = parse_function(node)
            if func is not None:
                functions.append(func)
        elif isinstance(node, ast.ClassDef):
            if exports_set and node.name not in exports_set:
                continue
            cls = parse_class(node)
            if cls is not None:
                classes.append(cls)

    package_rel = rel_path[len(SOURCE_PREFIX):]  # e.g. "macsecurityrule.py"
    dotted = MODULE_PREFIX + package_rel.removesuffix(".py").replace("/", ".")
    if dotted.endswith(".__init__"):
        dotted = dotted.removesuffix(".__init__")
    dotted = dotted.rstrip(".")
    return ModuleDoc(
        rel_path=package_rel,
        module_dotted=dotted,
        module_docstring=ast.get_docstring(tree),
        functions=functions,
        classes=classes,
        exports=exports,
    )


# ---- Markdown rendering ---------------------------------------------------


def md_escape_frontmatter(text: str) -> str:
    """Escape ``text`` for use inside a double-quoted YAML front-matter value.

    Backslashes are escaped first (they are the escape character inside YAML
    double-quoted scalars), then double quotes; newlines become spaces and
    surrounding whitespace is stripped.
    """
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    return escaped.replace("\n", " ").strip()


def first_sentence(text: str | None) -> str:
    """Return the first sentence of the first paragraph of ``text``.

    The first paragraph is whitespace-normalised onto one line and cut after
    the earliest ``". "``, ``"! "`` or ``"? "``. If none occurs, the whole
    normalised paragraph is returned. Empty or missing text yields ``""``.
    """
    if not text:
        return ""
    cleaned = text.strip().split("\n\n", 1)[0]
    cleaned = " ".join(cleaned.split())
    # Use the earliest terminator of any kind, so "Hi! More. Text" ends at
    # "Hi!" instead of at the first full stop.
    ends = [
        idx
        for sep in (". ", "! ", "? ")
        if (idx := cleaned.find(sep)) != -1
    ]
    if ends:
        return cleaned[: min(ends) + 1]
    return cleaned


# Google-style docstring section headers we know how to render.
# Listed in lowercase for case-insensitive matching; canonical capitalisation
# is preserved when emitting the section header.
_LIST_SECTIONS = {
    "args",
    "arguments",
    "parameters",
    "params",
    "attributes",
    "attribute",
    "returns",
    "return",
    "yields",
    "yield",
    "raises",
    "raise",
    "exceptions",
    "except",
    "class methods",
    "methods",
    "side effects",
}
_BLOCK_SECTIONS = {
    "example",
    "examples",
    "note",
    "notes",
    "warning",
    "warnings",
    "see also",
    "references",
    "todo",
}
_KNOWN_SECTIONS = _LIST_SECTIONS | _BLOCK_SECTIONS

_SECTION_HEADER_RE = re.compile(r"^([A-Za-z][A-Za-z ]*):\s*$")
# "name (type): description" — the type and description are optional.
_ITEM_RE = re.compile(
    r"^(?P<name>\S+?)\s*(?:\((?P<type>[^)]+)\))?\s*:\s*(?P<desc>.*)$"
)


def _split_into_items(body: str) -> list[str]:
    """Group lines under a Google-style section into individual items.

    Items are flush-left; continuation text is indented further. Returns a
    list of single-line item strings (continuation lines collapsed with
    spaces), preserving order.
    """
    items: list[list[str]] = []
    current: list[str] = []
    for line in body.split("\n"):
        if not line.strip():
            if current:
                current.append("")
            continue
        if line[0] not in (" ", "\t"):
            # A flush-left line starts a new item.
            if current:
                items.append(current)
            current = [line.rstrip()]
        else:
            current.append(line.strip())
    if current:
        items.append(current)

    flattened: list[str] = []
    for item in items:
        joined = " ".join(part for part in item if part)
        if joined:
            flattened.append(joined)
    return flattened


def _render_list_section(header: str, body: str) -> str:
    """Render a list-style section as a bold header plus Markdown bullets.

    Items shaped like ``name (type): description`` are formatted with the
    name in bold code and the type in italics; anything else becomes a plain
    bullet.
    """
    items = _split_into_items(body)
    if not items:
        return f"**{header}**"
    bullets: list[str] = []
    for item in items:
        # Some docstrings already prefix items with "- " or "* " — drop it
        # so we don't end up with "- - foo".
        if item[:2] in ("- ", "* "):
            item = item[2:]
        m = _ITEM_RE.match(item)
        if m:
            name = m.group("name")
            type_ = m.group("type")
            desc = m.group("desc")
            type_part = f" *({type_})*" if type_ else ""
            desc_part = f" — {desc}" if desc else ""
            bullets.append(f"- **`{name}`**{type_part}{desc_part}")
        else:
            bullets.append(f"- {item}")
    return f"**{header}**\n\n" + "\n".join(bullets)


def _render_block_section(header: str, body: str) -> str:
    """Render a block-style section.

    ``Example``/``Examples`` bodies go into a fenced ``python`` code block;
    every other block section is emitted as a Markdown blockquote.
    """
    body = body.rstrip()
    if not body:
        return f"**{header}**"
    lower = header.lower()
    if lower in ("example", "examples"):
        return f"**{header}**\n\n```python\n{body}\n```"
    quoted = "\n".join(f"> {line}" if line else ">" for line in body.split("\n"))
    return f"**{header}**\n\n{quoted}"


def render_docstring(text: str | None) -> str:
    """Render a docstring as Markdown.

    Recognises Google-style sections (``Args:``, ``Returns:``,
    ``Attributes:`` etc.) and emits them as bullet lists or block callouts so
    they don't collapse into a single paragraph. Free-form prose passes
    through.
    """
    if not text:
        return ""
    cleaned = textwrap.dedent(text).strip("\n")
    lines = cleaned.split("\n")
    blocks: list[str] = []
    paragraph: list[str] = []

    def flush_paragraph() -> None:
        if paragraph:
            blocks.append("\n".join(paragraph).strip())
            paragraph.clear()

    i = 0
    while i < len(lines):
        line = lines[i]
        m = _SECTION_HEADER_RE.match(line)
        if m and m.group(1).strip().lower() in _KNOWN_SECTIONS:
            flush_paragraph()
            header = m.group(1).strip()
            i += 1
            body_lines: list[str] = []
            while i < len(lines):
                bl = lines[i]
                if bl.strip() == "":
                    # Blank line ends the section unless the next non-blank
                    # line is still indented (i.e. a continuation).
                    j = i + 1
                    while j < len(lines) and lines[j].strip() == "":
                        j += 1
                    if j < len(lines) and lines[j][:1] in (" ", "\t"):
                        body_lines.append("")
                        i += 1
                        continue
                    break
                if bl[:1] not in (" ", "\t"):
                    break
                body_lines.append(bl)
                i += 1
            body = textwrap.dedent("\n".join(body_lines)).strip("\n")
            if header.lower() in _LIST_SECTIONS:
                blocks.append(_render_list_section(header, body))
            else:
                blocks.append(_render_block_section(header, body))
        elif line.strip() == "":
            flush_paragraph()
            i += 1
        else:
            paragraph.append(line)
            i += 1
    flush_paragraph()
    return "\n\n".join(b for b in blocks if b) + "\n"


# Decorators already conveyed by the section a method is filed under, so they
# are not repeated on the "Decorators:" line.
_IMPLICIT_DECORATORS = frozenset({"classmethod", "staticmethod", "property"})


def render_function(func: FunctionDoc, heading_level: int) -> str:
    """Render one function or method as a heading, signature and docstring.

    Decorators other than ``classmethod``/``staticmethod``/``property`` and
    ``*.setter``/``*.deleter`` are listed on a separate "Decorators" line.
    """
    h = "#" * heading_level
    prefix = "async " if func.is_async else ""
    parts: list[str] = [
        f"{h} {func.name}",
        "",
        "```python",
        f"{prefix}{func.signature}",
        "```",
        "",
    ]
    extra_decos = [
        d
        for d in func.decorators
        if d not in _IMPLICIT_DECORATORS
        and not d.endswith((".setter", ".deleter"))
    ]
    if extra_decos:
        decos = ", ".join(f"`@{d}`" for d in extra_decos)
        parts.append(f"*Decorators:* {decos}")
        parts.append("")
    if func.docstring:
        parts.append(render_docstring(func.docstring))
    return "\n".join(parts).rstrip() + "\n"


def _method_category(method: FunctionDoc) -> str:
    """Return the key of the section a method belongs in.

    Decorators are checked first (the first matching one wins), then the
    ``__init__`` name; everything else is a plain method.
    """
    for d in method.decorators:
        if d == "classmethod":
            return "class_methods"
        if d == "staticmethod":
            return "static_methods"
        if d == "property" or d.endswith(".setter") or d.endswith(".deleter"):
            return "properties"
    if method.name == "__init__":
        return "constructor"
    return "methods"


_METHOD_SECTION_ORDER = [
    ("constructor", "Constructor"),
    ("class_methods", "Class Methods"),
    ("static_methods", "Static Methods"),
    ("properties", "Properties"),
    ("methods", "Methods"),
]


def render_class(cls: ClassDoc, heading_level: int) -> str:
    """Render a class: heading, ``class`` line, docstring, grouped methods.

    Methods are grouped under sub-headings in _METHOD_SECTION_ORDER; empty
    groups are omitted.
    """
    h = "#" * heading_level
    bases = f"({', '.join(cls.bases)})" if cls.bases else ""
    parts: list[str] = [
        f"{h} {cls.name}",
        "",
        "```python",
        f"class {cls.name}{bases}",
        "```",
        "",
    ]
    if cls.docstring:
        parts.append(render_docstring(cls.docstring))
    if cls.methods:
        buckets: dict[str, list[FunctionDoc]] = {
            key: [] for key, _ in _METHOD_SECTION_ORDER
        }
        for method in cls.methods:
            buckets[_method_category(method)].append(method)
        for key, label in _METHOD_SECTION_ORDER:
            if not buckets[key]:
                continue
            parts.append("")
            parts.append(f"{'#' * (heading_level + 1)} {label}")
            parts.append("")
            for method in buckets[key]:
                parts.append(render_function(method, heading_level + 2))
    return "\n".join(parts).rstrip() + "\n"


def render_module(module: ModuleDoc) -> str:
    """Render a complete Markdown page (front matter included) for a module."""
    description = first_sentence(module.module_docstring) or (
        f"API reference for `{module.module_dotted}`."
    )
    # Top-level package index gets the group label as its title.
    top_level_dotted = MODULE_PREFIX.rstrip(".")
    title = (
        "mSCP 2.0 API Reference"
        if module.module_dotted == top_level_dotted
        else module.module_dotted
    )
    # Groups (directory index pages) sort before flat module pages in the sidebar.
    sidebar_order = 0 if module.is_init else 1
    parts: list[str] = [
        "---",
        f"title: {title}",
        f'description: "{md_escape_frontmatter(description)}"',
        "sidebar:",
        f"  order: {sidebar_order}",
        "---",
        "",
        f"> Source: [`{SOURCE_PREFIX}{module.rel_path}`](https://github.com/usnistgov/macos_security/blob/{SOURCE_BRANCH}/{SOURCE_PREFIX}{module.rel_path})",
        "",
    ]
    if module.module_docstring:
        parts.append(render_docstring(module.module_docstring))
        parts.append("")
    if module.exports:
        parts.append("## Re-exports (`__all__`)")
        parts.append("")
        parts.append(", ".join(f"`{name}`" for name in module.exports))
        parts.append("")
    if module.classes:
        parts.append("## Classes")
        parts.append("")
        for cls in module.classes:
            parts.append(render_class(cls, heading_level=3))
            parts.append("")
    if module.functions:
        parts.append("## Functions")
        parts.append("")
        for func in module.functions:
            parts.append(render_function(func, heading_level=3))
            parts.append("")
    if not (
        module.classes
        or module.functions
        or module.module_docstring
        or module.exports
    ):
        parts.append("_This module exposes no public API surface._")
        parts.append("")
    return "\n".join(parts).rstrip() + "\n"


def output_path_for(module: ModuleDoc) -> Path:
    """Return the Markdown file path a module's page is written to."""
    rel = Path(module.rel_path)
    if rel.name == "__init__.py":
        # Subpackage index page lives at <subpackage>/index.md
        if rel.parent == Path("."):
            return OUTPUT_DIR / "index.md"
        return OUTPUT_DIR / rel.parent / "index.md"
    return OUTPUT_DIR / rel.with_suffix(".md")


def write_landing_page() -> None:
    """Augment the top-level index page with a list of sibling modules.

    If ``index.md`` already exists, a "Modules" section is appended unless
    one is already present or there is nothing to list. Otherwise a fresh
    landing page is written.
    """
    landing = OUTPUT_DIR / "index.md"
    # NOTE(review): only ``*.md`` pages directly under OUTPUT_DIR are linked;
    # subpackage ``<name>/index.md`` pages are not — confirm this is intended.
    module_links = "\n".join(
        f"- [`{p.stem}`]({p.stem}/)"
        for p in sorted(OUTPUT_DIR.glob("*.md"))
        if p.name != "index.md"
    )
    addition = "\n## Modules\n\n" + module_links + "\n" if module_links else ""
    if landing.exists():
        existing = landing.read_text(encoding="utf-8").rstrip() + "\n"
        if "## Modules" in existing or not addition:
            return
        landing.write_text(existing + addition, encoding="utf-8")
        return
    landing.write_text(
        "---\n"
        "title: mSCP 2.0 API Reference\n"
        'description: "Python API reference for the mscp 2.0 classes package, generated from docstrings on the dev_2.0 branch."\n'
        "---\n\n"
        f"Reference for the `{MODULE_PREFIX.rstrip('.')}` package on the "
        f"`{SOURCE_BRANCH}` branch. These pages are generated directly from "
        "the source docstrings — run `python3 scripts/gen_api_docs.py` to "
        "regenerate.\n"
        + addition,
        encoding="utf-8",
    )


def main() -> int:
    """Regenerate every API page; return a process exit code (0 on success).

    OUTPUT_DIR is deleted and recreated on each run. Files that cannot be
    read from git or fail to parse are reported on stderr and skipped.
    """
    if not (REPO_ROOT / ".git").exists():
        print(f"error: {REPO_ROOT} is not a git repository", file=sys.stderr)
        return 1

    try:
        files = list_python_files()
    except subprocess.CalledProcessError as exc:
        print(
            f"error: failed to list files on {SOURCE_BRANCH}: {exc.stderr}",
            file=sys.stderr,
        )
        return 1

    if not files:
        print(
            f"error: no Python files found under {SOURCE_PREFIX} on {SOURCE_BRANCH}",
            file=sys.stderr,
        )
        return 1

    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(parents=True)

    written = 0
    for rel in files:
        try:
            source = read_file(rel)
        except subprocess.CalledProcessError as exc:
            print(f"warning: could not read {rel}: {exc.stderr}", file=sys.stderr)
            continue
        try:
            module = parse_module(rel, source)
        except SyntaxError as exc:
            print(f"warning: skipping {rel} (syntax error: {exc})", file=sys.stderr)
            continue
        out_path = output_path_for(module)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # Pages contain non-ASCII characters (e.g. em dashes), so pin UTF-8.
        out_path.write_text(render_module(module), encoding="utf-8")
        written += 1
        print(f"wrote {out_path.relative_to(REPO_ROOT)}")

    write_landing_page()
    print(f"\nGenerated {written} module pages in {OUTPUT_DIR.relative_to(REPO_ROOT)}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())