Python API

decoct can be used as a Python library for embedding in your own tooling.

Quick compression

from io import StringIO
from pathlib import Path
from ruamel.yaml import YAML
from decoct.formats import load_input, detect_platform
from decoct.pipeline import Pipeline
from decoct.passes.strip_secrets import StripSecretsPass
from decoct.passes.strip_comments import StripCommentsPass
from decoct.passes.strip_defaults import StripDefaultsPass
from decoct.passes.emit_classes import EmitClassesPass
from decoct.passes.prune_empty import PruneEmptyPass
from decoct.schemas.loader import load_schema
from decoct.schemas.resolver import resolve_schema
from decoct.tokens import create_report, format_report
# Load input (auto-detects JSON, YAML, or INI format)
doc, raw_text = load_input(Path("docker-compose.yaml"))
# Auto-detect platform
platform = detect_platform(doc)
# Build pipeline
passes = [StripSecretsPass(), StripCommentsPass()]
if platform:
    schema = load_schema(resolve_schema(platform))
    passes.append(StripDefaultsPass(schema=schema))
    passes.append(EmitClassesPass(schema=schema))
passes.append(PruneEmptyPass())
pipeline = Pipeline(passes)
stats = pipeline.run(doc)
# Dump compressed output
yaml = YAML(typ="rt")
stream = StringIO()
yaml.dump(doc, stream)
compressed = stream.getvalue()
# Token report
report = create_report(raw_text, compressed)
print(format_report(report))

Pipeline

from decoct.pipeline import Pipeline
from decoct.passes.strip_secrets import StripSecretsPass
from decoct.passes.strip_comments import StripCommentsPass
from decoct.passes.strip_defaults import StripDefaultsPass
from decoct.passes.emit_classes import EmitClassesPass
from decoct.passes.strip_conformant import StripConformantPass
from decoct.passes.annotate_deviations import AnnotateDeviationsPass
from decoct.passes.deviation_summary import DeviationSummaryPass
from decoct.passes.prune_empty import PruneEmptyPass
from decoct.schemas.loader import load_schema
from decoct.schemas.resolver import resolve_schema
from decoct.assertions.loader import load_assertions
from ruamel.yaml import YAML
schema = load_schema(resolve_schema("docker-compose"))
assertions = load_assertions("team-standards.yaml")
pipeline = Pipeline([
    StripSecretsPass(),
    StripCommentsPass(),
    StripDefaultsPass(schema=schema),
    EmitClassesPass(schema=schema),
    StripConformantPass(assertions=assertions),
    AnnotateDeviationsPass(assertions=assertions),
    DeviationSummaryPass(assertions=assertions),
    PruneEmptyPass(),
])
yaml = YAML(typ="rt")
with open("docker-compose.yaml") as f:
    doc = yaml.load(f)
stats = pipeline.run(doc)
# doc is modified in-place
# stats.pass_results contains per-pass statistics

Pipeline statistics

stats = pipeline.run(doc)
for result in stats.pass_results:
    print(f"{result.name}: removed {result.items_removed} items")
    for detail in result.details:
        print(f" {detail}")
print(f"Total pipeline time: {stats.total_time:.3f}s")

Custom pass selection

from decoct.passes.drop_fields import DropFieldsPass
from decoct.passes.keep_fields import KeepFieldsPass
# Only strip secrets and specific noise fields
pipeline = Pipeline([
    StripSecretsPass(),
    DropFieldsPass(patterns=["**.managedFields", "**.uid", "**.resourceVersion"]),
])
# Or keep only specific fields
pipeline = Pipeline([
    StripSecretsPass(),
    KeepFieldsPass(patterns=[
        "metadata.name",
        "metadata.namespace",
        "spec.containers.*.image",
        "spec.containers.*.resources",
    ]),
])

Token counting

from decoct.tokens import count_tokens, create_report, format_report
# Count tokens in a string
tokens = count_tokens(text)
tokens_4o = count_tokens(text, encoding="o200k_base")
# Compare input vs output
report = create_report(input_text, output_text)
print(format_report(report))
# Tokens: 500 -> 250 (saved 250, 50.0%)

Supported encodings: cl100k_base (GPT-4, Claude), o200k_base (GPT-4o).

Key classes

Pipeline

class Pipeline:
    def __init__(self, passes: list[BasePass]) -> None: ...
    def run(self, doc: Any, **kwargs: Any) -> PipelineStats: ...

Sorts passes topologically using run_after/run_before constraints. The document is modified in-place.

PipelineStats

@dataclass
class PipelineStats:
    pass_results: list[PassResult]
    pass_timings: dict[str, float]
    total_time: float

Schema

@dataclass
class Schema:
    platform: str
    source: str
    confidence: Confidence  # "authoritative" | "high" | "medium" | "low"
    defaults: dict[str, Any]
    drop_patterns: list[str]
    system_managed: list[str]

TokenReport

@dataclass
class TokenReport:
    input_tokens: int
    output_tokens: int
    savings_tokens: int  # property
    savings_pct: float  # property

Format handling

from decoct.formats import load_input, detect_format, detect_platform
# Load any supported format
doc, raw_text = load_input(Path("config.yaml"))
doc, raw_text = load_input(Path("state.json"))
doc, raw_text = load_input(Path("my.cnf"))
# Detect format from extension
fmt = detect_format(Path("config.ini")) # "ini"
# Detect platform from document content
platform = detect_platform(doc) # "docker-compose" | "kubernetes" | ... | None

Using profiles

from decoct.profiles.loader import load_profile
from decoct.profiles.resolver import resolve_profile
resolved_path = resolve_profile("docker-compose")
profile = load_profile(resolved_path)
print(f"Profile: {profile.name}")
print(f"Schema ref: {profile.schema_ref}")
print(f"Assertion refs: {profile.assertion_refs}")
print(f"Passes: {list(profile.passes.keys())}")