ML-Dash

Complete Examples

End-to-end, runnable examples for distinct ML-Dash use cases. Each example is self-contained — copy, paste, and run.

For a first-time walkthrough, see Getting Started. Replace alice/... prefixes with your own owner/project path.

Minimal Experiment

The absolute minimum: open an experiment, set parameters, log a metric. This is the smallest useful ML-Dash program.

python
"""Hello, ML-Dash."""
from ml_dash import Experiment

with Experiment(
    prefix="alice/tutorials/hello-ml-dash",
    readme="My first ML-Dash experiment",
    tags=["tutorial"],
).run as experiment:
    # Record the hyperparameters and a free-form log line before any metrics.
    experiment.params.set(learning_rate=0.001, batch_size=32)
    experiment.log("Hello from ML-Dash!", level="info")

    # Synthetic loss curve for epochs 0-4: 1, 1/2, 1/3, 1/4, 1/5.
    losses = [1.0 / n for n in range(1, 6)]
    for epoch, loss in enumerate(losses):
        experiment.metrics("train").log(loss=loss, epoch=epoch)

Data is written under ./.dash/ by default. Pass dash_url=True to mirror to the remote server.

Three Usage Styles

ML-Dash supports three equivalent ways to scope an experiment. Pick the one that fits your code; all three produce identical data on disk.

python
"""Decorator, context manager, and imperative styles."""
from ml_dash import Experiment, ml_dash_experiment


# 1. Decorator — cleanest for a training function.
@ml_dash_experiment(
    prefix="alice/usage-styles/decorator",
    readme="Decorator style",
    tags=["decorator"],
)
def train_decorated(experiment):
    """Log three epochs of synthetic loss inside a decorator-managed experiment.

    Args:
        experiment: Experiment handle supplied by ``ml_dash_experiment``;
            the script calls ``train_decorated()`` with no arguments.
    """
    experiment.params.set(learning_rate=0.001)
    # Loss decays as 1, 1/2, 1/3 over epochs 0-2.
    for epoch, loss in enumerate(1.0 / n for n in (1, 2, 3)):
        experiment.metrics("train").log(loss=loss, epoch=epoch)


# 2. Context manager — best for scripts and notebooks.
def train_context():
    """Log three epochs of synthetic loss using the context-manager style."""
    spec = Experiment(
        prefix="alice/usage-styles/context",
        readme="Context manager style",
    )
    with spec.run as experiment:
        experiment.params.set(learning_rate=0.002)
        # Loss decays as 0.8, 0.8/2, 0.8/3 over epochs 0-2.
        for epoch, loss in enumerate(0.8 / n for n in (1, 2, 3)):
            experiment.metrics("train").log(loss=loss, epoch=epoch)


# 3. Imperative — when the experiment must span multiple scopes.
def train_imperative():
    """Log three epochs of synthetic loss with explicit start/complete calls."""
    experiment = Experiment(
        prefix="alice/usage-styles/imperative",
        readme="Imperative style",
    )
    experiment.run.start()
    try:
        experiment.params.set(learning_rate=0.003)
        # Loss decays as 0.6, 0.6/2, 0.6/3 over epochs 0-2.
        for epoch, loss in enumerate(0.6 / n for n in (1, 2, 3)):
            experiment.metrics("train").log(loss=loss, epoch=epoch)
    finally:
        # Runs on success and on error, so the run is always closed.
        experiment.run.complete()


if __name__ == "__main__":
    # Run all three styles back to back; each writes to its own prefix.
    # train_decorated() takes no argument here because the decorator
    # supplies `experiment`.
    train_decorated()
    train_context()
    train_imperative()

The decorator injects experiment as a keyword argument. The context manager auto-closes on exit (and marks FAILED if an exception is raised). The imperative form needs an explicit try/finally to ensure complete() runs.

Parameters from a Config Class

params.set() accepts class objects directly — their public attributes are extracted into a namespaced parameter group. This pairs naturally with params-proto config classes.

python
"""Pass a config class straight into params.set()."""
from ml_dash import Experiment


class Args:
    # Training hyperparameters; passed to params.set() as the "Args" group.
    learning_rate = 0.001
    batch_size = 64
    optimizer = "adam"
    weight_decay = 1e-4
    # Explicit epoch count (same value the loop previously derived from
    # batch_size // 16), so changing the batch size no longer changes
    # how long the run lasts.
    epochs = 4


class ModelArgs:
    # Model hyperparameters; passed to params.set() as the "Model" group.
    architecture = "resnet50"
    pretrained = True
    num_classes = 10


with Experiment(
    prefix="alice/config-class/run-001",
    readme="Config classes as parameter groups",
    tags=["config"],
).run as experiment:
    # Class attributes are flattened into Args.learning_rate, Args.batch_size, ...
    experiment.params.set(Args=Args, Model=ModelArgs)

    # The config class stays a plain Python class, so its attributes can
    # drive the training loop directly.
    for epoch in range(Args.epochs):
        experiment.metrics("train").log(epoch=epoch, loss=1.0 / (epoch + 1))

The same call shape works with params_proto.PrefixProto subclasses, so a CLI-configurable class can be logged with a single line.

PyTorch Training with Checkpoints

Full MNIST training loop with parameters, metrics, structured logs, and best/final model uploads.

python
"""PyTorch MNIST training with ML-Dash tracking."""
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from ml_dash import Experiment


class SimpleNet(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10, ReLU between layers."""

    def __init__(self):
        super().__init__()
        # Attribute names become the state_dict keys of saved checkpoints.
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to rows of 784 features and return 10 raw scores per row."""
        flat = x.view(-1, 784)
        hidden = self.fc1(flat)
        hidden = self.relu(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu(hidden)
        scores = self.fc3(hidden)
        return scores


def train_mnist():
    """Train SimpleNet on MNIST for 5 epochs and record the run.

    Records hyperparameters, per-epoch train/eval metrics, and structured
    log lines on the experiment. Writes ``best_model.pth`` whenever the
    evaluation accuracy improves and ``final_model.pth`` after the last
    epoch, both into the current working directory, and saves each through
    ``experiment.files("models")``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size, epochs, lr = 64, 5, 0.001

    transform = transforms.Compose([
        transforms.ToTensor(),
        # NOTE(review): presumably the MNIST pixel mean/std — confirm.
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    train_ds = datasets.MNIST("./data", train=True, download=True, transform=transform)
    # The held-out MNIST split doubles as the evaluation set below.
    test_ds = datasets.MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size)

    model = SimpleNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    with Experiment(
        prefix="alice/computer-vision/mnist-pytorch",
        readme="MNIST classification with PyTorch",
        tags=["mnist", "pytorch"],
    ).run as experiment:
        # Nested dict form: parameters grouped under "model" and "training".
        experiment.params.set({
            "model": {"architecture": "SimpleMLP", "layers": [784, 128, 64, 10]},
            "training": {"optimizer": "adam", "learning_rate": lr,
                         "batch_size": batch_size, "epochs": epochs},
            "device": str(device),
            "dataset": "MNIST",
        })
        experiment.log(f"Training on {device}", level="info")

        best_accuracy = 0.0
        for epoch in range(epochs):
            # --- Training pass ---
            model.train()
            train_loss, correct, total = 0.0, 0, 0
            for data, target in train_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                correct += output.argmax(dim=1).eq(target).sum().item()
                total += target.size(0)
            # Mean of the per-batch losses, not a per-sample mean.
            train_loss /= len(train_loader)
            train_acc = correct / total

            # --- Evaluation pass (no gradient tracking) ---
            model.eval()
            # `correct` and `total` are reset and reused for the eval counts.
            val_loss, correct, total = 0.0, 0, 0
            with torch.no_grad():
                for data, target in test_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    val_loss += criterion(output, target).item()
                    correct += output.argmax(dim=1).eq(target).sum().item()
                    total += target.size(0)
            val_loss /= len(test_loader)
            val_acc = correct / total

            # One call logs both groups against the same (0-based) epoch.
            experiment.metrics.log(
                epoch=epoch,
                train=dict(loss=train_loss, accuracy=train_acc),
                eval=dict(loss=val_loss, accuracy=val_acc),
            )
            experiment.log(f"Epoch {epoch + 1}/{epochs}", level="info",
                           metadata={"train_loss": train_loss, "val_acc": val_acc})

            # Checkpoint only on strict improvement; the local file is
            # overwritten each time.
            if val_acc > best_accuracy:
                best_accuracy = val_acc
                torch.save(model.state_dict(), "best_model.pth")
                experiment.files("models").save(
                    "best_model.pth",
                    description=f"Best model (accuracy: {best_accuracy:.4f})",
                    tags=["best"],
                    metadata={"epoch": epoch, "accuracy": best_accuracy},
                )

        # Always keep the last-epoch weights, even if they are not the best.
        torch.save(model.state_dict(), "final_model.pth")
        experiment.files("models").save("final_model.pth", tags=["final"])
        experiment.log("Training complete!", level="info")


if __name__ == "__main__":
    train_mnist()

Hyperparameter Sweep

Grid search across configurations. Each run becomes a separate experiment for side-by-side comparison in the dashboard.

python
"""Hyperparameter grid search."""
import random
from itertools import product
from ml_dash import Experiment


def train_with_config(lr, batch_size, experiment, epochs=10):
    """Simulate a training run and log one metrics row per epoch.

    The curves are synthetic: loss decays as ``1 / (epoch + 1)`` scaled by
    ``lr / 0.01`` plus uniform noise in [-0.05, 0.05]; accuracy grows
    linearly, faster for smaller batch sizes, and is capped at 0.95.

    Args:
        lr: Learning rate; scales the synthetic loss.
        batch_size: Batch size; scales the synthetic accuracy growth.
        experiment: Object exposing ``metrics.log(epoch=..., train=...)``.
        epochs: Number of epochs to simulate. Defaults to 10.

    Returns:
        The accuracy of the last simulated epoch, or 0.0 when ``epochs`` is 0.
    """
    accuracy = 0.0
    for epoch in range(epochs):
        noisy_loss = 1.0 / (epoch + 1) * (lr / 0.01) + random.uniform(-0.05, 0.05)
        # The noise can exceed the decayed base term (e.g. lr=0.001 after
        # epoch 1); clamp so the logged loss is never negative.
        loss = max(0.0, noisy_loss)
        accuracy = min(0.95, 0.5 + epoch * 0.05 * (32 / batch_size))
        experiment.metrics.log(epoch=epoch, train=dict(loss=loss, accuracy=accuracy))
    return accuracy


def sweep():
    """Run the 3x3 learning-rate / batch-size grid and print the best result.

    Each (lr, batch_size) pair is tracked under its own experiment prefix.
    The final accuracies are collected in memory and the highest one is
    printed; on ties the first pair in grid order wins.
    """
    learning_rates = [0.1, 0.01, 0.001]
    batch_sizes = [16, 32, 64]

    results = []
    for lr, bs in product(learning_rates, batch_sizes):
        run_name = f"search-lr{lr}-bs{bs}"
        trial = Experiment(
            prefix=f"alice/hyperparameter-search/{run_name}",
            readme=f"Grid search: lr={lr}, batch_size={bs}",
            tags=["grid-search", f"lr-{lr}", f"bs-{bs}"],
        )
        with trial.run as experiment:
            experiment.params.set(learning_rate=lr, batch_size=bs,
                                  optimizer="sgd", epochs=10)
            experiment.log(f"Starting run lr={lr} bs={bs}")
            final_accuracy = train_with_config(lr, bs, experiment)
            record = {"lr": lr, "batch_size": bs, "accuracy": final_accuracy}
            results.append(record)

    best = max(results, key=lambda row: row["accuracy"])
    print(f"Best: lr={best['lr']} bs={best['batch_size']} acc={best['accuracy']:.4f}")


if __name__ == "__main__":
    sweep()

Resume an Experiment

Open the same prefix again to upsert. Metrics and logs append rather than overwrite, and parameters merge key by key (a later value for the same key replaces the earlier one) — useful for resuming a crashed run or extending a finished one with a second analysis pass.

python
"""Resume an existing experiment and append more metrics."""
from ml_dash import Experiment

PREFIX = "alice/resume-demo/run-001"

# First pass: train for a few epochs.
with Experiment(prefix=PREFIX, readme="Resume demo").run as experiment:
    experiment.params.set(learning_rate=0.001, batch_size=32)
    for epoch in range(3):
        experiment.metrics("train").log(loss=1.0 / (epoch + 1), epoch=epoch)
    experiment.log("Initial run complete", level="info")

# Later (different process, same prefix): read back and continue.
with Experiment(prefix=PREFIX, readme="Resume demo - continued").run as experiment:
    # Only the first 1000 rows are read; a longer history would need paging
    # (the result also reports total and hasMore).
    prev = experiment.metrics("train").read(start_index=0, limit=1000)
    # default=-1 covers a prefix with no "train" rows yet: max() would raise
    # ValueError on the empty sequence; with the default, training starts
    # at epoch 0.
    last_epoch = max((p["data"]["epoch"] for p in prev["data"]), default=-1)
    first_new_epoch = last_epoch + 1
    # Report the epoch that runs next, not the one that already finished.
    experiment.log(f"Resuming from epoch {first_new_epoch}", level="info")

    # Bump a hyperparameter and append more epochs.
    experiment.params.set(learning_rate=0.0001)
    for epoch in range(first_new_epoch, first_new_epoch + 3):
        experiment.metrics("train").log(loss=0.3 / (epoch + 1), epoch=epoch)

params.set() merges into existing parameters, so the second learning_rate overrides the first. metrics(...).read() returns data, total, and hasMore — see Metrics.

Project Root Pattern

For multi-experiment repos, set RUN.project_root once and call RUN.__post_init__(entry=__file__) in each training script. ML-Dash derives the prefix from the script's path relative to the project root — no hardcoded names.

python
"""experiments/__init__.py — one-time setup."""
from pathlib import Path
from ml_dash import RUN

# The directory holding this __init__.py (the experiments/ package) becomes
# the root that each training script's path is made relative to.
RUN.project_root = str(Path(__file__).parent)
python
"""experiments/vision/resnet/train.py — auto-prefixed by file path."""
from ml_dash import RUN, Experiment
import experiments  # triggers project_root setup

# Derive RUN.prefix from where this file sits under project_root, e.g.
# experiments/vision/resnet/train.py  ->  RUN.prefix = "vision/resnet/train"
RUN.__post_init__(entry=__file__)

auto_prefixed = Experiment(
    prefix=f"alice/my-project/{RUN.prefix}",
    readme="Auto-prefixed from filesystem layout",
)
with auto_prefixed.run as experiment:
    # Record the entry script alongside the hyperparameters.
    experiment.params.set(script=RUN.entry, model="resnet50", lr=0.001)
    # Synthetic loss: 1, 1/2, ..., 1/5 over epochs 0-4.
    for epoch, loss in enumerate(1.0 / n for n in range(1, 6)):
        experiment.metrics("train").log(loss=loss, epoch=epoch)

Moving train.py to a new directory automatically gives it a new prefix; no constants to update. The same pattern accepts a directory (e.g. one containing sweep.jsonl) instead of a file.

Robotics: Timestamped Tracks and Buffered Telemetry

Log timestamped joint state on a track while accumulating per-step values into a buffer, then flush per-epoch summary statistics. Tracks store time-aligned data with a required _ts= timestamp; the metric buffer reduces per-step write overhead by aggregating values before logging.

python
"""Robotics episode: joint-state track + buffered telemetry summaries."""
import numpy as np
from ml_dash import Experiment


def run_episode():
    """Simulate a 5-epoch pick-and-place episode and record its telemetry.

    Every step appends one timestamped sample to the "robot/joints" track
    and buffers two scalars under "control". After each block of 100 steps
    the buffer is summarized (mean and max) and an "epoch" metric row is
    logged. Tracks are flushed once at the end.
    """
    episode = Experiment(
        prefix="alice/robotics/pick-and-place-001",
        readme="Pick-and-place demo with joint telemetry",
        tags=["robot", "episode"],
    )
    with episode.run as experiment:
        experiment.params.set(
            robot="ur5",
            task="pick_and_place",
            control_hz=100,
        )

        steps_per_epoch = 100
        for epoch in range(5):
            first_step = epoch * steps_per_epoch
            for step in range(first_step, first_step + steps_per_epoch):
                t = step / 100.0
                # Both sinks below share the same sine term and grip value.
                sin_t = np.sin(t)
                grip = 0.5 + 0.5 * sin_t

                # Timestamped track entry: _ts is required (float seconds).
                experiment.tracks("robot/joints").append(
                    q=[sin_t, np.cos(t), np.sin(2 * t), np.cos(2 * t)],
                    gripper_force=grip,
                    _ts=t,
                )

                # Accumulate scalars in the buffer instead of logging every step.
                experiment.metrics("control").buffer(
                    gripper_force=grip,
                    q0_abs=abs(float(sin_t)),
                )

                # Single structured event, fired halfway through the episode.
                if step == 250:
                    experiment.log("Object grasped", level="info",
                                   metadata={"step": step, "t": t})

            # Flush per-epoch summary stats: logs control/gripper_force.mean,
            # control/q0_abs.mean, plus .max for each.
            experiment.metrics.buffer.log_summary("mean", "max")
            experiment.metrics("epoch").log(epoch=epoch)

        experiment.tracks.flush()
        experiment.log("Episode complete", level="info")


if __name__ == "__main__":
    run_episode()

See Tracks and Buffering for details on the APIs used here.

Structured Logging for Debugging

Use log levels and metadata to make runs easy to triage. Filter by level in the dashboard.

python
"""Training with structured debug/warn/info logs."""
import random
from ml_dash import Experiment


def train_with_debug():
    """Run a 5-epoch synthetic training loop that logs at several levels.

    Emits debug lines at the start of each epoch, a fixed learning-rate
    warning in the third epoch, a gradient-clipping warning with
    probability 0.2 per epoch, and an info line with the loss after each
    epoch.
    """
    debug_run = Experiment(
        prefix="alice/debugging/debug-training",
        readme="Training with debug logging",
        tags=["debug"],
    )
    with debug_run.run as experiment:
        experiment.params.set(learning_rate=0.001, batch_size=32, model="debug_net")
        experiment.log("Training started", level="info")
        experiment.log("Initializing model", level="debug")

        lr_warning_epoch = 2
        for epoch in range(5):
            experiment.log(f"Starting epoch {epoch + 1}", level="debug")
            loss = 1.0 / (epoch + 1)

            # Deterministic warning: always raised in the third epoch.
            if epoch == lr_warning_epoch:
                lr_hint = {"current_lr": 0.001, "suggested_lr": 0.0001}
                experiment.log(
                    "Learning rate may be too high",
                    level="warn",
                    metadata=lr_hint,
                )

            # Random warning: one draw per epoch, fires about 20% of the time.
            clipped = random.random() < 0.2
            if clipped:
                clip_info = {"gradient_norm": 15.5, "max_norm": 10.0}
                experiment.log(
                    "Gradient clipping applied",
                    level="warn",
                    metadata=clip_info,
                )

            experiment.metrics("train").log(loss=loss, epoch=epoch)
            experiment.log(f"Epoch {epoch + 1} complete", level="info",
                           metadata={"loss": loss})

        experiment.log("Training complete", level="info")


if __name__ == "__main__":
    train_with_debug()

See Also