# Complete Examples

End-to-end, runnable examples for distinct ML-Dash use cases. Each example is self-contained — copy, paste, and run.

For a first-time walkthrough, see [Getting Started](/getting-started.md). Replace `alice/...` prefixes with your own `owner/project` path.

## Minimal Experiment

The absolute minimum: open an experiment, set parameters, write a log message, and log a metric. This is the smallest useful ML-Dash program.

```python
"""Hello, ML-Dash."""
from ml_dash import Experiment

# Open the experiment; the context manager yields the handle used below.
with Experiment(
    prefix="alice/tutorials/hello-ml-dash",
    readme="My first ML-Dash experiment",
    tags=["tutorial"],
).run as experiment:
    # Hyperparameters first, then a free-form log line.
    experiment.params.set(learning_rate=0.001, batch_size=32)
    experiment.log("Hello from ML-Dash!", level="info")

    # One "train" metric point per epoch, using a synthetic 1 / (epoch + 1) loss.
    for epoch in range(5):
        experiment.metrics("train").log(loss=1.0 / (epoch + 1), epoch=epoch)
```

Data is written under `./.dash/` by default. Pass `dash_url=True` to mirror to the remote server.

## Three Usage Styles

ML-Dash supports three equivalent ways to scope an experiment. Pick the one that fits your code; all three produce identical data on disk.

```python
"""Decorator, context manager, and imperative styles."""
from ml_dash import Experiment, ml_dash_experiment

# 1. Decorator — cleanest for a training function.
@ml_dash_experiment(
    prefix="alice/usage-styles/decorator",
    readme="Decorator style",
    tags=["decorator"],
)
def train_decorated(experiment):
    """Log three epochs through the ``experiment`` handle supplied by the decorator."""
    experiment.params.set(learning_rate=0.001)
    for epoch in range(3):
        loss = 1.0 / (epoch + 1)
        experiment.metrics("train").log(loss=loss, epoch=epoch)

# 2. Context manager — best for scripts and notebooks.
def train_context():
    """Log three epochs inside a ``with`` block that scopes the experiment."""
    with Experiment(
        prefix="alice/usage-styles/context",
        readme="Context manager style",
    ).run as experiment:
        experiment.params.set(learning_rate=0.002)
        for epoch in range(3):
            loss = 0.8 / (epoch + 1)
            experiment.metrics("train").log(loss=loss, epoch=epoch)

# 3. Imperative — when the experiment must span multiple scopes.
def train_imperative():
    """Log three epochs with explicit ``run.start()`` / ``run.complete()`` calls."""
    experiment = Experiment(
        prefix="alice/usage-styles/imperative",
        readme="Imperative style",
    )
    experiment.run.start()
    try:
        experiment.params.set(learning_rate=0.003)
        for epoch in range(3):
            loss = 0.6 / (epoch + 1)
            experiment.metrics("train").log(loss=loss, epoch=epoch)
    finally:
        # Executed whether or not the body above raised.
        experiment.run.complete()

# Script entry point: run each of the three styles in turn.
if __name__ == "__main__":
    train_decorated()
    train_context()
    train_imperative()
```

The decorator injects `experiment` as a keyword argument. The context manager auto-closes on exit (and marks `FAILED` if an exception is raised). The imperative form needs an explicit `try`/`finally` to ensure `complete()` runs.

## Parameters from a Config Class

`params.set()` accepts class objects directly — their public attributes are extracted into a namespaced parameter group. This pairs naturally with `params-proto` config classes.

```python
"""Pass a config class straight into params.set()."""
from ml_dash import Experiment

# Training hyperparameters; passed to params.set() below as a whole class.
class Args:
    learning_rate = 0.001
    batch_size = 64
    optimizer = "adam"
    weight_decay = 1e-4
    epochs = 4  # number of passes made by the demo loop below

# Model settings; passed to params.set() below under the "Model" keyword.
class ModelArgs:
    architecture = "resnet50"
    pretrained = True
    num_classes = 10

with Experiment(
    prefix="alice/config-class/run-001",
    readme="Config classes as parameter groups",
    tags=["config"],
).run as experiment:
    # Class attributes are flattened into Args.learning_rate, Args.batch_size, ...
    experiment.params.set(Args=Args, Model=ModelArgs)

    # Read the epoch count from the config class rather than deriving it
    # from an unrelated setting such as the batch size.
    for epoch in range(Args.epochs):
        experiment.metrics("train").log(epoch=epoch, loss=1.0 / (epoch + 1))
```

The same call shape works with `params_proto.PrefixProto` subclasses, so a CLI-configurable class can be logged with a single line.

## PyTorch Training with Checkpoints

Full MNIST training loop with parameters, metrics, structured logs, and best/final model uploads.

```python
"""PyTorch MNIST training with ML-Dash tracking."""
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from ml_dash import Experiment

class SimpleNet(nn.Module):
    """Fully connected classifier: 784 -> 128 -> 64 -> 10 with ReLU in between."""

    def __init__(self):
        super().__init__()
        # Layers are created in fc1, fc2, fc3 order and keep these attribute names.
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten the input to rows of 784 values and return the fc3 output."""
        flat = x.view(-1, 784)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return logits

def train_mnist():
    """Train SimpleNet on MNIST for a fixed number of epochs and track the run.

    Logs parameters, per-epoch train/eval metrics and log lines to the
    experiment, saves a checkpoint whenever evaluation accuracy improves,
    and saves a final checkpoint after the last epoch. Checkpoint files are
    written to the current working directory.
    """
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size, epochs, lr = 64, 5, 0.001

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    # Only the training split requests a download; both splits share ./data.
    train_ds = datasets.MNIST("./data", train=True, download=True, transform=transform)
    test_ds = datasets.MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size)

    model = SimpleNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    with Experiment(
        prefix="alice/computer-vision/mnist-pytorch",
        readme="MNIST classification with PyTorch",
        tags=["mnist", "pytorch"],
    ).run as experiment:
        # Record the full configuration once, as a nested dict.
        experiment.params.set({
            "model": {"architecture": "SimpleMLP", "layers": [784, 128, 64, 10]},
            "training": {"optimizer": "adam", "learning_rate": lr,
                         "batch_size": batch_size, "epochs": epochs},
            "device": str(device),
            "dataset": "MNIST",
        })
        experiment.log(f"Training on {device}", level="info")

        best_accuracy = 0.0
        for epoch in range(epochs):
            # --- Training pass over the training split ---
            model.train()
            train_loss, correct, total = 0.0, 0, 0
            for data, target in train_loader:
                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()
                correct += output.argmax(dim=1).eq(target).sum().item()
                total += target.size(0)
            # Loss is the mean of the per-batch losses; accuracy is per sample.
            train_loss /= len(train_loader)
            train_acc = correct / total

            # --- Evaluation pass: the MNIST test split serves as validation ---
            model.eval()
            val_loss, correct, total = 0.0, 0, 0
            with torch.no_grad():
                for data, target in test_loader:
                    data, target = data.to(device), target.to(device)
                    output = model(data)
                    val_loss += criterion(output, target).item()
                    correct += output.argmax(dim=1).eq(target).sum().item()
                    total += target.size(0)
            val_loss /= len(test_loader)
            val_acc = correct / total

            # One metrics call per epoch carrying both the train and eval groups.
            experiment.metrics.log(
                epoch=epoch,
                train=dict(loss=train_loss, accuracy=train_acc),
                eval=dict(loss=val_loss, accuracy=val_acc),
            )
            experiment.log(f"Epoch {epoch + 1}/{epochs}", level="info",
                           metadata={"train_loss": train_loss, "val_acc": val_acc})

            # Checkpoint only on a strict improvement; the local file
            # best_model.pth is overwritten each time.
            if val_acc > best_accuracy:
                best_accuracy = val_acc
                torch.save(model.state_dict(), "best_model.pth")
                experiment.files("models").save(
                    "best_model.pth",
                    description=f"Best model (accuracy: {best_accuracy:.4f})",
                    tags=["best"],
                    metadata={"epoch": epoch, "accuracy": best_accuracy},
                )

        # Always keep the weights from the last epoch, even if they are not the best.
        torch.save(model.state_dict(), "final_model.pth")
        experiment.files("models").save("final_model.pth", tags=["final"])
        experiment.log("Training complete!", level="info")

# Script entry point: run the MNIST training example.
if __name__ == "__main__":
    train_mnist()
```

## Hyperparameter Sweep

Grid search across configurations. Each run becomes a separate experiment for side-by-side comparison in the dashboard.

```python
"""Hyperparameter grid search."""
import random
from itertools import product
from ml_dash import Experiment

def train_with_config(lr, batch_size, experiment, epochs=10):
    """Simulate one training run and log a train metric point per epoch.

    Args:
        lr: Learning rate; the synthetic loss scales with ``lr / 0.01``.
        batch_size: Batch size; the synthetic accuracy climbs faster for
            smaller batches.
        experiment: Object exposing ``metrics.log(...)``, such as an open
            ML-Dash experiment.
        epochs: Number of epochs to simulate. Defaults to 10.

    Returns:
        The accuracy of the last simulated epoch, or 0.0 if no epoch ran.
    """
    accuracy = 0.0
    for epoch in range(epochs):
        noise = random.uniform(-0.05, 0.05)
        # Clamp at zero: for small learning rates the noise can exceed the
        # decaying base term, and a negative loss would be meaningless.
        loss = max(0.0, 1.0 / (epoch + 1) * (lr / 0.01) + noise)
        accuracy = min(0.95, 0.5 + epoch * 0.05 * (32 / batch_size))
        experiment.metrics.log(epoch=epoch, train=dict(loss=loss, accuracy=accuracy))
    return accuracy

def sweep():
    """Run every (learning rate, batch size) pair as its own experiment, then print the best."""
    learning_rates = [0.1, 0.01, 0.001]
    batch_sizes = [16, 32, 64]

    results = []
    for lr, bs in product(learning_rates, batch_sizes):
        # Each grid point gets a distinct prefix so runs can be compared side by side.
        with Experiment(
            prefix=f"alice/hyperparameter-search/search-lr{lr}-bs{bs}",
            readme=f"Grid search: lr={lr}, batch_size={bs}",
            tags=["grid-search", f"lr-{lr}", f"bs-{bs}"],
        ).run as experiment:
            experiment.params.set(learning_rate=lr, batch_size=bs,
                                  optimizer="sgd", epochs=10)
            experiment.log(f"Starting run lr={lr} bs={bs}")
            final_accuracy = train_with_config(lr, bs, experiment)
            results.append({"lr": lr, "batch_size": bs, "accuracy": final_accuracy})

    # max() keeps the first entry on ties, i.e. the earliest grid point.
    best = max(results, key=lambda r: r["accuracy"])
    print(f"Best: lr={best['lr']} bs={best['batch_size']} acc={best['accuracy']:.4f}")

# Script entry point: run the full grid search.
if __name__ == "__main__":
    sweep()
```

## Resume an Experiment

Open the same prefix again to upsert the experiment. Metrics and logs are appended rather than overwritten, and parameters are merged key by key — useful for resuming a crashed run or extending a finished one with a second analysis pass.

```python
"""Resume an existing experiment and append more metrics."""
from ml_dash import Experiment

PREFIX = "alice/resume-demo/run-001"

# First pass: train for a few epochs.
with Experiment(prefix=PREFIX, readme="Resume demo").run as experiment:
    experiment.params.set(learning_rate=0.001, batch_size=32)
    for epoch in range(3):
        experiment.metrics("train").log(loss=1.0 / (epoch + 1), epoch=epoch)
    experiment.log("Initial run complete", level="info")

# Later (different process, same prefix): read back and continue.
with Experiment(prefix=PREFIX, readme="Resume demo - continued").run as experiment:
    # Reads at most 1000 points, which is plenty for this demo.
    prev = experiment.metrics("train").read(start_index=0, limit=1000)
    # default=-1 keeps the resume working when nothing was logged yet (for
    # example, the first run crashed early): the loop below then starts at 0.
    last_epoch = max((p["data"]["epoch"] for p in prev["data"]), default=-1)
    experiment.log(f"Resuming from epoch {last_epoch}", level="info")

    # Bump a hyperparameter and append more epochs.
    experiment.params.set(learning_rate=0.0001)
    for epoch in range(last_epoch + 1, last_epoch + 4):
        experiment.metrics("train").log(loss=0.3 / (epoch + 1), epoch=epoch)
```

`params.set()` merges into existing parameters, so the second `learning_rate` overrides the first. `metrics(...).read()` returns `data`, `total`, and `hasMore` — see [Metrics](/metrics.md).

## Project Root Pattern

For multi-experiment repos, set `RUN.project_root` once and pass `entry=__file__` to `RUN.__post_init__()` in each training script. ML-Dash derives the prefix from the script's path relative to the project root — no hardcoded names.

```python
"""experiments/__init__.py — one-time setup."""
from pathlib import Path
from ml_dash import RUN

# Use the directory containing this __init__.py as the project root.
RUN.project_root = str(Path(__file__).parent)
```

```python
"""experiments/vision/resnet/train.py — auto-prefixed by file path."""
from ml_dash import RUN, Experiment
import experiments  # triggers project_root setup

# Derive RUN.prefix from this file's location relative to project_root, e.g.
# experiments/vision/resnet/train.py  ->  RUN.prefix = "vision/resnet/train"
RUN.__post_init__(entry=__file__)

# Owner and project are fixed; the remainder follows the file layout.
experiment_prefix = f"alice/my-project/{RUN.prefix}"

with Experiment(
    prefix=experiment_prefix,
    readme="Auto-prefixed from filesystem layout",
).run as experiment:
    experiment.params.set(script=RUN.entry, model="resnet50", lr=0.001)
    for epoch in range(5):
        loss = 1.0 / (epoch + 1)
        experiment.metrics("train").log(loss=loss, epoch=epoch)
```

Moving `train.py` to a new directory automatically gives it a new prefix; no constants to update. The same pattern accepts a directory (e.g. one containing `sweep.jsonl`) instead of a file.

## Robotics: Timestamped Tracks and Buffered Telemetry

Log timestamped joint state on a track while accumulating per-step values into a buffer, then flush per-epoch summary statistics. Tracks store time-aligned data with a required `_ts=` timestamp; the metric buffer reduces per-step write overhead by aggregating values before logging.

```python
"""Robotics episode: joint-state track + buffered telemetry summaries."""
import numpy as np
from ml_dash import Experiment

def run_episode():
    """Simulate five 100-step epochs, logging a joint track and buffered control stats."""
    with Experiment(
        prefix="alice/robotics/pick-and-place-001",
        readme="Pick-and-place demo with joint telemetry",
        tags=["robot", "episode"],
    ).run as experiment:
        experiment.params.set(
            robot="ur5",
            task="pick_and_place",
            control_hz=100,
        )

        steps_per_epoch = 100
        for epoch in range(5):
            for i in range(steps_per_epoch):
                step = epoch * steps_per_epoch + i
                t = step / 100.0

                # Shared synthetic signals, computed once per step.
                sin_t = np.sin(t)
                grip = 0.5 + 0.5 * sin_t

                # Timestamped track entry: _ts is required (float seconds).
                experiment.tracks("robot/joints").append(
                    q=[sin_t, np.cos(t), np.sin(2 * t), np.cos(2 * t)],
                    gripper_force=grip,
                    _ts=t,
                )

                # Buffer the scalars rather than writing a metric point every step.
                experiment.metrics("control").buffer(
                    gripper_force=grip,
                    q0_abs=abs(float(sin_t)),
                )

                # Single event marker at a fixed global step.
                if step == 250:
                    experiment.log("Object grasped", level="info",
                                   metadata={"step": step, "t": t})

            # Flush per-epoch summary stats: logs control/gripper_force.mean,
            # control/q0_abs.mean, plus .max for each.
            experiment.metrics.buffer.log_summary("mean", "max")
            experiment.metrics("epoch").log(epoch=epoch)

        experiment.tracks.flush()
        experiment.log("Episode complete", level="info")

# Script entry point: run one simulated episode.
if __name__ == "__main__":
    run_episode()
```

See [Tracks](/tracks.md) and [Buffering](/buffering.md) for details on the APIs used here.

## Structured Logging for Debugging

Use log levels and metadata to make runs easy to triage. Filter by `level` in the dashboard.

```python
"""Training with structured debug/warn/info logs."""
import random
from ml_dash import Experiment

def train_with_debug():
    """Run a five-epoch dummy loop that emits debug, info and warn log entries."""
    with Experiment(
        prefix="alice/debugging/debug-training",
        readme="Training with debug logging",
        tags=["debug"],
    ).run as experiment:
        experiment.params.set(learning_rate=0.001, batch_size=32, model="debug_net")
        experiment.log("Training started", level="info")
        experiment.log("Initializing model", level="debug")

        for epoch in range(5):
            epoch_number = epoch + 1  # 1-based, for human-readable messages
            experiment.log(f"Starting epoch {epoch_number}", level="debug")
            loss = 1.0 / epoch_number

            # Fixed warning on the third epoch.
            if epoch == 2:
                lr_hint = {"current_lr": 0.001, "suggested_lr": 0.0001}
                experiment.log(
                    "Learning rate may be too high",
                    level="warn",
                    metadata=lr_hint,
                )

            # Random warning, emitted in roughly one epoch out of five.
            if random.random() < 0.2:
                clip_info = {"gradient_norm": 15.5, "max_norm": 10.0}
                experiment.log(
                    "Gradient clipping applied",
                    level="warn",
                    metadata=clip_info,
                )

            experiment.metrics("train").log(loss=loss, epoch=epoch)
            experiment.log(f"Epoch {epoch_number} complete", level="info",
                           metadata={"loss": loss})

        experiment.log("Training complete", level="info")

# Script entry point: run the debug-logging example.
if __name__ == "__main__":
    train_with_debug()
```

## See Also

- [Getting Started](/getting-started.md) — install and first experiment
- [Experiments](/experiments.md) — `Experiment` lifecycle and prefixes
- [Parameters](/parameters.md) — hyperparameter tracking
- [Metrics](/metrics.md) — time-series metrics
- [Logging](/logging.md) — structured logs with levels and metadata
- [Files](/files.md) — checkpoints and artifacts
- [Tracks](/tracks.md) — time-aligned media streams
- [Images](/images.md) — image logging and formats
- [Buffering](/buffering.md) — batched writes for high-frequency loops
- [CLI](/cli.md) — `ml-dash` command-line tools
- [API Reference](/api-reference.md) — full API surface