From 8d5f6c5798cf8387b6919c2eee9fa12e2f0c757d Mon Sep 17 00:00:00 2001
From: BatyLeo
Date: Wed, 1 Apr 2026 14:56:28 +0200
Subject: [PATCH 01/21] First version of VSP doc

---
 docs/src/benchmarks/vsp.md | 91 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/docs/src/benchmarks/vsp.md b/docs/src/benchmarks/vsp.md
index adcb772..89b51a8 100644
--- a/docs/src/benchmarks/vsp.md
+++ b/docs/src/benchmarks/vsp.md
@@ -2,5 +2,92 @@
 [`StochasticVehicleSchedulingBenchmark`](@ref).
 
-!!! warning
-    Documentation for this benchmark is still under development. Please refer to the source code and API for more details.
+The Stochastic Vehicle Scheduling Problem (StoVSP) is a stochastic combinatorial optimization benchmark. The problem consists in assigning vehicles to cover a set of scheduled tasks, minimizing base operational costs while accounting for random delays that propagate along vehicle tours.
+
+## Problem Description
+
+### Overview
+
+In the **Vehicle Scheduling Problem (VSP)**, we consider a set of tasks $V$. Each task $v\in V$ has a scheduled beginning time $t_v^b$ and a scheduled end time $t_v^e$, such that $t_v^e > t_v^b$. We denote $t^{tr}_{(u, v)}$ the travel time from task $u$ to task $v$. A task $v$ can be scheduled consecutively after another task $u$ only if it can be reached in time, i.e.,
+```math
+t_v^b \geq t_u^e + t^{tr}_{(u, v)}
+```
+
+An instance of VSP can be modeled as an acyclic directed graph whose nodes are tasks and whose edges represent feasible successions. A solution is a set of disjoint paths covering every task exactly once, and the objective is to minimize total cost. The constraint matrix of this deterministic version is totally unimodular, so integrality constraints can be relaxed and the problem solved efficiently with a standard linear programming solver.
+
+In the **Stochastic Vehicle Scheduling Problem (StoVSP)**, we consider the same setting, but once the scheduling decision is fixed, we observe random delays that propagate along the vehicles' tours. The objective becomes minimizing the sum of the vehicles' base operational costs and the expected total delay costs over a finite set of scenarios $s \in S$.
+
+### Mathematical Formulation
+
+The deterministic problem can be formulated as a minimum-cost network flow problem. The stochastic version introduces scenarios that add complexity to the objective function.
+
+**Variables:**
+Let $y_{u,v} \in \{0, 1\}$ be the binary decision variable indicating whether a vehicle performs task $v$ immediately after task $u$. Formally, this defines the edges of the selected disjoint paths.
+
+**Delay Propagation:**
+
+For each task $v$, we denote:
+- $\gamma_v^s \in \mathbb{R}_+$: The intrinsic delay of task $v$ in scenario $s$.
+- $d_v^s \in \mathbb{R}_+$: The total delay accumulated by task $v$ in scenario $s$.
+- $\delta_{u, v} = t_v^b - (t_u^e + t^{tr}_{(u, v)})$: The slack time between tasks $u$ and $v$ (independent of the scenario).
+
+These quantities follow the *delay propagation equation*. When $u$ and $v$ are consecutively operated by the same vehicle ($y_{u,v} = 1$), the delay propagates according to the following dynamics:
+```math
+d_v^s = \gamma_v^s + \max(d_u^s - \delta_{u, v}, 0)
+```
+
+This makes the problem much more difficult to solve, since the recursive max function breaks total unimodularity. It is therefore an excellent benchmark for Decision-Focused Learning, where predicting robust base costs that account for the expectation of future delays yields superior scheduling decisions.
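+
+To make the delay propagation equation concrete, here is a minimal standalone sketch (an illustration only, not part of the benchmark API) that computes the total delays along a single vehicle tour:
+
+```julia
+# Total delays along one tour, following the delay propagation equation.
+# `γ` holds the intrinsic delays of the tour's tasks in visiting order,
+# `δ` the slack times between consecutive tasks (one entry fewer than `γ`).
+function propagate_delays(γ::Vector{Float64}, δ::Vector{Float64})
+    d = similar(γ)
+    d[1] = γ[1]  # the first task of a tour only suffers its intrinsic delay
+    for k in 2:length(γ)
+        d[k] = γ[k] + max(d[k - 1] - δ[k - 1], 0.0)
+    end
+    return d
+end
+
+propagate_delays([1.0, 0.5, 2.0], [0.8, 3.0])  # returns [1.0, 0.7, 2.0]
+```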
+
+**Objective**: Find a scheduling policy (defined by $y$) that minimizes the total cost:
+```math
+\min_{y} \quad \sum_{(u,v)} c_{u,v} y_{u,v} + \mathbb{E}_{s \in S}\left[ \sum_v C_d d_v^s \right]
+```
+where $c_{u,v}$ are the deterministic transition costs and $C_d$ is the unit penalty for delays.
+
+## Key Components
+
+### [`StochasticVehicleSchedulingBenchmark`](@ref)
+
+The main benchmark configuration with the following parameters:
+
+- `nb_tasks`: Number of tasks to schedule in each instance (default: 25)
+- `nb_scenarios`: Number of scenarios used to evaluate the expected delay costs (default: 10)
+
+### Instance Generation
+
+Each problem instance is generated by simulating a geographic city landscape with depots and task locations:
+- **Tasks**: Generated with realistic scheduled start and end times respecting the spatial layout.
+- **Scenarios**: Random intrinsic delays $\gamma$ drawn from probability distributions (e.g., log-normal).
+- **Features**: A 20-dimensional feature vector ($d=20$) describing the tasks and network properties (spatial coordinates, start times, route density, etc.).
+
+## Benchmark Policies
+
+The benchmark provides the following baseline policies:
+
+### Deterministic Policy
+[`svs_deterministic_policy`](@ref) solves the deterministic version of the VSP using a Mixed Integer Programming (MIP) solver. It completely ignores scenario delays and slack times.
+
+### Sample Average Approximation (SAA)
+This approach builds a stochastic instance using a finite set of $K$ available scenarios and minimizes the empirical expected cost. Two formulations are provided:
+- **SAA (col gen)** ([`svs_saa_policy`](@ref)): Solves the stochastic MIP using a column generation algorithm.
+- **SAA (exact MIP)** ([`svs_saa_mip_policy`](@ref)): Solves the exact stochastic MIP via a compact linearized formulation.
+
+### Local Search Policy
+[`svs_local_search_policy`](@ref) begins with a heuristic initialization (usually deterministic) and iteratively explores neighboring schedules, accepting moves that improve the expected cost over the sampled scenarios.
+
+## Decision-Focused Learning Policy
+
+```math
+\xrightarrow[\text{Features}]{x_t \in \mathbb{R}^{20}}
+\fbox{Neural network $\varphi_w$}
+\xrightarrow[\text{Predicted Cost}]{\hat{c}}
+\fbox{Deterministic VSP Solver}
+\xrightarrow[\text{Paths}]{y_t}
+```
+
+**Components**:
+
+1. **Neural Network** ``\varphi_w``: A linear model (mapping the 20-dimensional feature vector to a scalar) predicting an adjusted edge cost ``\hat{c}_{u,v}`` for each possible assignment.
+2. **Optimization Layer (Maximizer)**: A deterministic mathematical programming solver `StochasticVehicleSchedulingMaximizer` that takes the predicted costs $\hat{c}$ and solves the tractable deterministic VSP, mapping them back to a routing decision $y_t$.
+
+By training the neural network end-to-end with the combinatorial solver, the Decision-Focused Learning agent learns to produce adjusted costs $\hat{c}$ that serve as proxies for the stochastic objective, implicitly hedging against the actual stochastic delays while retaining the rapid evaluation of the deterministic solver.
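+
+As a rough sketch, the full pipeline can be assembled from the benchmark's generator functions (schematic usage only; the exact maximizer signature may require extra keyword arguments, see the package API):
+
+```julia
+using DecisionFocusedLearningBenchmarks
+
+b = StochasticVehicleSchedulingBenchmark(; nb_tasks=25, nb_scenarios=10)
+dataset = generate_dataset(b, 50)      # labeled samples (x, θ, y)
+model = generate_statistical_model(b)  # linear model: features → edge costs
+maximizer = generate_maximizer(b)      # deterministic VSP solver
+
+sample = first(dataset)
+ĉ = model(sample.x)  # predicted adjusted edge costs
+ŷ = maximizer(ĉ)     # feasible vehicle schedule
+```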
From e03a7d2218945bc57f3724700ce9825b0fda365e Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 3 Apr 2026 16:25:04 +0200 Subject: [PATCH 02/21] wip for better documentation --- docs/make.jl | 31 +++- docs/src/benchmarks/argmax.md | 4 - .../contextual_stochastic_argmax.md | 37 ---- docs/src/benchmarks/dvsp.md | 145 ---------------- docs/src/benchmarks/dynamic/dvsp.jl | 115 +++++++++++++ .../benchmarks/dynamic/dynamic_assortment.jl | 112 +++++++++++++ docs/src/benchmarks/dynamic/maintenance.jl | 105 ++++++++++++ docs/src/benchmarks/dynamic_assortment.md | 158 ------------------ .../benchmarks/fixed_size_shortest_path.md | 7 - docs/src/benchmarks/maintenance.md | 107 ------------ docs/src/benchmarks/portfolio_optimization.md | 15 -- docs/src/benchmarks/ranking.md | 4 - .../static/fixed_size_shortest_path.jl | 82 +++++++++ .../static/portfolio_optimization.jl | 86 ++++++++++ docs/src/benchmarks/static/ranking.jl | 73 ++++++++ .../src/benchmarks/static/subset_selection.jl | 77 +++++++++ docs/src/benchmarks/static/warcraft.jl | 89 ++++++++++ docs/src/benchmarks/stochastic/vsp.jl | 116 +++++++++++++ docs/src/benchmarks/subset_selection.md | 13 -- docs/src/benchmarks/toy/argmax.jl | 84 ++++++++++ docs/src/benchmarks/toy/argmax2d.jl | 74 ++++++++ .../toy/contextual_stochastic_argmax.jl | 102 +++++++++++ docs/src/benchmarks/vsp.md | 93 ----------- docs/src/benchmarks/warcraft.md | 3 - ext/DFLBenchmarksPlotsExt.jl | 8 + ext/plots/argmax_plots.jl | 61 +++++++ .../contextual_stochastic_argmax_plots.jl | 56 +++++++ ext/plots/dynamic_assortment_plots.jl | 50 ++++++ ext/plots/maintenance_plots.jl | 54 ++++++ ext/plots/portfolio_plots.jl | 41 +++++ ext/plots/ranking_plots.jl | 35 ++++ ext/plots/shortest_path_plots.jl | 94 +++++++++++ ext/plots/subset_selection_plots.jl | 33 ++++ .../ContextualStochasticArgmax.jl | 10 ++ src/ContextualStochasticArgmax/policies.jl | 10 -- 35 files changed, 1585 insertions(+), 599 deletions(-) delete mode 100644 docs/src/benchmarks/argmax.md delete mode 100644 docs/src/benchmarks/contextual_stochastic_argmax.md delete mode 100644 docs/src/benchmarks/dvsp.md create mode 100644 docs/src/benchmarks/dynamic/dvsp.jl create mode 100644 docs/src/benchmarks/dynamic/dynamic_assortment.jl create mode 100644 docs/src/benchmarks/dynamic/maintenance.jl delete mode 100644 docs/src/benchmarks/dynamic_assortment.md delete mode 100644 docs/src/benchmarks/fixed_size_shortest_path.md delete mode 100644 docs/src/benchmarks/maintenance.md delete mode 100644 docs/src/benchmarks/portfolio_optimization.md delete mode 100644 docs/src/benchmarks/ranking.md create mode 100644 docs/src/benchmarks/static/fixed_size_shortest_path.jl create mode 100644 docs/src/benchmarks/static/portfolio_optimization.jl create mode 100644 docs/src/benchmarks/static/ranking.jl create mode 100644 docs/src/benchmarks/static/subset_selection.jl create mode 100644 docs/src/benchmarks/static/warcraft.jl create mode 100644 docs/src/benchmarks/stochastic/vsp.jl delete mode 100644 docs/src/benchmarks/subset_selection.md create mode 100644 docs/src/benchmarks/toy/argmax.jl create mode 100644 docs/src/benchmarks/toy/argmax2d.jl create mode 100644 docs/src/benchmarks/toy/contextual_stochastic_argmax.jl delete mode 100644 docs/src/benchmarks/vsp.md delete mode 100644 docs/src/benchmarks/warcraft.md create mode 100644 ext/plots/argmax_plots.jl create mode 100644 ext/plots/contextual_stochastic_argmax_plots.jl create mode 100644 ext/plots/dynamic_assortment_plots.jl create mode 100644 ext/plots/maintenance_plots.jl create mode 100644 
ext/plots/portfolio_plots.jl create mode 100644 ext/plots/ranking_plots.jl create mode 100644 ext/plots/shortest_path_plots.jl create mode 100644 ext/plots/subset_selection_plots.jl diff --git a/docs/make.jl b/docs/make.jl index 4a1ec1b..b33f305 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,11 +5,16 @@ using Literate md_dir = joinpath(@__DIR__, "src") tutorial_dir = joinpath(@__DIR__, "src", "tutorials") benchmarks_dir = joinpath(@__DIR__, "src", "benchmarks") -api_dir = joinpath(@__DIR__, "src", "api") tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] -benchmark_files = [joinpath("benchmarks", e) for e in readdir(benchmarks_dir)] + +categories = [ + "Toy problems" => "toy", + "Static problems" => "static", + "Stochastic problems" => "stochastic", + "Dynamic problems" => "dynamic", +] include_tutorial = true @@ -20,6 +25,19 @@ if include_tutorial end end +benchmark_sections = Pair{String,Vector{String}}[] + +for (label, subdir) in categories + dir = joinpath(benchmarks_dir, subdir) + jl_files = filter(f -> endswith(f, ".jl"), readdir(dir)) + md_names = [splitext(f)[1] * ".md" for f in jl_files] + for file in jl_files + Literate.markdown(joinpath(dir, file), dir; documenter=true, execute=false) + end + md_paths = [joinpath("benchmarks", subdir, f) for f in md_names] + push!(benchmark_sections, label => md_paths) +end + makedocs(; modules=[DecisionFocusedLearningBenchmarks], authors="Members of JuliaDecisionFocusedLearning", @@ -32,7 +50,7 @@ makedocs(; "Creating custom benchmarks" => "custom_benchmarks.md", ], "Tutorials" => include_tutorial ? md_tutorial_files : [], - "Benchmark problems list" => benchmark_files, + "Benchmarks" => benchmark_sections, "API reference" => "api.md", ], ) @@ -44,6 +62,13 @@ if include_tutorial end end +for (_, subdir) in categories + dir = joinpath(benchmarks_dir, subdir) + for f in filter(f -> endswith(f, ".md"), readdir(dir)) + rm(joinpath(dir, f); force=true) + end +end + deploydocs(; repo="github.com/JuliaDecisionFocusedLearning/DecisionFocusedLearningBenchmarks.jl", devbranch="main", diff --git a/docs/src/benchmarks/argmax.md b/docs/src/benchmarks/argmax.md deleted file mode 100644 index 00a5e67..0000000 --- a/docs/src/benchmarks/argmax.md +++ /dev/null @@ -1,4 +0,0 @@ -# Argmax - -!!! warning - Documentation for this benchmark is still under development. Please refer to the source code and API for more details. diff --git a/docs/src/benchmarks/contextual_stochastic_argmax.md b/docs/src/benchmarks/contextual_stochastic_argmax.md deleted file mode 100644 index 59f588f..0000000 --- a/docs/src/benchmarks/contextual_stochastic_argmax.md +++ /dev/null @@ -1,37 +0,0 @@ -# Contextual Stochastic Argmax - -[`ContextualStochasticArgmaxBenchmark`](@ref) is a minimalist contextual stochastic optimization benchmark problem. - -The decision maker selects one item out of ``n``. Item values are uncertain at decision time: they depend on a base utility plus a context-correlated perturbation revealed only after the decision is made. An observable context vector, correlated with the perturbation via a fixed linear map ``W``, allows the learner to anticipate the perturbation and pick the right item. - -## Problem Formulation - -**Instance**: ``c_{\text{base}} \sim \mathcal{U}[0,1]^n``, base values for ``n`` items. - -**Context**: ``x_{\text{raw}} \sim \mathcal{N}(0, I_d)``, a ``d``-dimensional signal correlated with item values. 
The feature vector passed to the model is ``x = [c_{\text{base}};\, x_{\text{raw}}] \in \mathbb{R}^{n+d}``. - -**Scenario**: the realized item values are -```math -\xi = c_{\text{base}} + W x_{\text{raw}} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, \sigma^2 I_n) -``` -where ``W \in \mathbb{R}^{n \times d}`` is a fixed matrix unknown to the learner. - -**Decision**: ``y \in \{e_1, \ldots, e_n\}`` (one-hot vector selecting one item). - -## Policies - -### DFL Policy - -```math -\xrightarrow[\text{Features}]{x} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Predicted values}]{\hat{\theta}} -\fbox{\texttt{one\_hot\_argmax}} -\xrightarrow[\text{Decision}]{y} -``` - -The neural network predicts item values ``\hat{\theta} \in \mathbb{R}^n`` from the feature vector ``x \in \mathbb{R}^{n+d}``. The default architecture is `Dense(n+d => n; bias=false)`, which can exactly recover the optimal linear predictor ``[I_n \mid W]``, so a well-trained model should reach near-zero gap. - -### SAA Policy - -``y_{\text{SAA}} = \operatorname{argmax}\bigl(\frac{1}{S}\sum_s \xi^{(s)}\bigr)`` — the exact SAA-optimal decision for linear argmax, accessible via `generate_baseline_policies(bench).saa`. diff --git a/docs/src/benchmarks/dvsp.md b/docs/src/benchmarks/dvsp.md deleted file mode 100644 index 2282597..0000000 --- a/docs/src/benchmarks/dvsp.md +++ /dev/null @@ -1,145 +0,0 @@ -# Dynamic Vehicle Scheduling - -The Dynamic Vehicle Scheduling Problem (DVSP) is a sequential decision-making problem where an agent must dynamically dispatch vehicles to serve customers that arrive over time. - -## Problem Description - -### Overview - -In the dynamic vehicle scheduling problem, a fleet operator must decide at each time step which customer to serve immediately and which to postpone to future time steps. -The goal is to serve all customers by the end of the planning horizon while minimizing total travel time. - -This is a simplified version of the more complex Dynamic Vehicle Routing Problem with Time Windows (DVRPTW), focusing on the core sequential decision-making aspects without capacity or time window constraints. - -The problem is characterized by: -- **Exogenous noise**: customer arrivals are stochastic and follow a fixed known distribution, independent of the agent's actions -- **Combinatorial action space**: at each time step, the agent must build vehicle routes to serve selected customers, which leads to a huge combinatorial action space - -### Mathematical Formulation - -The dynamic vehicle scheduling problem can be formulated as a finite-horizon Markov Decision Process (MDP): - -**State Space** ``\mathcal{S}``: At time step ``t``, the state ``s_t`` consists of: -```math -s_t = (R_t, D_t, t) -``` -where: -- ``R_t`` are the pending customer (not yet served), where each customer ``r_i \in R_t`` contains: - - ``x_i, y_i``: 2d spatial coordinates of the customer location - - ``\tau_i``: start time when the customer needs to be served - - ``s_i``: service time required to serve the customer -- ``D_t`` indicates which customers must be dispatched this time step (i.e. 
that cannot be postponed further, otherwise they will be infeasible at the next time step because of their start time) -- ``t \in \{1, 2, \ldots, T\}`` is the current time step - -The state also implicitly includes (constant over time): -- Travel duration matrix ``d_{ij}``: time to travel from location ``i`` to location ``j`` -- Depot location - -**Action Space** ``\mathcal{A}(s_t)``: The action at time step ``t`` is a set of vehicle routes: -```math -a_t = \{r_1, r_2, \ldots, r_k\} -``` -where each route ``r_i`` is a sequence of customer that starts and ends at the depot. - -A route is feasible if: -- It starts and ends at the depot -- It follows time constraints, i.e. customers are served on time - -**Transition Dynamics** ``\mathcal{P}(s_{t+1} | s_t, a_t)``: After executing routes ``a_t``: - -1. **Remove served customers** from the pending customer set -2. **Generate new customer arrivals** according to the underlying exogenous distribution -3. **Update must-dispatch set** based on postponement rules - -**Reward Function** ``r(s_t, a_t)``: The immediate reward is the negative total travel time of the routes: - -```math -r(s_t, a_t) = - \sum_{r \in a_t} \sum_{(i,j) \in r} d_{ij} -``` - -where ``d_{ij}`` is the travel duration from location ``i`` to location ``j``, and the sum is over all consecutive location pairs in each route ``r``. - -**Objective**: Find a policy ``\pi: \mathcal{S} \to \mathcal{A}`` that maximizes expected cumulative reward: -```math -\max_\pi \mathbb{E}\left[\sum_{t=1}^T r(s_t, \pi(s_t)) \right] -``` - -## Key Components - -### [`DynamicVehicleSchedulingBenchmark`](@ref) - -The main benchmark configuration with the following parameters: - -- `max_requests_per_epoch`: Maximum number of new customers per time step (default: 10) -- `Δ_dispatch`: Time delay between decision and vehicle dispatch (default: 1.0) -- `epoch_duration`: Duration of each decision time step (default: 1.0) -- `two_dimensional_features`: Whether to use simplified 2D features instead of full feature set (default: false) - -### Instance Generation - -Problem instances are generated from static vehicle routing datasets and include: - -- **Customer locations**: Spatial coordinates for pickup/delivery points -- **Depot location**: Central starting and ending point for all routes -- **Travel times**: Distance/duration matrix between all location pairs -- **Service times**: Service time each customer - -The dynamic version samples new customer arrivals from the static instance, drawing new customers by independently sampling: -- their locations from the set of static customer locations -- service times, uniformly from the range of service times in the static instance - -### Features - -The benchmark provides two feature matrix representations, containing one column per postponable customer in the state: - -**Full Features** (27-dimensional): -- Start times for postponable customers (1) -- End times (start + service time) (2) -- Travel time from depot to customer (3) -- Travel time from customer to depot (4) -- Slack time until next time step (5) -- % of must-dispatch customers that can reach this customer on time (6) -- % of customers reachable from this customer on time (7) -- % of customers that can reach this customer on time (8) -- % of customers reachable or that can reach this customer on time (9) -- Quantile-based travel times to other customers (9 quantiles) (10-18) -- Quantiles of % of reachable new customers (9 quantiles) (19-27) - -**2D Features** (simplified): -- Travel time from depot to 
customer (1) -- Mean travel time to other customers (2) - -## Benchmark Policies - -### Lazy Policy - -The lazy policy postpones all possible customers, serving only those that must be dispatched. - -### Greedy Policy - -The greedy policy serves all pending customers as soon as they arrive, without considering future consequences. - -## Decision-Focused Learning Policy - -```math -\xrightarrow[\text{State}]{s_t} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Prizes}]{\theta} -\fbox{Prize-collecting VSP} -\xrightarrow[\text{Routes}]{a_t} -``` - -**Components**: - -1. **Neural Network** ``\varphi_w``: Takes current state features as input and predicts customer prizes ``\theta = (\theta_1, \ldots, \theta_n)``, one value per postponable customer. -2. **Optimization Layer**: Solves the prize-collecting vehicle scheduling problem to determine optimal routes given the predicted prizes, by maximizing total collected prizes minus travel costs: - ```math - \max_{a_t\in \mathcal{A}(s_t)} \sum_{r \in a_t} \left( \sum_{i \in r} \theta_i - \sum_{(i,j) \in r} d_{ij} \right) - ``` - This can be modeled as a flow linear program on a directed acyclic graph (DAG) and is solved using standard LP solvers. - -The neural network architecture adapts to the feature dimensionality: -- **2D features**: `Dense(2 => 1)`, applied in parallel to each postponable customer -- **Full features**: `Dense(27 => 1)` applied in parallel to each postponable customer - -**Note:** one can also use more complex architectures such as a deeper MLP or a graph neural network for better performance. diff --git a/docs/src/benchmarks/dynamic/dvsp.jl b/docs/src/benchmarks/dynamic/dvsp.jl new file mode 100644 index 0000000..9c86f5e --- /dev/null +++ b/docs/src/benchmarks/dynamic/dvsp.jl @@ -0,0 +1,115 @@ +# # Dynamic Vehicle Scheduling +# Dispatch vehicles to customers arriving over time: at each step the agent decides which +# customers to serve now and which to postpone, minimizing total travel cost. + +using DecisionFocusedLearningBenchmarks +using Plots + +b = DynamicVehicleSchedulingBenchmark() + +# ## A sample episode +# +# Generate one environment and roll it out with the greedy policy (serves all pending +# customers immediately): +policies = generate_baseline_policies(b) +env = generate_environments(b, 1)[1] +_, trajectory = evaluate_policy!(policies.greedy, env) + +# One step: depot (green square), must-dispatch customers (red stars; deadline reached), +# postponable customers (blue triangles), vehicle routes (lines): +plot_solution(b, trajectory[1]) + +# Multiple steps side by side — customers accumulate and routes change over time: +plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) + +# ## DFL pipeline components + +# The DFL agent chains two components: a neural network predicting a prize per customer: +model = generate_statistical_model(b) # Dense(27 → 1) per customer: state features → prize +# and a maximizer selecting routes that balance collected prizes against travel costs: +maximizer = generate_maximizer(b) # prize-collecting VSP solver + +# At each step, the model assigns a prize to each postponable customer. The solver then +# selects routes maximizing collected prizes minus travel costs, deciding which customers +# to serve now and which to defer. + +# --- +# ## Problem Description +# +# ### Overview +# +# In the **Dynamic Vehicle Scheduling Problem (DVSP)**, a fleet operator must decide at +# each time step which customers to serve immediately and which to postpone. 
The goal is
+# to serve all customers by the end of the planning horizon while minimizing total travel time.
+#
+# The problem is characterized by:
+# - **Exogenous noise**: customer arrivals are stochastic and follow a fixed distribution
+# - **Combinatorial action space**: routes are built over a large set of customers
+#
+# ### Mathematical Formulation
+#
+# **State** ``s_t = (R_t, D_t, t)`` where:
+# - ``R_t``: pending customers, each with coordinates, start time, service time
+# - ``D_t``: must-dispatch customers (cannot be postponed further)
+# - ``t``: current time step
+#
+# **Action** ``a_t``: a set of vehicle routes ``\{r_1, r_2, \ldots, r_k\}``, each starting
+# and ending at the depot, satisfying time constraints.
+#
+# **Reward:**
+# ```math
+# r(s_t, a_t) = -\sum_{r \in a_t} \sum_{(i,j) \in r} d_{ij}
+# ```
+#
+# **Objective:**
+# ```math
+# \max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right]
+# ```
+#
+# ## Key Components
+#
+# ### [`DynamicVehicleSchedulingBenchmark`](@ref)
+#
+# | Parameter | Description | Default |
+# |-----------|-------------|---------|
+# | `max_requests_per_epoch` | Maximum new customers per time step | 10 |
+# | `Δ_dispatch` | Time delay between decision and dispatch | 1.0 |
+# | `epoch_duration` | Duration of each time step | 1.0 |
+# | `two_dimensional_features` | Use 2D instead of full 27D features | `false` |
+#
+# ### Features
+#
+# **Full features (27D per customer):** start/end times, depot travel times, slack,
+# reachability ratios, quantile-based travel times to other customers.
+#
+# **2D features:** travel time from depot + mean travel time to others.
+#
+# ## Baseline Policies
+#
+# | Policy | Description |
+# |--------|-------------|
+# | Lazy | Postpones all possible customers; serves only must-dispatch |
+# | Greedy | Serves all pending customers immediately |
+#
+# ## DFL Policy
+#
+# ```math
+# \xrightarrow[\text{State}]{s_t}
+# \fbox{Neural network $\varphi_w$}
+# \xrightarrow[\text{Prizes}]{\theta}
+# \fbox{Prize-collecting VSP}
+# \xrightarrow[\text{Routes}]{a_t}
+# ```
+#
+# The neural network predicts a prize ``\theta_i`` for each postponable customer.
+# The prize-collecting VSP solver then maximizes collected prizes minus travel costs:
+# ```math
+# \max_{a_t \in \mathcal{A}(s_t)} \sum_{r \in a_t} \left(\sum_{i \in r} \theta_i - \sum_{(i,j) \in r} d_{ij}\right)
+# ```
+#
+# **Model:**
+# - 2D features: `Dense(2 → 1)` applied independently per customer
+# - Full features: `Dense(27 → 1)` applied independently per customer
+#
+# !!! note "Reference"
+#     TODO: add original reference.
diff --git a/docs/src/benchmarks/dynamic/dynamic_assortment.jl b/docs/src/benchmarks/dynamic/dynamic_assortment.jl
new file mode 100644
index 0000000..9d00d3c
--- /dev/null
+++ b/docs/src/benchmarks/dynamic/dynamic_assortment.jl
@@ -0,0 +1,112 @@
+# # Dynamic Assortment
+# Select which K items to offer at each step to maximize revenue: customer preferences
+# evolve dynamically based on purchase history (hype and saturation effects).
+
+using DecisionFocusedLearningBenchmarks
+using Plots
+
+b = DynamicAssortmentBenchmark()
+
+# ## A sample episode
+#
+# Generate one environment and roll out with the greedy policy (offers the K highest-priced
+# items at every step):
+policies = generate_baseline_policies(b)
+env = generate_environments(b, 1)[1]
+_, trajectory = evaluate_policy!(policies.greedy, env)
+
+# One step: bar chart of item prices, green = items in the offered assortment:
+plot_solution(b, trajectory[1])
+
+# A few steps side by side (prices are fixed; assortment composition changes over time):
+plot_trajectory(b, trajectory[1:min(4, length(trajectory))])
+
+# ## DFL pipeline components
+
+# The DFL agent chains two components: a neural network predicting utility scores per item:
+model = generate_statistical_model(b) # MLP: state features → predicted utility per item
+# and a maximizer offering the K items with the highest predicted utilities:
+maximizer = generate_maximizer(b) # top-K selection by predicted utility
+
+# At each step, the model maps the current state (prices, hype, saturation, history) to a
+# utility score per item. The maximizer selects the K items with the highest scores.
+
+# ---
+# ## Problem Description
+#
+# ### Overview
+#
+# In the **Dynamic Assortment problem**, a retailer has ``N`` items and must select
+# ``K`` to offer at each time step. Customer preferences evolve based on purchase history
+# through **hype** (recent purchases increase demand) and **saturation** (repeated
+# purchases slightly decrease demand).
+#
+# ### Mathematical Formulation
+#
+# **State** ``s_t = (p, f, h_t, \sigma_t, t, \mathcal{H}_t)`` where:
+# - ``p``: fixed item prices
+# - ``f``: static item features
+# - ``h_t, \sigma_t``: current hype and saturation levels
+# - ``t``: current time step
+# - ``\mathcal{H}_t``: purchase history (last 5 purchases)
+#
+# **Action:** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| = K``
+#
+# **Customer choice** (multinomial logit):
+# ```math
+# \mathbb{P}(i \mid a_t, s_t) = \frac{\exp(\theta_i(s_t))}{\sum_{j \in a_t} \exp(\theta_j(s_t)) + 1}
+# ```
+#
+# **Transition dynamics:**
+# - Hype: ``h_{t+1}^{(i)} = h_t^{(i)} \times m^{(i)}`` where the multiplier reflects recent purchases
+# - Saturation: multiplied by 1.01 for the purchased item
+#
+# **Reward:** ``r(s_t, a_t) = p_{i^\star}`` (price of the purchased item, 0 if no purchase)
+#
+# **Objective:**
+# ```math
+# \max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right]
+# ```
+#
+# ## Key Components
+#
+# ### [`DynamicAssortmentBenchmark`](@ref)
+#
+# | Parameter | Description | Default |
+# |-----------|-------------|---------|
+# | `N` | Number of items in catalog | 20 |
+# | `d` | Static feature dimension per item | 2 |
+# | `K` | Assortment size | 4 |
+# | `max_steps` | Steps per episode | 80 |
+# | `exogenous` | Whether dynamics are exogenous | `false` |
+#
+# ### State Observation
+#
+# Agents observe a ``(d+8) \times N`` normalized feature matrix per step containing:
+# current prices, hype, saturation, static features, changes in hype/saturation from the
+# previous step and from the initial state, and the normalized time step.
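+#
+# To make the choice model above concrete, here is a minimal self-contained sketch of the
+# multinomial logit purchase probabilities (an illustration only, not the benchmark API):
+#
+# ```julia
+# # Purchase probabilities for the offered items; the no-purchase option
+# # has utility 0 and contributes the `+ 1` in the denominator.
+# function choice_probabilities(θ::Vector{Float64})
+#     w = exp.(θ)
+#     Z = sum(w) + 1.0
+#     return w ./ Z, 1.0 / Z  # item probabilities, no-purchase probability
+# end
+#
+# choice_probabilities([1.0, 0.0, -1.0])  # higher-utility items are chosen more often
+# ```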
+# +# ## Baseline Policies +# +# | Policy | Description | +# |--------|-------------| +# | Expert | Brute-force enumeration of all ``\binom{N}{K}`` subsets; optimal but slow | +# | Greedy | Selects the ``K`` items with highest prices | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{State}]{s_t} +# \fbox{Neural network $\varphi_w$} +# \xrightarrow[\text{Utilities}]{\theta \in \mathbb{R}^N} +# \fbox{Top-K} +# \xrightarrow[\text{Assortment}]{a_t} +# ``` +# +# **Model:** `Chain(Dense(d+8 → 5), Dense(5 → 1), vec)` — predicts one utility score +# per item from the current state features. +# +# **Maximizer:** `TopKMaximizer(K)` — selects the top ``K`` items by predicted utility. +# +# !!! note "Reference" +# [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) diff --git a/docs/src/benchmarks/dynamic/maintenance.jl b/docs/src/benchmarks/dynamic/maintenance.jl new file mode 100644 index 0000000..a9205de --- /dev/null +++ b/docs/src/benchmarks/dynamic/maintenance.jl @@ -0,0 +1,105 @@ +# # Maintenance +# Decide which components to maintain at each step to minimize failure and maintenance costs: +# components degrade stochastically and the agent has limited maintenance capacity. + +using DecisionFocusedLearningBenchmarks +using Plots + +b = MaintenanceBenchmark(; N=5, K=2) # 5 components, maintain up to 2 per step + +# ## A sample episode +# +# Generate one environment and roll out with the greedy policy (maintains the most degraded +# components up to capacity): +policies = generate_baseline_policies(b) +env = generate_environments(b, 1)[1] +_, trajectory = evaluate_policy!(policies.greedy, env) + +# One step: bars show degradation levels (1 = new, n = failed), green = maintained, red = failed: +plot_solution(b, trajectory[1]) + +# A few steps side by side showing degradation evolving over time: +plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) + +# ## DFL pipeline components + +# The DFL agent chains two components: a neural network predicting urgency scores per component: +model = generate_statistical_model(b) # two-layer MLP: degradation state → urgency scores +# and a maximizer selecting the most urgent components for maintenance: +maximizer = generate_maximizer(b) # top-K selection among components with positive scores + +# At each step, the model maps the current degradation state to an urgency score per component. +# The maximizer selects up to K components with the highest positive scores for maintenance. + +# --- +# ## Problem Description +# +# ### Overview +# +# In the **Maintenance benchmark**, a system has ``N`` identical components, each with +# ``n`` discrete degradation states (1 = new, ``n`` = failed). At each step, the agent +# can maintain up to ``K`` components. Maintained components are reset to state 1. +# Unmaintained components degrade stochastically. +# +# ### Mathematical Formulation +# +# **State** ``s_t \in \{1,\ldots,n\}^N``: degradation level of each component. 
+# +# **Action** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| \leq K`` +# +# **Transition dynamics:** For each component ``i``: +# - If maintained: ``s_{t+1}^i = 1`` +# - If not maintained: ``s_{t+1}^i = \min(s_t^i + 1, n)`` with probability ``p``, else ``s_t^i`` +# +# **Cost:** +# ```math +# c(s_t, a_t) = c_m \cdot |a_t| + c_f \cdot \#\{i : s_t^i = n\} +# ``` +# +# **Objective:** +# ```math +# \min_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T c(s_t, \pi(s_t))\right] +# ``` +# +# ## Key Components +# +# ### [`MaintenanceBenchmark`](@ref) +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `N` | Number of components | 2 | +# | `K` | Max simultaneous maintenance operations | 1 | +# | `n` | Degradation levels per component | 3 | +# | `p` | Degradation probability per step | 0.2 | +# | `c_f` | Failure cost per failed component | 10.0 | +# | `c_m` | Maintenance cost per maintained component | 3.0 | +# | `max_steps` | Steps per episode | 80 | +# +# ### Instance Generation +# +# Each instance has random starting degradation states uniformly drawn from ``\{1,\ldots,n\}``. +# +# ## Baseline Policies +# +# | Policy | Description | +# |--------|-------------| +# | Greedy | Maintains components in the last degradation state before failure, up to capacity | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{State}]{s_t \in \{1,\ldots,n\}^N} +# \fbox{Neural network $\varphi_w$} +# \xrightarrow[\text{Scores}]{\theta \in \mathbb{R}^N} +# \fbox{Top-K (positive)} +# \xrightarrow[\text{Maintenance}]{a_t} +# ``` +# +# **Model:** `Chain(Dense(N → N), Dense(N → N), vec)` — two-layer MLP predicting one +# urgency score per component. +# +# **Maximizer:** `TopKPositiveMaximizer(K)` — selects the ``K`` components with the +# highest positive scores for maintenance. +# +# !!! note "Reference" +# TODO: add original reference. diff --git a/docs/src/benchmarks/dynamic_assortment.md b/docs/src/benchmarks/dynamic_assortment.md deleted file mode 100644 index 6f5264c..0000000 --- a/docs/src/benchmarks/dynamic_assortment.md +++ /dev/null @@ -1,158 +0,0 @@ -# Dynamic Assortment - -The Dynamic Assortment problem is a sequential decision-making benchmark where an agent must repeatedly select which subset of items to offer to customers over time. The goal is to maximize total revenue while accounting for dynamic customer preferences that evolve based on purchase history. - -## Problem Description - -### Overview - -In the dynamic assortment problem, a retailer has access to a catalog of ``N`` items and must decide which subset of exactly ``K`` items to offer to customers at each time step. Customers make purchasing decisions according to a choice model that depends on public features ``x``: - -- **Item prices**: Fixed monetary cost of each item -- **Item features**: Static characteristics of each item (size ``d``) -- **Hype**: Dynamic popularity that increases when items are purchased recently, and decays over time if not purchased -- **Saturation**: Dynamic measure that slightly increases when specific items are purchased - -Both hype and saturation evolve over time based on the agent's assortment decisions and customer purchases, this providing an endogenous multistage stochastic optimization problem. 
- -### Mathematical Formulation - -The dynamic assortment problem can be formulated as a finite-horizon Markov Decision Process (MDP) with the following components: - -**State Space** ``\mathcal{S}``: At time step ``t``, the state ``s_t`` consists of: -```math -s_t = (p, f, h_t, \sigma_t, t, \mathcal{H}_t) -``` -where: -- ``p \in \mathbb{R}^N`` are the fixed item prices -- ``f \in \mathbb{R}^{d \times N}`` are the static item features -- ``h_t \in \mathbb{R}^N`` are the current hype levels for each item -- ``\sigma_t \in \mathbb{R}^N`` are the current saturation levels for each item -- ``t \in \{1, 2, \ldots, T\}`` is the current time step -- ``\mathcal{H}_t`` is the purchase history (last 5 purchases) - -**Action Space** ``\mathcal{A}``: The action at time ``t`` is an assortment selection: -```math -a_t \subseteq \{1, 2, \ldots, N\} \text{ such that } |a_t| = K -``` - -**Customer Choice Model**: Given assortment ``a_t``, customers choose according to a multinomial logit model: -```math -\forall i\in a_t,\, \mathbb{P}(i | a_t, s_t) = \frac{\exp(\theta_i(s_t))}{\sum_{j\in a_t} \exp(\theta_j(s_t)) + 1} -``` -```math -\mathbb{P}(\text{no purchase} | a_t, s_t) = \frac{1}{\sum_{j\in a_t} \exp(\theta_j(s_t)) + 1} -``` - -where ``\theta_i(s_t)`` is the utility of item ``i`` at state ``s_t``, computed by a hidden utility function: -```math -\theta_i(s_t) = \Phi(p_i, h_t^{(i)}, \sigma_t^{(i)}, f_{\cdot,i}) -``` - -**Transition Dynamics** ``\mathcal{P}(s_{t+1} | s_t, a_t)``: After selecting assortment ``a_t`` and observing customer choice ``i^\star \sim \mathbb{P}(\cdot | a_t, s_t)``, the state evolves as: - -1. **Hype Update**: For each item ``i``, compute a hype multiplier based on recent purchase history: - ```math - m^{(i)} = 1 + \sum_{k=1}^{\min(5, |\mathcal{H}_t|)} \mathbf{1}_{i = \mathcal{H}_t[-k]} \cdot \alpha_k - ``` - where ``\mathcal{H}_t[-k]`` is the ``k``-th most recent purchase, and the factors are: - ```math - \alpha_1 = 0.02, \quad \alpha_2 = \alpha_3 = \alpha_4 = \alpha_5 = -0.005 - ``` - Then update: ``h_{t+1}^{(i)} = h_t^{(i)} \times m^{(i)}`` - -2. **Saturation Update**: - ```math - \sigma_{t+1}^{(i)} = \begin{cases} - \sigma_t^{(i)} \times 1.01 & \text{if } i = i^\star \\ - \sigma_t^{(i)} & \text{otherwise} - \end{cases} - ``` - -3. **History Update**: ``\mathcal{H}_{t+1} = \text{append}(\mathcal{H}_t, i^\star)`` (keeping last 5 purchases) - -**Reward Function** ``r(s_t, a_t, s_{t+1})``: The immediate reward is the revenue from the customer's purchase: -```math -r(s_t, a_t, s_{t+1}) = \begin{cases} -p_{i^\star} & \text{if customer purchases item } i^\star \\ -0 & \text{if no purchase} -\end{cases} -``` - -**Objective**: Find a policy ``\pi: \mathcal{S} \to \mathcal{A}`` that maximizes the expected cumulative reward: -```math -\max_\pi \mathbb{E}\left[\sum_{t=1}^T r(s_t, \pi(s_t), s_{t+1}) \right] -``` - -**Terminal Condition**: The episode terminates after ``T`` time steps, with no terminal reward. 
- -## Key Components - -### [`DynamicAssortmentBenchmark`](@ref) - -The main benchmark configuration with the following parameters: - -- `N`: Number of items in the catalog (default: 20) -- `d`: Dimension of static feature vectors (default: 2) -- `K`: Assortment size constraint (default: 4) -- `max_steps`: Number of time steps per episode (default: 80) -- `customer_choice_model`: linear mapping from features to utilities -- `exogenous`: Whether dynamics are exogenous (default: false) - -### Instance Generation - -Each problem instance includes: - -- **Prices**: Random values in [1, 10] for each item, plus 0 for no-purchase -- **Features**: Random static features in [1, 10] for each item -- **Initial State**: Random starting hype and saturation values in [1, 10] - -### Environment Dynamics - -The environment tracks: -- Current time step -- Purchase history (last 5 purchases) -- Current hype and saturation for each item -- Customer utilities computed from current state - -**State Observation**: Agents observe a normalized feature vector containing: -- Current full features (prices, hype, saturation, static features) -- Change in hype/saturation from previous step -- Change in hype/saturation from initial state -- Normalized current time step - -All features are divided by 10 for normalization. - -## Benchmark Policies - -### Expert Policy - -The expert policy computes the optimal assortment by brute-force enumeration: -1. Enumerate all possible K-subsets of the N items -2. For each subset, compute expected revenue using the choice model -3. Return the subset with highest expected revenue - -This provides an optimal baseline but is computationally expensive. - -### Greedy Policy - -The greedy policy selects the K items with the highest prices, ignoring dynamic effects and customer preferences. This provides a simple baseline. - -## Decision-Focused Learning Policy - -```math -\xrightarrow[\text{State}]{s_t} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Cost vector}]{\theta} -\fbox{Top K} -\xrightarrow[\text{Assortment}]{a_t} -``` - -**Components**: - -1. **Neural Network** ``\varphi_w``: Takes the current state ``s_t`` as input and predicts item utilities ``\theta = (\theta_1, \ldots, \theta_N)`` -2. **Optimization Layer**: Selects the top ``K`` items with highest predicted utilities to form the assortment ``a_t`` - -## Reference - -Based on the paper: [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) diff --git a/docs/src/benchmarks/fixed_size_shortest_path.md b/docs/src/benchmarks/fixed_size_shortest_path.md deleted file mode 100644 index 049724d..0000000 --- a/docs/src/benchmarks/fixed_size_shortest_path.md +++ /dev/null @@ -1,7 +0,0 @@ -# Shortest paths - -[`FixedSizeShortestPathBenchmark`](@ref) is a benchmark problem that consists of finding the shortest path in a grid graph between the top left and bottom right corners. -In this benchmark, the grid size is the same for all instances. - -!!! warning - Documentation for this benchmark is still under development. Please refer to the source code and API for more details. 
\ No newline at end of file diff --git a/docs/src/benchmarks/maintenance.md b/docs/src/benchmarks/maintenance.md deleted file mode 100644 index 060099d..0000000 --- a/docs/src/benchmarks/maintenance.md +++ /dev/null @@ -1,107 +0,0 @@ -# Maintenance problem with resource constraint - -The Maintenance problem with resource constraint is a sequential decision-making benchmark where an agent must repeatedly decide which components to maintain over time. The goal is to minimize total expected cost while accounting for independent degradation of components and limited maintenance capacity. - - -## Problem Description - -### Overview - -In this benchmark, a system consists of ``N`` identical components, each of which can degrade over ``n`` discrete states. State ``1`` means that the component is new, state $n$ means that the component is failed. At each time step, the agent can maintain up to $K$ components. - -This forms an endogenous multistage stochastic optimization problem, where the agent must plan maintenance actions over the horizon. - -### Mathematical Formulation - -The maintenance problem can be formulated as a finite-horizon Markov Decision Process (MDP) with the following components: - -**State Space** ``\mathcal{S}``: At time step ``t``, the state ``s_t \in [1:n]^N`` is the degradation state for each component. - -**Action Space** ``\mathcal{A}``: The action at time ``t`` is the set of components that are maintained at time ``t``: -```math -a_t \subseteq \{1, 2, \ldots, N\} \text{ such that } |a_t| \leq K -``` -### Transition Dynamics - -The state transitions depend on whether a component is maintained or not: - -For each component \(i\) at time \(t\): - -- **Maintained component** (\(i \in a_t\)): - -\[ -s_{t+1}^i = 1 \quad \text{(perfect maintenance)} -\] - -- **Unmaintained component** (\(i \notin a_t\)): - -\[ -s_{t+1}^i = -\begin{cases} -\min(s_t^i + 1, n) & \text{with probability } p,\\ -s_t^i & \text{with probability } 1-p. -\end{cases} -\] - -Here, \(p\) is the degradation probability, \(s_t^i\) is the current state of component \(i\), and \(n\) is the maximum (failed) state. - ---- - -### Cost Function - -The immediate cost at time \(t\) is: - -```math -c(s_t, a_t) = \Big( c_m \cdot |a_t| + c_f \cdot \#\{ i : s_t^i = n \} \Big) -``` - -Where: - -- $c_m$ is the maintenance cost per component. -- $|a_t|$ is the number of components maintained. -- $c_f$ is the failure cost per failed component. -- $\#\{ i : s_t^i = n \}$ counts the number of components in the failed state. - -This formulation captures the total cost for maintaining components and penalizing failures. - -**Objective**: Find a policy $\pi: \mathcal{S} \to \mathcal{A}$ that minimizes the expected cumulative cost: -```math -\min_\pi \mathbb{E}\left[\sum_{t=1}^T c(s_t, \pi(s_t)) \right] -``` - -**Terminal Condition**: The episode terminates after $T$ time steps, with no terminal reward. 
- -## Key Components - -### [`MaintenanceBenchmark`](@ref) - -The main benchmark configuration with the following parameters: - -- `N`: number of components (default: 2) -- `K`: maximum number of components that can be maintained simultaneously (default: 1) -- `n`: number of degradation states per component (default: 3) -- `p`: degradation probability (default: 0.2) -- `c_f`: failure cost (default: 10.0) -- `c_m`: maintenance cost (default: 3.0) -- `max_steps`: Number of time steps per episode (default: 80) - -### Instance Generation - -Each problem instance includes: - -- **Starting State**: Random starting degradation state in $[1,n]$ for each components. - -### Environment Dynamics - -The environment tracks: -- Current time step -- Current degradation state. - -**State Observation**: Agents observe a normalized feature vector containing the degradation state of each component. - -## Benchmark Policies - -### Greedy Policy - -Greedy policy that maintains components in the last two degradation states, up to the maintenance capacity. This provides a simple baseline. - diff --git a/docs/src/benchmarks/portfolio_optimization.md b/docs/src/benchmarks/portfolio_optimization.md deleted file mode 100644 index da14f5a..0000000 --- a/docs/src/benchmarks/portfolio_optimization.md +++ /dev/null @@ -1,15 +0,0 @@ -# Portfolio Optimization - -[`PortfolioOptimizationBenchmark`](@ref) is a Markovitz portfolio optimization problem, where asset prices are unknown, and only contextual data is available to predict these prices. -The goal is to predict asset prices $c$ and maximize the expected return of a portfolio, subject to a risk constraint using this maximization program: -```math -\begin{aligned} -\max\quad & c^\top x\\ -\text{s.t.}\quad & x^\top \Sigma x \leq \gamma\\ -& 1^\top x \leq 1\\ -& x \geq 0 -\end{aligned} -``` - -!!! warning - Documentation for this benchmark is still under development. Please refer to the source code and API for more details. \ No newline at end of file diff --git a/docs/src/benchmarks/ranking.md b/docs/src/benchmarks/ranking.md deleted file mode 100644 index b0069e4..0000000 --- a/docs/src/benchmarks/ranking.md +++ /dev/null @@ -1,4 +0,0 @@ -# Ranking - -!!! warning - Documentation for this benchmark is still under development. Please refer to the source code and API for more details. \ No newline at end of file diff --git a/docs/src/benchmarks/static/fixed_size_shortest_path.jl b/docs/src/benchmarks/static/fixed_size_shortest_path.jl new file mode 100644 index 0000000..8a6779d --- /dev/null +++ b/docs/src/benchmarks/static/fixed_size_shortest_path.jl @@ -0,0 +1,82 @@ +# # Shortest Path +# Find the cheapest path from the top-left to the bottom-right of a grid graph: +# edge costs are unknown and must be predicted from instance features. + +using DecisionFocusedLearningBenchmarks +using Plots + +b = FixedSizeShortestPathBenchmark() + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: instance feature vector (observable at train and test time) +# - `θ`: true edge costs (training supervision only, hidden at test time) +# - `y`: path indicator vector (`y[e] = 1` if edge `e` is on the optimal path) +# +# True edge costs θ, averaged per vertex for display (hidden at test time — the model observes only `x`): +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) + +# Left: edge costs. 
Right: optimal path (white dots): +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting edge costs: +model = generate_statistical_model(b) # linear map: features → predicted edge costs +# and a maximizer finding the shortest path given those costs: +maximizer = generate_maximizer(b) # Dijkstra shortest path on the grid graph + +# A randomly initialized policy predicts arbitrary costs, yielding a near-straight path: +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# Optimality gap on the dataset (0 = optimal, higher is worse): +compute_gap(b, dataset, model, maximizer) + +# --- +# ## Problem Description +# +# A **fixed-size grid shortest path** problem. The graph is a directed acyclic grid of +# size ``(\text{rows} \times \text{cols})``, with edges pointing right and downward. +# Edge costs ``\theta \in \mathbb{R}^E`` are unknown; only a feature vector +# ``x \in \mathbb{R}^p`` is observed. The task is to find the minimum-cost path from +# vertex 1 (top-left) to vertex ``V`` (bottom-right): +# ```math +# y^* = \mathrm{argmin}_{y \in \mathcal{P}} \; \theta^\top y +# ``` +# where ``y \in \{0,1\}^E`` indicates selected edges and ``\mathcal{P}`` is the set of +# valid source-to-sink paths. +# +# Data is generated following the process in +# [Mandi et al., 2023](https://arxiv.org/abs/2307.13565). +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `grid_size` | Grid dimensions `(rows, cols)` | `(5, 5)` | +# | `p` | Feature dimension | 5 | +# | `deg` | Polynomial degree for cost generation | 1 | +# | `ν` | Multiplicative noise level (0 = no noise) | 0.0 | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x \in \mathbb{R}^p} +# \fbox{Linear model} +# \xrightarrow[\text{Predicted costs}]{\hat{\theta} \in \mathbb{R}^E} +# \fbox{Dijkstra / Bellman-Ford} +# \xrightarrow[\text{Path}]{y \in \{0,1\}^E} +# ``` +# +# **Model:** `Chain(Dense(p → E))` — predicts one cost per edge. +# +# **Maximizer:** Dijkstra (default) or Bellman-Ford on negated weights to find the +# longest (maximum-weight) path. +# +# !!! note "Reference" +# Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities. +# [arXiv:2307.13565](https://arxiv.org/abs/2307.13565) diff --git a/docs/src/benchmarks/static/portfolio_optimization.jl b/docs/src/benchmarks/static/portfolio_optimization.jl new file mode 100644 index 0000000..d7e7df0 --- /dev/null +++ b/docs/src/benchmarks/static/portfolio_optimization.jl @@ -0,0 +1,86 @@ +# # Portfolio Optimization +# Allocate wealth across assets to maximize expected return subject to a risk constraint: +# asset returns are unknown and must be predicted from contextual features. + +using DecisionFocusedLearningBenchmarks +using Plots + +b = PortfolioOptimizationBenchmark() + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: contextual feature vector (observable at train and test time) +# - `θ`: true expected asset returns (training supervision only, hidden at test time) +# - `y`: optimal portfolio weights solving the Markowitz QP given `θ` +# +# True expected returns θ (hidden at test time — the model observes only the feature vector `x`): +dataset = generate_dataset(b, 20; seed=0) +sample = first(dataset) +plot_instance(b, sample) + +# Left: true returns θ. 
Right: optimal portfolio weights y: +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting expected asset returns: +model = generate_statistical_model(b) # linear map: features → predicted returns +# and a maximizer allocating the optimal portfolio given those returns: +maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) + +# A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# Optimality gap on the dataset (0 = optimal, higher is worse): +compute_gap(b, dataset, model, maximizer) + +# --- +# ## Problem Description +# +# A **Markowitz portfolio optimization** problem where asset expected returns are unknown. +# Given contextual features ``x \in \mathbb{R}^p``, the learner predicts returns +# ``\hat{\theta} \in \mathbb{R}^d`` and solves: +# +# ```math +# \begin{aligned} +# \max_{y} \quad & \hat{\theta}^\top y \\ +# \text{s.t.} \quad & y^\top \Sigma y \leq \gamma \\ +# & \mathbf{1}^\top y \leq 1 \\ +# & y \geq 0 +# \end{aligned} +# ``` +# +# where ``\Sigma`` is the asset covariance matrix and ``\gamma`` is the risk budget. +# The solver uses [Ipopt.jl](https://github.com/jump-dev/Ipopt.jl) via JuMP. +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `d` | Number of assets | 50 | +# | `p` | Feature dimension | 5 | +# | `deg` | Polynomial degree for data generation | 1 | +# | `ν` | Noise hyperparameter | 1.0 | +# +# Data is generated following the process in +# [Mandi et al., 2023](https://arxiv.org/abs/2307.13565). +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x \in \mathbb{R}^p} +# \fbox{Linear model} +# \xrightarrow[\text{Predicted returns}]{\hat{\theta} \in \mathbb{R}^d} +# \fbox{QP solver (Ipopt)} +# \xrightarrow[\text{Portfolio}]{y \in \mathbb{R}^d} +# ``` +# +# **Model:** `Dense(p → d)` — predicts one expected return per asset. +# +# **Maximizer:** Ipopt QP solver enforcing the variance and budget constraints. +# +# !!! note "Reference" +# Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities. +# [arXiv:2307.13565](https://arxiv.org/abs/2307.13565) diff --git a/docs/src/benchmarks/static/ranking.jl b/docs/src/benchmarks/static/ranking.jl new file mode 100644 index 0000000..330785e --- /dev/null +++ b/docs/src/benchmarks/static/ranking.jl @@ -0,0 +1,73 @@ +# # Ranking +# Rank a set of items by predicted cost: the model must learn to sort items by their +# hidden scores from observable features alone. 
+ +using DecisionFocusedLearningBenchmarks +using Plots + +b = RankingBenchmark() + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: feature matrix (rows = features, columns = items; observable at train and test time) +# - `θ`: true item costs (training supervision only, hidden at test time) +# - `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the highest cost) +# +# True costs θ (hidden at test time — the model observes only the feature matrix `x`): +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) + +# The same costs, colored by rank (dark blue = best, light = worst): +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting item scores: +model = generate_statistical_model(b) # linear map: features → predicted costs +# and a maximizer ranking items by those scores: +maximizer = generate_maximizer(b) # ordinal ranking via sortperm + +# A randomly initialized policy produces an arbitrary ranking: +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# Optimality gap on the dataset (0 = optimal, higher is worse): +compute_gap(b, dataset, model, maximizer) + +# --- +# ## Problem Description +# +# In the **Ranking benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is +# observed. A hidden linear encoder maps ``x`` to a cost vector +# ``\theta \in \mathbb{R}^n``. The task is to compute the ordinal ranking of the items +# by cost: +# ```math +# y_i = \mathrm{rank}(\theta_i \mid \theta_1, \ldots, \theta_n) +# ``` +# where ``y_i = 1`` means item ``i`` has the highest cost. +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `instance_dim` | Number of items to rank | 10 | +# | `nb_features` | Feature dimension `p` | 5 | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x} +# \fbox{Linear model} +# \xrightarrow{\hat{\theta}} +# \fbox{ranking} +# \xrightarrow{y} +# ``` +# +# **Model:** `Chain(Dense(nb_features → 1; bias=false), vec)` — predicts one score per item. +# +# **Maximizer:** `ranking(θ)` — returns a vector of ordinal ranks via `invperm(sortperm(θ))`. +# +# !!! note "Reference" +# TODO: add original reference. diff --git a/docs/src/benchmarks/static/subset_selection.jl b/docs/src/benchmarks/static/subset_selection.jl new file mode 100644 index 0000000..4edeba8 --- /dev/null +++ b/docs/src/benchmarks/static/subset_selection.jl @@ -0,0 +1,77 @@ +# # Subset Selection +# Select the `k` most valuable items from a set of `n`: items with unknown values +# must be identified from observable features alone. 
+ +using DecisionFocusedLearningBenchmarks +using Plots + +b = SubsetSelectionBenchmark() + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: item feature vector (observable at train and test time) +# - `θ`: true item values (equal to `x` by default; otherwise derived via a hidden encoder) +# - `y`: selection indicator (`y[i] = 1` for the `k` highest-value items, 0 otherwise) +# +# True item values θ (hidden at test time — the model observes only the feature vector `x`): +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) + +# The same values, with the `k` selected items highlighted in green: +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting item scores: +model = generate_statistical_model(b) # linear map: features → predicted item scores +# and a maximizer selecting the top-k items by those scores: +maximizer = generate_maximizer(b) # top-k selection + +# A randomly initialized policy selects items with no relation to their true values: +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# Optimality gap on the dataset (0 = optimal, higher is worse): +compute_gap(b, dataset, model, maximizer) + +# --- +# ## Problem Description +# +# In the **Subset Selection benchmark**, ``n`` items have unknown values ``\theta_i``. +# A feature vector ``x \in \mathbb{R}^n`` is observed (identity mapping by default). +# The task is to select the ``k`` items with the highest values: +# ```math +# y = \mathrm{top}_k(\theta) +# ``` +# where ``y \in \{0,1\}^n`` with exactly ``k`` ones. +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `n` | Total number of items | 25 | +# | `k` | Number of items to select | 5 | +# | `identity_mapping` | Use identity as the hidden mapping | `true` | +# +# When `identity_mapping=true`, features equal item values directly (`x = θ`). +# When `false`, a random linear layer is used as the hidden mapping. +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x} +# \fbox{Linear model} +# \xrightarrow{\hat{\theta}} +# \fbox{top-k} +# \xrightarrow{y} +# ``` +# +# **Model:** `Dense(n → n; bias=false)` — predicts a score per item. +# +# **Maximizer:** `top_k(θ, k)` — returns a boolean vector with `true` at the `k` +# highest-scoring positions. +# +# !!! note "Reference" +# TODO: add original reference. diff --git a/docs/src/benchmarks/static/warcraft.jl b/docs/src/benchmarks/static/warcraft.jl new file mode 100644 index 0000000..dc98354 --- /dev/null +++ b/docs/src/benchmarks/static/warcraft.jl @@ -0,0 +1,89 @@ +# # Warcraft +# Find the cheapest path on a 12×12 terrain map: cell travel costs are unknown and must +# be inferred from the RGB terrain image using a neural network. + +using DecisionFocusedLearningBenchmarks +using Plots + +b = WarcraftBenchmark() + +# ## Observable input +# +# At inference time the decision-maker observes only the terrain image `x` (not the costs `θ`): +sample = generate_dataset(b, 1)[1] +plot_instance(b, sample) + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: terrain image (12×12×3 RGB array; observable at train and test time) +# - `θ`: true cell travel costs (training supervision only, hidden at test time) +# - `y`: optimal path indicator (`y[i,j] = 1` if cell `(i,j)` is on the path) +# +# Left: terrain image. 
Middle: true costs θ. Right: optimal path y: +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a CNN predicting cell travel costs from the terrain image: +model = generate_statistical_model(b) # ResNet18 CNN: terrain image → 12×12 cost map +# and a maximizer finding the shortest path given those costs: +maximizer = generate_maximizer(b) # Dijkstra shortest path on the 12×12 grid + +# An untrained CNN produces a near-uniform cost map, yielding a near-straight path: +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# Optimality gap on this sample (0 = optimal, higher is worse): +compute_gap(b, [sample], model, maximizer) + +# --- +# ## Problem Description +# +# In the **Warcraft benchmark**, each instance is a 12×12 grid representing a Warcraft +# terrain map. Each cell has an unknown travel cost depending on its terrain type (forest, +# mountain, water, etc.). The task is to find the path from the top-left to the +# bottom-right corner that minimizes total travel cost. +# +# Formally, let ``\theta_{ij}`` be the (unknown) cost of cell ``(i,j)`` and +# ``y_{ij} \in \{0,1\}`` indicate whether cell ``(i,j)`` is on the path. The objective is: +# ```math +# y^* = \mathrm{argmin}_{y \in \mathcal{P}} \sum_{(i,j)} \theta_{ij} \, y_{ij} +# ``` +# where ``\mathcal{P}`` is the set of valid grid paths (4-connected, source to sink). +# +# The dataset contains 10 000 labeled terrain images from the Warcraft II tileset. +# It is downloaded automatically on first use via +# [DataDeps.jl](https://github.com/oxinabox/DataDeps.jl). +# +# ## Key Components +# +# **[`WarcraftBenchmark`](@ref)** has no parameters. +# +# | Method | Description | +# |--------|-------------| +# | `generate_dataset(b, n)` | Downloads and loads `n` terrain images with true costs and paths | +# | `generate_statistical_model(b)` | ResNet18 CNN (first 5 layers + adaptive maxpool + neg) | +# | `generate_maximizer(b; dijkstra=true)` | Dijkstra or Bellman-Ford shortest path | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Terrain image}]{x \in \mathbb{R}^{12 \times 12 \times 3}} +# \fbox{ResNet18 CNN} +# \xrightarrow[\text{Cell costs}]{\hat{\theta} \in \mathbb{R}^{12 \times 12}} +# \fbox{Dijkstra} +# \xrightarrow[\text{Path}]{y \in \{0,1\}^{12 \times 12}} +# ``` +# +# The CNN maps terrain pixel values to predicted cell costs, which are then passed to a +# shortest-path solver. Training end-to-end with +# [InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) teaches +# the network to produce costs that lead to good paths, not just accurate cost estimates. +# +# !!! tip +# See the [Warcraft tutorial](../../warcraft_tutorial.md) for a complete end-to-end training +# example using `PerturbedMultiplicative` and `FenchelYoungLoss`. +# +# !!! note "Reference" +# Vlastelica et al. (2020), Differentiation of Blackbox Combinatorial Solvers, ICLR 2020. diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/vsp.jl new file mode 100644 index 0000000..e4bbac5 --- /dev/null +++ b/docs/src/benchmarks/stochastic/vsp.jl @@ -0,0 +1,116 @@ +# # Stochastic Vehicle Scheduling +# Assign vehicles to cover a set of tasks while minimizing costs under stochastic delays: +# the DFL agent learns to predict adjusted costs that implicitly hedge against uncertainty. 
+ +using DecisionFocusedLearningBenchmarks +using Plots + +b = StochasticVehicleSchedulingBenchmark() + +# ## A sample instance +# +# Each instance is a city with task locations and scheduled times. +# `store_city=true` is required to visualize the map (not needed for training): +sample = generate_dataset(b, 1; store_city=true)[1] +plot_instance(b, sample) + +# ## Untrained policy +# +# Each edge `(u, v)` has a 20-dimensional feature vector encoding schedule slack, travel +# times, and timing — this is what the model receives as `x` per edge: +# A DFL policy chains two components: a statistical model predicting adjusted edge costs: +model = generate_statistical_model(b) # linear map: task features → adjusted edge costs +# and a maximizer solving the deterministic VSP given those costs: +maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) + +# The untrained model predicts random edge costs; the resulting schedule is arbitrary. +# Run the solver on predicted costs to see a route visualization: +θ_pred = model(sample.x) +y_pred = maximizer(θ_pred; sample.context...) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra)) + +# --- +# ## Problem Description +# +# ### Overview +# +# In the **Vehicle Scheduling Problem (VSP)**, we consider a set of tasks ``V``. Each +# task ``v \in V`` has a scheduled beginning time ``t_v^b`` and end time ``t_v^e``, with +# ``t_v^e > t_v^b``. We denote ``t^{tr}_{(u,v)}`` the travel time from task ``u`` to task +# ``v``. A task ``v`` can follow ``u`` only if: +# ```math +# t_v^b \geq t_u^e + t^{tr}_{(u,v)} +# ``` +# +# An instance of VSP can be modeled as an acyclic directed graph where nodes are tasks +# and edges represent feasible successions. A solution is a set of disjoint paths such +# that all tasks are fulfilled exactly once to minimize total costs. +# +# In the **Stochastic VSP (StoVSP)**, after the scheduling decision is set, random delays +# propagate along vehicle tours. The objective becomes minimizing base costs plus expected +# total delay costs over scenarios. +# +# ### Mathematical Formulation +# +# **Variables:** Let ``y_{u,v} \in \{0,1\}`` indicate if a vehicle performs task ``v`` +# immediately after task ``u``. +# +# **Delay Propagation:** For each task ``v`` in scenario ``s``: +# - ``\gamma_v^s``: intrinsic delay of task ``v`` +# - ``d_v^s``: total accumulated delay +# - ``\delta_{u,v}^s = t_v^b - (t_u^e + t^{tr}_{(u,v)})``: slack time +# +# ```math +# d_v^s = \gamma_v^s + \max(d_u^s - \delta_{u,v}^s,\; 0) +# ``` +# +# **Objective:** +# ```math +# \min_{y} \; \sum_{(u,v)} c_{u,v} \, y_{u,v} + \mathbb{E}_{s \in S}\!\left[\sum_v C_d \, d_v^s\right] +# ``` +# +# ## Key Components +# +# ### [`StochasticVehicleSchedulingBenchmark`](@ref) +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `nb_tasks` | Number of tasks per instance | 25 | +# | `nb_scenarios` | Number of scenarios for objective evaluation | 10 | +# +# ### Instance Generation +# +# Each instance simulates a geographic city with depots and task locations. Tasks have +# realistic scheduled start/end times. Scenarios are random intrinsic delays ``\gamma`` +# drawn from a Log-Normal distribution. Feature vectors are 20-dimensional. 
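+#
+# As a quick numerical check of the delay propagation equation above (hypothetical
+# values): with upstream delay ``d_u^s = 5``, slack ``\delta_{u,v}^s = 2`` and
+# intrinsic delay ``\gamma_v^s = 1``, task ``v`` accumulates
+# ``d_v^s = 1 + \max(5 - 2, 0) = 4``.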
+
+#
+# ## Baseline Policies
+#
+# | Policy | Description |
+# |--------|-------------|
+# | `svs_deterministic_policy` | Solves the deterministic VSP, ignoring delays |
+# | `svs_saa_policy` | SAA via column generation over ``K`` scenarios |
+# | `svs_saa_mip_policy` | Exact SAA via compact MIP formulation |
+# | `svs_local_search_policy` | Heuristic local search over sampled scenarios |
+#
+# ## DFL Policy
+#
+# ```math
+# \xrightarrow[\text{Features}]{x \in \mathbb{R}^{20}}
+# \fbox{Linear model $\varphi_w$}
+# \xrightarrow[\text{Predicted cost}]{\hat{c}}
+# \fbox{Deterministic VSP solver}
+# \xrightarrow[\text{Routes}]{y}
+# ```
+#
+# By training end-to-end with the deterministic solver, the linear model learns adjusted
+# costs ``\hat{c}`` that implicitly account for expected stochastic delays, while keeping
+# the fast deterministic solver at inference time.
+#
+# **Model:** `Chain(Dense(20 → 1; bias=false), vec)` — predicts one adjusted cost per edge.
+#
+# **Maximizer:** `StochasticVehicleSchedulingMaximizer` — HiGHS MIP solver on the
+# deterministic VSP instance.
+#
+# !!! note "Reference"
+# TODO: add original reference.
diff --git a/docs/src/benchmarks/subset_selection.md b/docs/src/benchmarks/subset_selection.md
deleted file mode 100644
index 918e424..0000000
--- a/docs/src/benchmarks/subset_selection.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Subset Selection
-
-[`SubsetSelectionBenchmark`](@ref) is the most trivial benchmark problem in this package.
-It is minimalistic and serves as a simple example for debugging and testing purposes.
-
-## Description
-We have a set of ``n`` items, each item having an unknown value.
-We want to select a subset of ``k`` items that maximizes the sum of the values of the selected items.
-
-As input, instead of the items costs, we are given a feature vector, such that an unknown linear mapping between the feature vector and the value of the items exists.
-
-By default, this linear mapping is the identity mapping, i.e., the value of each item is equal to the value of the corresponding feature vector element.
-However, this mapping can be changed by setting the `identity_mapping` parameter to `false`.
diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/toy/argmax.jl
new file mode 100644
index 0000000..aaed6cd
--- /dev/null
+++ b/docs/src/benchmarks/toy/argmax.jl
@@ -0,0 +1,84 @@
+# # Argmax
+# Select the single best item from a set of `n` items. Item scores are **hidden**;
+# only a feature matrix `x` correlated with these scores is observable.
+# This problem can also be seen as a multiclass classification problem where
+# we use an argmax layer instead of a softmax. It is not very useful in practice; it is more a
+# minimalist toy problem to showcase DFL concepts in the simplest possible setting.
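+#
+# As a minimal illustration (hypothetical toy scores, not the benchmark maximizer):
+# a one-hot argmax decision can be built with `argmax`:
+θ_toy = [0.2, 0.7, 0.1]
+(1:length(θ_toy)) .== argmax(θ_toy) # Bool[0, 1, 0]: item 2 wins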
+ +using DecisionFocusedLearningBenchmarks +using Plots +using Statistics + +b = ArgmaxBenchmark(; instance_dim=10, nb_features=5, seed=0) + +# ## Observable input +# +# At inference time the decision-maker observes only a feature matrix `x` +# (rows = features, columns = items): +dataset = generate_dataset(b, 100; seed=0) +sample = first(dataset) +plot_instance(b, sample) + +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: feature matrix (observable at train and test time) +# - `θ`: true item scores (training supervision only, hidden at test time) +# - `y`: optimal one-hot decision derived from `θ` +# +# The full training triple (features, true scores, and optimal decision): +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting scores from features: +model = generate_statistical_model(b) # linear map: features → predicted scores +# and a maximizer turning those scores into a decision: +maximizer = generate_maximizer(b) # one-hot argmax + +# A randomly initialized policy makes essentially random decisions: +θ_pred = model(sample.x) +y_pred = maximizer(θ_pred) +# +plot_solution(b, DataSample(; x=sample.x, θ=θ_pred, y=y_pred, sample.context...)) + +# The goal of training is to find parameters that maximize accuracy. +# Current accuracy on the dataset: +mean(maximizer(model(s.x)) == s.y for s in dataset) + +# --- +# ## Problem Description +# +# In the **Argmax benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is +# observed. A hidden linear encoder maps ``x`` to a score vector +# ``\theta = \text{encoder}(x) \in \mathbb{R}^n``. The task is to select the item with +# the highest score: +# ```math +# y = \mathrm{argmax}(\theta) +# ``` +# The solution ``y`` is encoded as a one-hot vector. +# The score vector ``\theta`` is never observed (only features ``x`` are available). +# The DFL pipeline trains a model ``f_w`` so that ``\mathrm{argmax}(f_w(x))`` matches +# ``\mathrm{argmax}(\theta)`` at decision time. +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `instance_dim` | Number of items | 10 | +# | `nb_features` | Feature dimension `p` | 5 | +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x \in \mathbb{R}^{p \times n}} +# \fbox{Linear model $f_w$} +# \xrightarrow[\text{Predicted scores}]{\hat{\theta} \in \mathbb{R}^n} +# \fbox{argmax} +# \xrightarrow[\text{Selection}]{y \in \{0,1\}^n} +# ``` +# +# **Model:** `Chain(Dense(nb_features → 1; bias=false), vec)`: a single linear layer +# predicting one score per item. +# +# **Maximizer:** `one_hot_argmax`: returns a one-hot vector at the argmax index. diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl new file mode 100644 index 0000000..16cbcf0 --- /dev/null +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -0,0 +1,74 @@ +# # Argmax on a 2D polytope +# Select the best vertex of a random 2D polytope: predict a 2D cost vector from features, +# then return the vertex maximizing the dot product with it. 
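+#
+# As a minimal illustration (a hypothetical triangle, not the benchmark maximizer):
+# the best vertex for a given cost direction is a dot-product argmax over vertices:
+verts_toy = [[1.0, 0.0], [0.0, 1.0], [-1.0, 0.0]]
+θ_toy = [0.5, 2.0]
+argmax(v -> sum(θ_toy .* v), verts_toy) # returns [0.0, 1.0]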
+
+using DecisionFocusedLearningBenchmarks
+using Plots
+
+b = Argmax2DBenchmark(; seed=0)
+
+# ## Observable input
+#
+# At inference time the decision-maker observes the feature vector `x` and the polytope shape,
+# but not the hidden cost direction `θ`:
+dataset = generate_dataset(b, 50; seed=0)
+sample = first(dataset)
+plot_instance(b, sample)
+
+# ## A training sample
+#
+# Each sample is a labeled triple `(x, θ, y)`:
+# - `x`: feature vector (observable at train and test time)
+# - `θ`: 2D cost direction (training supervision only, hidden at test time)
+# - `y`: polytope vertex maximizing `θᵀv` (optimal decision)
+# - `instance` (in `context`): polytope vertices (observable problem structure)
+#
+# The full training triple (polytope, cost direction θ, optimal vertex y):
+plot_solution(b, sample)
+
+# ## Untrained policy
+
+# A DFL policy chains two components: a statistical model predicting a 2D cost direction:
+model = generate_statistical_model(b) # linear map: features → 2D cost vector
+# and a maximizer selecting the best polytope vertex for that direction:
+maximizer = generate_maximizer(b) # vertex maximizing θᵀv over polytope vertices
+
+# A randomly initialized policy predicts an arbitrary cost direction:
+θ_pred = model(sample.x)
+plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred; sample.context...)))
+
+# ---
+# ## Problem Description
+#
+# In the **Argmax2D benchmark**, each instance defines a random convex polytope
+# ``\mathcal{Y}(x) = \mathrm{conv}(v_1, \ldots, v_m)`` in ``\mathbb{R}^2``.
+# A hidden encoder maps features ``x \in \mathbb{R}^p`` to a 2D cost vector
+# ``\theta \in \mathbb{R}^2``. The task is to find the polytope vertex maximizing
+# the dot product:
+# ```math
+# y^* = \mathrm{argmax}_{v \in \mathcal{Y}(x)} \; \theta^\top v
+# ```
+#
+# This is a toy 2D combinatorial optimization problem useful for visualizing
+# how well a model learns the cost direction.
+#
+# ## Key Parameters
+#
+# | Parameter | Description | Default |
+# |-----------|-------------|---------|
+# | `nb_features` | Feature dimension `p` | 5 |
+# | `polytope_vertex_range` | Number of polytope vertices (list; one value drawn at random per instance) | `[6]` |
+#
+# ## DFL Policy
+#
+# ```math
+# \xrightarrow[\text{Features}]{x}
+# \fbox{Linear model}
+# \xrightarrow{\hat{\theta} \in \mathbb{R}^2}
+# \fbox{Polytope argmax}
+# \xrightarrow{y}
+# ```
+#
+# **Model:** `Dense(nb_features → 2; bias=false)` — predicts a 2D cost direction.
+#
+# **Maximizer:** finds the vertex of the instance polytope with maximum dot product with θ.
diff --git a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl
new file mode 100644
index 0000000..9940b55
--- /dev/null
+++ b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl
@@ -0,0 +1,102 @@
+# # Contextual Stochastic Argmax
+# Select the best item when utilities are random but correlated with observable context:
+# a linear model must learn the mapping from context to expected utilities.
+
+using DecisionFocusedLearningBenchmarks
+using Plots
+
+b = ContextualStochasticArgmaxBenchmark()
+
+# Stochastic benchmarks need a labeling policy to generate training targets.
+# We use the anticipative oracle: given realized scenario ξ it returns the best item.
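+# (These anticipative labels are an oracle upper bound: they use knowledge of the
+# realized scenario that no implementable policy has at decision time.)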
+anticipative = generate_anticipative_solver(b) +policy = (ctx, scenarios) -> [ + DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) + for ξ in scenarios +] +dataset = generate_dataset(b, 20; target_policy=policy, seed=0) +sample = first(dataset) + +# ## Observable input +# +# At inference time `c_base` and `x_raw` are known (not the realized utility vector ξ). +# `plot_instance` shows the base utilities `c_base`: +plot_instance(b, sample) + +# ## A training sample +# +# Stochastic benchmarks have no single ground-truth label: the optimal item depends on +# which utility scenario is realized. We label each sample with the anticipative oracle, +# which returns the best item given the realized scenario ξ. +# +# Each labeled sample contains: +# - `x`: feature vector `[c_base; x_raw]` (observable at train and test time) +# - `y`: optimal item for the realized scenario ξ (one-hot; anticipative oracle label) +# - `extra.scenario`: realized utility vector ξ (available only during training) +# +# Left: realized scenario ξ. Right: selected item (red): +plot_solution(b, sample) + +# ## Untrained policy + +# A DFL policy chains two components: a statistical model predicting expected item utilities: +model = generate_statistical_model(b) # linear map: features → predicted expected utilities +# and a maximizer selecting the item with the highest predicted utility: +maximizer = generate_maximizer(b) # one-hot argmax + +# A randomly initialized policy selects items with no relation to their expected utilities. +# Left: predicted utilities θ̂. Right: selected item (red): +θ_pred = model(sample.x) +plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) + +# --- +# ## Problem Description +# +# ### Overview +# +# In the **Contextual Stochastic Argmax benchmark**, ``n`` items have random utilities +# that depend on observable context. Per instance: +# - ``c_\text{base} \sim U[0,1]^n``: base utilities (stored in `context`) +# - ``x_\text{raw} \sim \mathcal{N}(0, I_d)``: observable context features +# - Full features: ``x = [c_\text{base}; x_\text{raw}] \in \mathbb{R}^{n+d}`` +# +# The realized utility (scenario) is drawn as: +# ```math +# \xi = c_\text{base} + W \, x_\text{raw} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, \sigma^2 I) +# ``` +# where ``W \in \mathbb{R}^{n \times d}`` is a fixed unknown perturbation matrix. +# +# The task is to select the item with the highest realized utility: +# ```math +# y^* = \mathrm{argmax}(\xi) +# ``` +# +# A linear model ``\hat{\theta} = [I \mid W] \cdot x`` can exactly recover the optimal +# solution in expectation. +# +# ## Key Parameters +# +# | Parameter | Description | Default | +# |-----------|-------------|---------| +# | `n` | Number of items | 10 | +# | `d` | Context feature dimension | 5 | +# | `noise_std` | Noise standard deviation σ | 0.1 | +# +# ## Baseline Policies +# +# - **SAA**: selects the item with highest mean utility over available scenarios. +# +# ## DFL Policy +# +# ```math +# \xrightarrow[\text{Features}]{x = [c_\text{base}; x_\text{raw}]} +# \fbox{Linear model} +# \xrightarrow{\hat{\theta} \in \mathbb{R}^n} +# \fbox{argmax} +# \xrightarrow{y} +# ``` +# +# **Model:** `Dense(n+d → n; bias=false)` — can in principle recover the exact mapping +# ``[I \mid W]`` from training data. +# +# **Maximizer:** `one_hot_argmax`. 
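+#
+# Since ``\varepsilon`` has zero mean, the conditional expectation of a scenario is
+# exactly linear in the full feature vector:
+# ```math
+# \mathbb{E}[\xi \mid x] = c_\text{base} + W x_\text{raw} = [I \mid W] \, x
+# ```
+# so the Bayes-optimal utility predictor lies inside this linear model class.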
diff --git a/docs/src/benchmarks/vsp.md b/docs/src/benchmarks/vsp.md deleted file mode 100644 index 89b51a8..0000000 --- a/docs/src/benchmarks/vsp.md +++ /dev/null @@ -1,93 +0,0 @@ -# Stochastic Vehicle Scheduling - -[`StochasticVehicleSchedulingBenchmark`](@ref). - -The Stochastic Vehicle Scheduling Problem (StoVSP) is a stochastic combinatorial optimization benchmark. The problem consists in assigning vehicles to cover a set of scheduled tasks, minimizing base operational costs while accounting for random delays that propagate along vehicle tours. - -## Problem Description - -### Overview - -In the **Vehicle Scheduling Problem (VSP)**, we consider a set of tasks $V$. Each task $v\in V$ has a scheduled beginning time $t_v^b$ and a scheduled end time $t_v^e$, such that $t_v^e > t_v^b$. We denote $t^{tr}_{(u, v)}$ the travel time from task $u$ to task $v$. A task $v$ can be scheduled consecutively after another task $u$ only if we can reach it in time, i.e., -```math -t_v^b \geq t_u^e + t^{tr}_{(u, v)} -``` - -An instance of VSP can be modeled with an acyclic directed graph where nodes are tasks and edges represent feasible successions. A solution is a set of disjoint paths such that all tasks are fulfilled exactly once to minimize total costs. The constraints matrix of this deterministic version is totally unimodular, so integrity constraints can be relaxed and the problem easily solved using a standard linear programming solver. - -In the **Stochastic Vehicle Scheduling Problem (StoVSP)**, we consider the same setting but after the scheduling decision is set, we observe random delays which propagate along the tours of the vehicles. The objective becomes minimizing the sum of the vehicles' base operational costs and the expected total delay costs over a finite set of scenarios $s \in S$. - -### Mathematical Formulation - -The deterministic problem can be formulated as a minimum-cost network flow problem. The stochastic version introduces scenarios that add complexities to the objective function. - -**Variables:** -Let $y_{u,v} \in \{0, 1\}$ be the binary decision variable indicating if a vehicle performs task $v$ immediately after task $u$. Formally, this defines the edges of the selected disjoint paths. - -**Delay Propagation:** - -For each task $v$, we denote: -- $\gamma_v^s \in \mathbb{R}_+$: The intrinsic delay of task $v$ in scenario $s$. -- $d_v^s \in \mathbb{R}_+$: The total delay accumulated by task $v$ in scenario $s$. -- $\delta_{u, v}^s = t_v^b - (t_u^e + t^{tr}_{(u, v)})$: The slack time between tasks $u$ and $v$. - -These quantities follow the *delay propagation equation*. When $u$ and $v$ are consecutively operated by the same vehicle ($y_{u,v} = 1$), the total delay transfers with the following dynamic: -```math -d_v^s = \gamma_v^s + \max(d_u^s - \delta_{u, v}^s, 0) -``` - -This leads to a much more difficult problem to solve since the recursive max-function breaks the total unimodularity. This makes it an excellent benchmark for Decision-Focused Learning, where predicting robust base costs that account for expectation of future delays yields superior scheduling decisions. - -**Objective**: Find a scheduling policy (defined by $y$) that minimizes the total cost: -```math -\min_{y} \quad \sum_{(u,v)} c_{u,v} y_{u,v} + \mathbb{E}_{s \in S}\left[ \sum_v C_d d_v^s \right] -``` -where $c_{u,v}$ are the deterministic transition costs and $C_d$ is the unit penalty for delays. 
- -## Key Components - -### [`StochasticVehicleSchedulingBenchmark`](@ref) - -The main benchmark configuration with the following parameters: - -- `nb_tasks`: Number of tasks to schedule in each instance (default: 25) -- `nb_scenarios`: Number of scenarios to evaluate the expected delay costs (default: 10) - -### Instance Generation - -Each problem instance is generated by simulating a geographic city landscape with depots and task locations: -- **Tasks**: Generated with realistic scheduled start and end times respecting spatial bounds. -- **Scenarios**: Random intrinsic delays $\gamma$ drawn from probability distributions (e.g. Log-Normal). -- **Features**: A 20-dimensional feature vector ($d=20$) describing the tasks and network properties (spatial coordinates, start times, route density, etc.). - -## Benchmark Policies - -The benchmark provides the following baseline policies: - -### Deterministic Policy -[`svs_deterministic_policy`](@ref) solves the deterministic version of the VSP using a Mixed Integer Programming (MIP) solver. It completely ignores scenario delays and slack capacities. - -### Sample Average Approximation (SAA) -This approach builds a stochastic instance using a finite set of $K$ available scenarios and minimizes the empirical expected cost. Two formulations are provided: -- **SAA (col gen)** ([`svs_saa_policy`](@ref)): Solves the stochastic MIP using a column generation algorithm. -- **SAA (exact MIP)** ([`svs_saa_mip_policy`](@ref)): Solves the exact stochastic MIP via a compact linearized formulation. - -### Local Search Policy -[`svs_local_search_policy`](@ref) begins with a heuristic initialization (usually deterministic) and iteratively explores neighboring schedules, accepting moves that improve the expected cost over the sampled scenarios. - -## Decision-Focused Learning Policy - -```math -\xrightarrow[\text{Features}]{x_t \in \mathbb{R}^{20}} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Predicted Cost}]{\hat{c}} -\fbox{Deterministic VSP Solver} -\xrightarrow[\text{Paths}]{y_t} -``` - -**Components**: - -1. **Neural Network** ``\varphi_w``: A linear model (mapping 20-dimensional features to 1 scalar) predicting an adjusted edge cost ``\hat{c}_{u,v}`` for each possible assignment. -2. **Optimization Layer (Maximizer)**: A deterministic mathematical programming solver `StochasticVechicleSchedulingMaximizer` that takes the predicted costs $\hat{c}$ and solves the easily tractable deterministic VSP to map back to a routing decision $y_t$. - -By training the neural network end-to-end with the combinatorial solver, the Decision-Focused Learning agent learns to produce adjusted costs $\hat{c}$ that serve as proxies, implicitly hedging against the actual stochastic delays while retaining the rapid evaluation of the deterministic solver. diff --git a/docs/src/benchmarks/warcraft.md b/docs/src/benchmarks/warcraft.md deleted file mode 100644 index c78850e..0000000 --- a/docs/src/benchmarks/warcraft.md +++ /dev/null @@ -1,3 +0,0 @@ -# Warcraft - -See the tutorial for a full demo of [`WarcraftBenchmark`](@ref). 
diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index 0a5caae..c7e47b9 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -7,10 +7,18 @@ using Plots import DecisionFocusedLearningBenchmarks: has_visualization, plot_instance, plot_solution, plot_trajectory, animate_trajectory +include("plots/argmax_plots.jl") include("plots/argmax2d_plots.jl") +include("plots/ranking_plots.jl") +include("plots/subset_selection_plots.jl") +include("plots/portfolio_plots.jl") +include("plots/shortest_path_plots.jl") +include("plots/contextual_stochastic_argmax_plots.jl") include("plots/warcraft_plots.jl") include("plots/svs_plots.jl") include("plots/dvs_plots.jl") +include("plots/dynamic_assortment_plots.jl") +include("plots/maintenance_plots.jl") """ plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) diff --git a/ext/plots/argmax_plots.jl b/ext/plots/argmax_plots.jl new file mode 100644 index 0000000..a93516e --- /dev/null +++ b/ext/plots/argmax_plots.jl @@ -0,0 +1,61 @@ +has_visualization(::ArgmaxBenchmark) = true + +""" +$TYPEDSIGNATURES + +Plot the input features as a heatmap. Columns correspond to items, rows correspond to features. +""" +function plot_instance(::ArgmaxBenchmark, sample::DataSample; kwargs...) + x = sample.x # nb_features × n + n = size(x, 2) + return Plots.heatmap( + x; + xlabel="Item", + ylabel="Feature", + title="Features x (observable input)", + xticks=1:n, + kwargs..., + ) +end + +""" +$TYPEDSIGNATURES + +Plot the features `x`, scores `θ`, and decision `y` in `sample` as heatmaps. +All three share the same item axis (columns). +""" +function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) + x = sample.x # nb_features × n + θ = sample.θ # length n + y = sample.y # one-hot, length n + n = length(θ) + + p1 = Plots.heatmap( + x; + ylabel="Feature", + title="x (features, observable)", + xticks=(1:n, fill("", n)), + ) + θ_min, θ_max = extrema(θ) + p2 = Plots.heatmap( + reshape(Float64.(θ), 1, n); + ylabel="θ", + title="θ: scores [$(round(θ_min; sigdigits=2)), $(round(θ_max; sigdigits=2))]", + yticks=false, + xticks=(1:n, fill("", n)), + colorbar=false, + ) + p3 = Plots.heatmap( + reshape(Float64.(y), 1, n); + xlabel="Item", + ylabel="y", + title="y (decision, one-hot)", + yticks=false, + xticks=1:n, + color=:Greens, + colorbar=false, + ) + + l = Plots.@layout [a{0.65h}; b{0.175h}; c{0.175h}] + return Plots.plot(p1, p2, p3; layout=l, size=(600, 420), kwargs...) +end diff --git a/ext/plots/contextual_stochastic_argmax_plots.jl b/ext/plots/contextual_stochastic_argmax_plots.jl new file mode 100644 index 0000000..601f941 --- /dev/null +++ b/ext/plots/contextual_stochastic_argmax_plots.jl @@ -0,0 +1,56 @@ +has_visualization(::ContextualStochasticArgmaxBenchmark) = true + +function plot_instance(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) + c_base = sample.c_base # base utilities from context + n = length(c_base) + return Plots.bar( + 1:n, + c_base; + legend=false, + xlabel="Item", + ylabel="Base utility", + title="Instance (base utilities c_base)", + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) 
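+    # Two-panel figure: utility bars on the left, the selected item highlighted on the right.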
+ y = sample.y # one-hot vector + n = length(y) + + # Pick the best available utility vector to display + if hasproperty(sample.extra, :scenario) + u = sample.extra.scenario + u_title = "Realized scenario ξ" + elseif hasproperty(sample, :θ) && !isnothing(sample.θ) + u = sample.θ + u_title = "Predicted utilities θ̂" + else + u = sample.c_base + u_title = "Base utilities c_base" + end + + p1 = Plots.bar( + 1:n, + u; + legend=false, + xlabel="Item", + ylabel="Utility", + title=u_title, + color=:steelblue, + ) + + colors = [y[i] > 0 ? :firebrick : :steelblue for i in 1:n] + p2 = Plots.bar( + 1:n, + u; + color=colors, + legend=false, + xlabel="Item", + ylabel="Utility", + title="Selected item (red)", + ) + + return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) +end diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl new file mode 100644 index 0000000..dff56ec --- /dev/null +++ b/ext/plots/dynamic_assortment_plots.jl @@ -0,0 +1,50 @@ +has_visualization(::DynamicAssortmentBenchmark) = true + +function plot_instance(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) + # sample.instance = (env.features, purchase_history); row 1 of features = prices (×10 to undo normalization) + prices = sample.instance[1][1, :] .* 10 + N = length(prices) + return Plots.bar( + 1:N, + prices; + legend=false, + xlabel="Item", + ylabel="Price", + title="Instance (item prices) — step $(length(sample.instance[2]) + 1)", + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) + prices = sample.instance[1][1, :] .* 10 + y = sample.y # BitVector, selected items + N = length(prices) + colors = [y[i] ? :seagreen : :lightgray for i in 1:N] + return Plots.bar( + 1:N, + prices; + legend=false, + xlabel="Item", + ylabel="Price", + title="Assortment (green = offered) — step $(length(sample.instance[2]) + 1)", + color=colors, + kwargs..., + ) +end + +function plot_trajectory( + bench::DynamicAssortmentBenchmark, + trajectory::Vector{<:DataSample}; + max_steps=6, + cols=3, + kwargs..., +) + n = min(length(trajectory), max_steps) + rows = ceil(Int, n / cols) + steps = round.(Int, range(1, length(trajectory); length=n)) + plots = [plot_solution(bench, trajectory[t]) for t in steps] + return Plots.plot( + plots...; layout=(rows, cols), size=(cols * 300, rows * 250), kwargs... + ) +end diff --git a/ext/plots/maintenance_plots.jl b/ext/plots/maintenance_plots.jl new file mode 100644 index 0000000..3a95e82 --- /dev/null +++ b/ext/plots/maintenance_plots.jl @@ -0,0 +1,54 @@ +has_visualization(::MaintenanceBenchmark) = true + +function plot_instance(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) + # sample.instance = degradation_state (Vector{Int}, values 1..n) + state = sample.instance + N = length(state) + n = bench.n + return Plots.bar( + 1:N, + state; + legend=false, + xlabel="Component", + ylabel="Degradation level", + title="Instance (degradation state)", + ylim=(0, n + 0.5), + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) + state = sample.instance + y = sample.y # BitVector, maintained components + N = length(state) + n = bench.n + colors = [y[i] ? :seagreen : (state[i] == n ? :firebrick : :steelblue) for i in 1:N] + labels = ["comp $i$(y[i] ? 
" ✓" : "")" for i in 1:N] + return Plots.bar( + labels, + state; + legend=false, + ylabel="Degradation level", + title="Solution (green = maintained, red = failed)", + ylim=(0, n + 0.5), + color=colors, + kwargs..., + ) +end + +function plot_trajectory( + bench::MaintenanceBenchmark, + trajectory::Vector{<:DataSample}; + max_steps=6, + cols=3, + kwargs..., +) + n = min(length(trajectory), max_steps) + rows = ceil(Int, n / cols) + steps = round.(Int, range(1, length(trajectory); length=n)) + plots = [plot_solution(bench, trajectory[t]) for t in steps] + return Plots.plot( + plots...; layout=(rows, cols), size=(cols * 300, rows * 250), kwargs... + ) +end diff --git a/ext/plots/portfolio_plots.jl b/ext/plots/portfolio_plots.jl new file mode 100644 index 0000000..77c3a7e --- /dev/null +++ b/ext/plots/portfolio_plots.jl @@ -0,0 +1,41 @@ +has_visualization(::PortfolioOptimizationBenchmark) = true + +function plot_instance(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + d = length(θ) + return Plots.bar( + 1:d, + θ; + legend=false, + xlabel="Asset", + ylabel="Expected return", + title="Instance (expected returns θ)", + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + y = sample.y + d = length(θ) + p1 = Plots.bar( + 1:d, + θ; + legend=false, + xlabel="Asset", + ylabel="Expected return", + title="Expected returns θ", + color=:steelblue, + ) + p2 = Plots.bar( + 1:d, + y; + legend=false, + xlabel="Asset", + ylabel="Portfolio weight", + title="Portfolio weights y", + color=:seagreen, + ) + return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) +end diff --git a/ext/plots/ranking_plots.jl b/ext/plots/ranking_plots.jl new file mode 100644 index 0000000..dc50b2d --- /dev/null +++ b/ext/plots/ranking_plots.jl @@ -0,0 +1,35 @@ +has_visualization(::RankingBenchmark) = true + +function plot_instance(::RankingBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + n = length(θ) + return Plots.bar( + 1:n, + θ; + legend=false, + xlabel="Item", + ylabel="Cost", + title="Instance (costs θ)", + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(::RankingBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + y = sample.y # y[i] = rank of item i (1 = best) + n = length(θ) + # Color by rank: rank 1 (best) in dark blue, rank n (worst) in light + palette = Plots.cgrad(:Blues, n; rev=true, categorical=true) + colors = [palette[y[i]] for i in 1:n] + return Plots.bar( + 1:n, + θ; + legend=false, + xlabel="Item", + ylabel="Cost", + title="Solution (color = rank, dark = best)", + color=colors, + kwargs..., + ) +end diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl new file mode 100644 index 0000000..0d25629 --- /dev/null +++ b/ext/plots/shortest_path_plots.jl @@ -0,0 +1,94 @@ +import Graphs: edges, src, dst + +has_visualization(::FixedSizeShortestPathBenchmark) = true + +""" +Map edge weights to a (rows × cols) vertex weight matrix by averaging incident edge weights, +and return a boolean (rows × cols) matrix marking vertices on the path. 
+""" +function _grid_matrices(bench::FixedSizeShortestPathBenchmark, θ, y) + rows, cols = bench.grid_size + n_v = rows * cols + g = bench.graph + + # Vertex weights: mean of absolute weights of incident edges + v_weights = zeros(Float64, n_v) + v_counts = zeros(Int, n_v) + for (i, e) in enumerate(edges(g)) + v_weights[src(e)] += abs(θ[i]) + v_counts[src(e)] += 1 + v_weights[dst(e)] += abs(θ[i]) + v_counts[dst(e)] += 1 + end + v_weights ./= max.(v_counts, 1) + + # Path vertices + on_path = falses(n_v) + for (i, e) in enumerate(edges(g)) + if y[i] + on_path[src(e)] = true + on_path[dst(e)] = true + end + end + + # Reshape to (rows, cols): vertex v → row ceil(v/cols), col ((v-1)%cols)+1 + weight_grid = reshape(v_weights, cols, rows)' + path_grid = reshape(on_path, cols, rows)' + return weight_grid, path_grid +end + +function plot_instance(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) + weight_grid, _ = _grid_matrices(bench, sample.θ, falses(length(sample.θ))) + return Plots.heatmap( + weight_grid; + yflip=true, + aspect_ratio=:equal, + title="Edge weights (per vertex)", + colorbar=true, + kwargs..., + ) +end + +function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) + weight_grid, path_grid = _grid_matrices(bench, sample.θ, sample.y) + rows, cols = bench.grid_size + + p1 = Plots.heatmap( + weight_grid; + yflip=true, + aspect_ratio=:equal, + title="Edge weights", + colorbar=true, + framestyle=:none, + ) + + p2 = Plots.heatmap( + weight_grid; + yflip=true, + aspect_ratio=:equal, + title="Shortest path", + colorbar=false, + framestyle=:none, + color=:Blues, + ) + # Highlight path vertices with scatter + path_xs = Int[] + path_ys = Int[] + for r in 1:rows, c in 1:cols + if path_grid[r, c] + push!(path_xs, c) + push!(path_ys, r) + end + end + Plots.scatter!( + p2, + path_xs, + path_ys; + color=:white, + markersize=6, + markerstrokewidth=0, + label=false, + ) + + return Plots.plot(p1, p2; layout=(1, 2), size=(700, 320), kwargs...) +end diff --git a/ext/plots/subset_selection_plots.jl b/ext/plots/subset_selection_plots.jl new file mode 100644 index 0000000..40e778b --- /dev/null +++ b/ext/plots/subset_selection_plots.jl @@ -0,0 +1,33 @@ +has_visualization(::SubsetSelectionBenchmark) = true + +function plot_instance(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + n = length(θ) + return Plots.bar( + 1:n, + θ; + legend=false, + xlabel="Item", + ylabel="Value", + title="Instance (values θ)", + color=:steelblue, + kwargs..., + ) +end + +function plot_solution(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) + θ = sample.θ + y = sample.y # y[i] = true if item i is selected + n = length(θ) + colors = [y[i] ? :seagreen : :lightgray for i in 1:n] + return Plots.bar( + 1:n, + θ; + legend=false, + xlabel="Item", + ylabel="Value", + title="Solution (selected items in green)", + color=colors, + kwargs..., + ) +end diff --git a/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl b/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl index 49d22e2..5a84825 100644 --- a/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl +++ b/src/ContextualStochasticArgmax/ContextualStochasticArgmax.jl @@ -121,6 +121,16 @@ include("policies.jl") """ $TYPEDSIGNATURES +Return the named baseline policies for [`ContextualStochasticArgmaxBenchmark`](@ref). +Each policy has signature `(ctx_sample, scenarios) -> Vector{DataSample}`. 
+""" +function Utils.generate_baseline_policies(::ContextualStochasticArgmaxBenchmark) + return (; saa=Policy("SAA", "argmax of mean scenarios", csa_saa_policy)) +end + +""" +$TYPEDSIGNATURES + Generates the anticipative solver for the benchmark. """ function Utils.generate_anticipative_solver(::ContextualStochasticArgmaxBenchmark) diff --git a/src/ContextualStochasticArgmax/policies.jl b/src/ContextualStochasticArgmax/policies.jl index 1dc2d28..2cf7ae5 100644 --- a/src/ContextualStochasticArgmax/policies.jl +++ b/src/ContextualStochasticArgmax/policies.jl @@ -17,16 +17,6 @@ function csa_saa_policy(ctx_sample, scenarios) ] end -""" -$TYPEDSIGNATURES - -Return the named baseline policies for [`ContextualStochasticArgmaxBenchmark`](@ref). -Each policy has signature `(ctx_sample, scenarios) -> Vector{DataSample}`. -""" -function Utils.generate_baseline_policies(::ContextualStochasticArgmaxBenchmark) - return (; saa=Policy("SAA", "argmax of mean scenarios", csa_saa_policy)) -end - """ $TYPEDEF From 837d3fa2e7db67f70d9c958ba654267468b913e0 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Fri, 3 Apr 2026 16:58:41 +0200 Subject: [PATCH 03/21] formatting --- docs/src/benchmarks/stochastic/vsp.jl | 4 +++- docs/src/benchmarks/toy/argmax2d.jl | 7 ++++++- docs/src/benchmarks/toy/contextual_stochastic_argmax.jl | 9 +++++---- ext/plots/argmax_plots.jl | 5 +---- ext/plots/shortest_path_plots.jl | 8 +------- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/vsp.jl index e4bbac5..d132822 100644 --- a/docs/src/benchmarks/stochastic/vsp.jl +++ b/docs/src/benchmarks/stochastic/vsp.jl @@ -27,7 +27,9 @@ maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) # Run the solver on predicted costs to see a route visualization: θ_pred = model(sample.x) y_pred = maximizer(θ_pred; sample.context...) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra)) +plot_solution( + b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra) +) # --- # ## Problem Description diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl index 16cbcf0..0ef7649 100644 --- a/docs/src/benchmarks/toy/argmax2d.jl +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -35,7 +35,12 @@ maximizer = generate_maximizer(b) # vertex maximizing θᵀv over polyto # A randomly initialized policy predicts an arbitrary cost direction: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred; sample.context...))) +plot_solution( + b, + DataSample(; + sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred; sample.context...) + ), +) # --- # ## Problem Description diff --git a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl index 9940b55..293616c 100644 --- a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl +++ b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl @@ -10,10 +10,11 @@ b = ContextualStochasticArgmaxBenchmark() # Stochastic benchmarks need a labeling policy to generate training targets. # We use the anticipative oracle: given realized scenario ξ it returns the best item. 
anticipative = generate_anticipative_solver(b) -policy = (ctx, scenarios) -> [ - DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) - for ξ in scenarios -] +policy = + (ctx, scenarios) -> [ + DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) + for ξ in scenarios + ] dataset = generate_dataset(b, 20; target_policy=policy, seed=0) sample = first(dataset) diff --git a/ext/plots/argmax_plots.jl b/ext/plots/argmax_plots.jl index a93516e..7886aad 100644 --- a/ext/plots/argmax_plots.jl +++ b/ext/plots/argmax_plots.jl @@ -31,10 +31,7 @@ function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) n = length(θ) p1 = Plots.heatmap( - x; - ylabel="Feature", - title="x (features, observable)", - xticks=(1:n, fill("", n)), + x; ylabel="Feature", title="x (features, observable)", xticks=(1:n, fill("", n)) ) θ_min, θ_max = extrema(θ) p2 = Plots.heatmap( diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 0d25629..2a7fcf8 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -81,13 +81,7 @@ function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample end end Plots.scatter!( - p2, - path_xs, - path_ys; - color=:white, - markersize=6, - markerstrokewidth=0, - label=false, + p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false ) return Plots.plot(p1, p2; layout=(1, 2), size=(700, 320), kwargs...) From bba4ce8b603b3d3460cf9a6de61243d7568083ea Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 28 Apr 2026 13:07:30 +0200 Subject: [PATCH 04/21] cleanup and add missing references --- docs/src/benchmarks/dynamic/dvsp.jl | 5 ++++- docs/src/benchmarks/dynamic/maintenance.jl | 3 --- docs/src/benchmarks/static/ranking.jl | 3 --- docs/src/benchmarks/static/subset_selection.jl | 2 +- docs/src/benchmarks/stochastic/vsp.jl | 2 +- docs/src/index.md | 3 ++- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/src/benchmarks/dynamic/dvsp.jl b/docs/src/benchmarks/dynamic/dvsp.jl index 9c86f5e..a72d3ab 100644 --- a/docs/src/benchmarks/dynamic/dvsp.jl +++ b/docs/src/benchmarks/dynamic/dvsp.jl @@ -112,4 +112,7 @@ maximizer = generate_maximizer(b) # prize-collecting VSP solver # - Full features: `Dense(27 → 1)` applied independently per customer # # !!! note "Reference" -# TODO: add original reference. +# This problem is a simplified version of the +# [EURO-NeurIPS challenge 2022](https://euro-neurips-vrp-2022.challenges.ortec.com/), +# and solved using DFL in [Combinatorial Optimization enriched Machine Learning to solve the +# Dynamic Vehicle Routing Problem with Time Windows](https://arxiv.org/abs/2304.00789). diff --git a/docs/src/benchmarks/dynamic/maintenance.jl b/docs/src/benchmarks/dynamic/maintenance.jl index a9205de..3953da8 100644 --- a/docs/src/benchmarks/dynamic/maintenance.jl +++ b/docs/src/benchmarks/dynamic/maintenance.jl @@ -100,6 +100,3 @@ maximizer = generate_maximizer(b) # top-K selection among components wit # # **Maximizer:** `TopKPositiveMaximizer(K)` — selects the ``K`` components with the # highest positive scores for maintenance. -# -# !!! note "Reference" -# TODO: add original reference. 
diff --git a/docs/src/benchmarks/static/ranking.jl b/docs/src/benchmarks/static/ranking.jl index 330785e..ab55f46 100644 --- a/docs/src/benchmarks/static/ranking.jl +++ b/docs/src/benchmarks/static/ranking.jl @@ -68,6 +68,3 @@ compute_gap(b, dataset, model, maximizer) # **Model:** `Chain(Dense(nb_features → 1; bias=false), vec)` — predicts one score per item. # # **Maximizer:** `ranking(θ)` — returns a vector of ordinal ranks via `invperm(sortperm(θ))`. -# -# !!! note "Reference" -# TODO: add original reference. diff --git a/docs/src/benchmarks/static/subset_selection.jl b/docs/src/benchmarks/static/subset_selection.jl index 4edeba8..acbc00e 100644 --- a/docs/src/benchmarks/static/subset_selection.jl +++ b/docs/src/benchmarks/static/subset_selection.jl @@ -74,4 +74,4 @@ compute_gap(b, dataset, model, maximizer) # highest-scoring positions. # # !!! note "Reference" -# TODO: add original reference. +# Setting from [Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities](https://arxiv.org/abs/2307.13565) diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/vsp.jl index d132822..c0e83d9 100644 --- a/docs/src/benchmarks/stochastic/vsp.jl +++ b/docs/src/benchmarks/stochastic/vsp.jl @@ -115,4 +115,4 @@ plot_solution( # deterministic VSP instance. # # !!! note "Reference" -# TODO: add original reference. +# [Learning to Approximate Industrial Problems by Operations Research Classic Problems](https://hal.science/hal-02396091/document) diff --git a/docs/src/index.md b/docs/src/index.md index 4294eb6..6704100 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -53,8 +53,9 @@ Single-stage optimization problems with no randomness involved: - [`FixedSizeShortestPathBenchmark`](@ref): find shortest path on grid graphs with fixed size - [`WarcraftBenchmark`](@ref): shortest path on image maps -### Stochastic Benchmarks (`AbstractStochasticBenchmark`) +### Stochastic Benchmarks (`AbstractStochasticBenchmark`) Single-stage optimization problems under uncertainty: +- [`ContextualStochasticArgmaxBenchmark`](@ref): contextual argmax with stochastic utilities - [`StochasticVehicleSchedulingBenchmark`](@ref): stochastic vehicle scheduling under delay uncertainty ### Dynamic Benchmarks (`AbstractDynamicBenchmark`) From d00f25cd9f710956dcf41865befc49a982e5ab9c Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 28 Apr 2026 18:24:10 +0200 Subject: [PATCH 05/21] Wip rework --- docs/src/benchmarks/dynamic/dvsp.jl | 25 +++++++-- .../benchmarks/dynamic/dynamic_assortment.jl | 25 +++++++-- docs/src/benchmarks/dynamic/maintenance.jl | 24 ++++++-- .../static/fixed_size_shortest_path.jl | 15 +++-- .../static/portfolio_optimization.jl | 14 +++-- docs/src/benchmarks/static/ranking.jl | 22 +++++--- .../src/benchmarks/static/subset_selection.jl | 18 +++--- docs/src/benchmarks/stochastic/vsp.jl | 29 ++++++---- docs/src/benchmarks/toy/argmax.jl | 9 ++- docs/src/benchmarks/toy/argmax2d.jl | 6 +- .../toy/contextual_stochastic_argmax.jl | 16 ++++-- ext/plots/argmax2d_plots.jl | 2 +- ext/plots/argmax_plots.jl | 21 ++++--- .../contextual_stochastic_argmax_plots.jl | 56 ++++++++++++------- ext/plots/dynamic_assortment_plots.jl | 4 +- ext/plots/portfolio_plots.jl | 47 ++++++++-------- ext/plots/ranking_plots.jl | 49 +++++++++------- ext/plots/shortest_path_plots.jl | 56 ++++++++++--------- ext/plots/subset_selection_plots.jl | 42 ++++++++------ ext/plots/warcraft_plots.jl | 4 +- 20 files changed, 297 insertions(+), 187 deletions(-) diff --git 
a/docs/src/benchmarks/dynamic/dvsp.jl b/docs/src/benchmarks/dynamic/dvsp.jl index a72d3ab..bf3aa13 100644 --- a/docs/src/benchmarks/dynamic/dvsp.jl +++ b/docs/src/benchmarks/dynamic/dvsp.jl @@ -7,19 +7,32 @@ using Plots b = DynamicVehicleSchedulingBenchmark() -# ## A sample episode +# ## Observable input # -# Generate one environment and roll it out with the greedy policy (serves all pending -# customers immediately): +# Generate one environment and roll it out with the greedy policy to collect a sample +# trajectory. At each step the agent observes customer positions, start times, and which +# customers have reached their dispatch deadline: policies = generate_baseline_policies(b) env = generate_environments(b, 1)[1] _, trajectory = evaluate_policy!(policies.greedy, env) -# One step: depot (green square), must-dispatch customers (red stars; deadline reached), -# postponable customers (blue triangles), vehicle routes (lines): +# The observable state at step 1: depot (green square), must-dispatch customers +# (red stars; deadline reached), postponable customers (blue triangles): +plot_instance(b, trajectory[1]) + +# ## A training sample +# +# Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +# - `x`: 27-dimensional feature vector per customer (schedule slack, travel times, reachability) +# - `θ`: prize per customer (predicted by the model; used as optimization input) +# - `y`: routes dispatched at this step +# - `instance`: full DVSP state (customer positions, deadlines, current epoch) +# - `reward`: negative travel cost incurred at this step +# +# One step with dispatched routes: plot_solution(b, trajectory[1]) -# Multiple steps side by side — customers accumulate and routes change over time: +# Multiple steps side by side: customers accumulate and routes change over time: plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) # ## DFL pipeline components diff --git a/docs/src/benchmarks/dynamic/dynamic_assortment.jl b/docs/src/benchmarks/dynamic/dynamic_assortment.jl index 9d00d3c..e74369d 100644 --- a/docs/src/benchmarks/dynamic/dynamic_assortment.jl +++ b/docs/src/benchmarks/dynamic/dynamic_assortment.jl @@ -7,15 +7,28 @@ using Plots b = DynamicAssortmentBenchmark() -# ## A sample episode +# ## Observable input # -# Generate one environment and roll out with the greedy policy (offers the K highest-priced -# items at every step): +# Generate one environment and roll it out with the greedy policy to collect a sample +# trajectory. 
At each step the agent observes item prices, hype levels, saturation, and +# purchase history: policies = generate_baseline_policies(b) env = generate_environments(b, 1)[1] _, trajectory = evaluate_policy!(policies.greedy, env) -# One step: bar chart of item prices, green = items in the offered assortment: +# The observable state at step 1: item prices (fixed across steps): +plot_instance(b, trajectory[1]) + +# ## A training sample +# +# Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +# - `x`: `(d+8) × N` feature matrix per step (prices, hype, saturation, history, time) +# - `θ`: predicted utility score per item +# - `y`: offered assortment at this step (BitVector of length N, true = offered) +# - `instance`: full state tuple (features matrix, purchase history) +# - `reward`: price of the purchased item (0 if no purchase) +# +# One step with the offered assortment highlighted (green = offered): plot_solution(b, trajectory[1]) # A few steps side by side (prices are fixed; assortment composition changes over time): @@ -103,10 +116,10 @@ maximizer = generate_maximizer(b) # top-K selection by predicted utility # \xrightarrow[\text{Assortment}]{a_t} # ``` # -# **Model:** `Chain(Dense(d+8 → 5), Dense(5 → 1), vec)` — predicts one utility score +# **Model:** `Chain(Dense(d+8 → 5), Dense(5 → 1), vec)`: predicts one utility score # per item from the current state features. # -# **Maximizer:** `TopKMaximizer(K)` — selects the top ``K`` items by predicted utility. +# **Maximizer:** `TopKMaximizer(K)`: selects the top ``K`` items by predicted utility. # # !!! note "Reference" # [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) diff --git a/docs/src/benchmarks/dynamic/maintenance.jl b/docs/src/benchmarks/dynamic/maintenance.jl index 3953da8..dc07a5e 100644 --- a/docs/src/benchmarks/dynamic/maintenance.jl +++ b/docs/src/benchmarks/dynamic/maintenance.jl @@ -7,15 +7,27 @@ using Plots b = MaintenanceBenchmark(; N=5, K=2) # 5 components, maintain up to 2 per step -# ## A sample episode +# ## Observable input # -# Generate one environment and roll out with the greedy policy (maintains the most degraded -# components up to capacity): +# Generate one environment and roll it out with the greedy policy to collect a sample +# trajectory. 
At each step the agent observes the degradation level of each component: policies = generate_baseline_policies(b) env = generate_environments(b, 1)[1] _, trajectory = evaluate_policy!(policies.greedy, env) -# One step: bars show degradation levels (1 = new, n = failed), green = maintained, red = failed: +# The observable state at step 1: degradation levels per component (1 = new, n = failed): +plot_instance(b, trajectory[1]) + +# ## A training sample +# +# Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +# - `x`: degradation state vector (values in `1..n` per component) +# - `θ`: urgency score per component (predicted by model) +# - `y`: which components are maintained at this step (BitVector of length N) +# - `instance`: degradation state vector +# - `reward`: negative cost (maintenance and failure costs) at this step +# +# One step with maintenance decisions (green = maintained, red = failed): plot_solution(b, trajectory[1]) # A few steps side by side showing degradation evolving over time: @@ -95,8 +107,8 @@ maximizer = generate_maximizer(b) # top-K selection among components wit # \xrightarrow[\text{Maintenance}]{a_t} # ``` # -# **Model:** `Chain(Dense(N → N), Dense(N → N), vec)` — two-layer MLP predicting one +# **Model:** `Chain(Dense(N → N), Dense(N → N), vec)`: two-layer MLP predicting one # urgency score per component. # -# **Maximizer:** `TopKPositiveMaximizer(K)` — selects the ``K`` components with the +# **Maximizer:** `TopKPositiveMaximizer(K)`: selects the ``K`` components with the # highest positive scores for maintenance. diff --git a/docs/src/benchmarks/static/fixed_size_shortest_path.jl b/docs/src/benchmarks/static/fixed_size_shortest_path.jl index 8a6779d..c8d43ae 100644 --- a/docs/src/benchmarks/static/fixed_size_shortest_path.jl +++ b/docs/src/benchmarks/static/fixed_size_shortest_path.jl @@ -7,6 +7,14 @@ using Plots b = FixedSizeShortestPathBenchmark() +# ## Observable input +# +# At inference time the decision-maker observes the feature vector `x` and the fixed grid +# structure (source top-left, sink bottom-right): +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) + # ## A training sample # # Each sample is a labeled triple `(x, θ, y)`: @@ -14,12 +22,7 @@ b = FixedSizeShortestPathBenchmark() # - `θ`: true edge costs (training supervision only, hidden at test time) # - `y`: path indicator vector (`y[e] = 1` if edge `e` is on the optimal path) # -# True edge costs θ, averaged per vertex for display (hidden at test time — the model observes only `x`): -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) - -# Left: edge costs. Right: optimal path (white dots): +# Top: feature vector x. Bottom left: edge costs θ. 
Bottom right: optimal path y (white dots): plot_solution(b, sample) # ## Untrained policy diff --git a/docs/src/benchmarks/static/portfolio_optimization.jl b/docs/src/benchmarks/static/portfolio_optimization.jl index d7e7df0..794b71d 100644 --- a/docs/src/benchmarks/static/portfolio_optimization.jl +++ b/docs/src/benchmarks/static/portfolio_optimization.jl @@ -7,6 +7,13 @@ using Plots b = PortfolioOptimizationBenchmark() +# ## Observable input +# +# At inference time the decision-maker observes only the contextual feature vector `x`: +dataset = generate_dataset(b, 20; seed=0) +sample = first(dataset) +plot_instance(b, sample) + # ## A training sample # # Each sample is a labeled triple `(x, θ, y)`: @@ -14,12 +21,7 @@ b = PortfolioOptimizationBenchmark() # - `θ`: true expected asset returns (training supervision only, hidden at test time) # - `y`: optimal portfolio weights solving the Markowitz QP given `θ` # -# True expected returns θ (hidden at test time — the model observes only the feature vector `x`): -dataset = generate_dataset(b, 20; seed=0) -sample = first(dataset) -plot_instance(b, sample) - -# Left: true returns θ. Right: optimal portfolio weights y: +# Top: feature vector x. Bottom left: true returns θ. Bottom right: optimal weights y: plot_solution(b, sample) # ## Untrained policy diff --git a/docs/src/benchmarks/static/ranking.jl b/docs/src/benchmarks/static/ranking.jl index ab55f46..a536a5d 100644 --- a/docs/src/benchmarks/static/ranking.jl +++ b/docs/src/benchmarks/static/ranking.jl @@ -1,25 +1,29 @@ # # Ranking -# Rank a set of items by predicted cost: the model must learn to sort items by their -# hidden scores from observable features alone. +# Rank a set of items. Each item has a hidden score, correlated with observable input +# features. The goal is to learn to sort items by their hidden scores, using observable +# features alone. 
using DecisionFocusedLearningBenchmarks using Plots b = RankingBenchmark() +# ## Observable input +# +# At inference time the decision-maker observes only the feature matrix `x` +# (rows = features, columns = items): +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) + # ## A training sample # # Each sample is a labeled triple `(x, θ, y)`: # - `x`: feature matrix (rows = features, columns = items; observable at train and test time) # - `θ`: true item costs (training supervision only, hidden at test time) -# - `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the highest cost) +# - `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the lowest cost) # -# True costs θ (hidden at test time — the model observes only the feature matrix `x`): -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) - -# The same costs, colored by rank (dark blue = best, light = worst): +# The full training triple (features, true costs, and derived ranking): plot_solution(b, sample) # ## Untrained policy diff --git a/docs/src/benchmarks/static/subset_selection.jl b/docs/src/benchmarks/static/subset_selection.jl index acbc00e..0ac7904 100644 --- a/docs/src/benchmarks/static/subset_selection.jl +++ b/docs/src/benchmarks/static/subset_selection.jl @@ -5,21 +5,23 @@ using DecisionFocusedLearningBenchmarks using Plots -b = SubsetSelectionBenchmark() +b = SubsetSelectionBenchmark(; identity_mapping=false) + +# ## Observable input +# +# At inference time the decision-maker observes only the feature vector `x`: +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) # ## A training sample # # Each sample is a labeled triple `(x, θ, y)`: # - `x`: item feature vector (observable at train and test time) -# - `θ`: true item values (equal to `x` by default; otherwise derived via a hidden encoder) +# - `θ`: true item values, derived from `x` via a hidden encoder (training supervision only) # - `y`: selection indicator (`y[i] = 1` for the `k` highest-value items, 0 otherwise) # -# True item values θ (hidden at test time — the model observes only the feature vector `x`): -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) - -# The same values, with the `k` selected items highlighted in green: +# The full training triple (features, hidden values, and selection): plot_solution(b, sample) # ## Untrained policy diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/vsp.jl index c0e83d9..22ae547 100644 --- a/docs/src/benchmarks/stochastic/vsp.jl +++ b/docs/src/benchmarks/stochastic/vsp.jl @@ -7,24 +7,33 @@ using Plots b = StochasticVehicleSchedulingBenchmark() -# ## A sample instance +# ## Observable input # -# Each instance is a city with task locations and scheduled times. +# Each instance is a city with task locations and scheduled times. Task spatial positions +# and scheduled times are observable at inference time. 
# `store_city=true` is required to visualize the map (not needed for training): sample = generate_dataset(b, 1; store_city=true)[1] plot_instance(b, sample) -# ## Untrained policy +# ## A training sample +# +# Each sample is a labeled triple `(x, θ, y)`: +# - `x`: 20-dimensional feature vector per edge, encoding schedule slack and travel times +# - `θ`: adjusted edge costs (training supervision only, hidden at test time) +# - `y`: binary assignment (`y[(u,v)] = 1` if a vehicle travels edge `(u, v)` in the schedule) # -# Each edge `(u, v)` has a 20-dimensional feature vector encoding schedule slack, travel -# times, and timing — this is what the model receives as `x` per edge: +# Unlike static benchmarks, `y` labels are not available by default and must be attached +# via a `target_policy` (e.g., the deterministic VSP solver). Routes are visualized +# in the untrained policy section below. + +# ## Untrained policy + # A DFL policy chains two components: a statistical model predicting adjusted edge costs: -model = generate_statistical_model(b) # linear map: task features → adjusted edge costs +model = generate_statistical_model(b) # linear map: task features -> adjusted edge costs # and a maximizer solving the deterministic VSP given those costs: maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) -# The untrained model predicts random edge costs; the resulting schedule is arbitrary. -# Run the solver on predicted costs to see a route visualization: +# The untrained model predicts random edge costs; the resulting schedule is arbitrary: θ_pred = model(sample.x) y_pred = maximizer(θ_pred; sample.context...) plot_solution( @@ -109,9 +118,9 @@ plot_solution( # costs ``\hat{c}`` that implicitly account for expected stochastic delays, while keeping # the fast deterministic solver at inference time. # -# **Model:** `Chain(Dense(20 → 1; bias=false), vec)` — predicts one adjusted cost per edge. +# **Model:** `Chain(Dense(20 -> 1; bias=false), vec)`: predicts one adjusted cost per edge. # -# **Maximizer:** `StochasticVehicleSchedulingMaximizer` — HiGHS MIP solver on the +# **Maximizer:** `StochasticVehicleSchedulingMaximizer`: HiGHS MIP solver on the # deterministic VSP instance. # # !!! note "Reference" diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/toy/argmax.jl index aaed6cd..ed83e2c 100644 --- a/docs/src/benchmarks/toy/argmax.jl +++ b/docs/src/benchmarks/toy/argmax.jl @@ -1,9 +1,8 @@ # # Argmax -# Select the single best item from a set of `n` items. Item scores are **hidden**, -# only a feature matrix `x` correlated with these scores is observable. -# This problem can also be seen as a multiclass classification problem where -# we use an argmax layer instead of a softmax. This is not very useful in practice, it's more a -# minimalist toy problem to showcase DFL concepts in the simplest possible setting. +# Select the single best item from a set of `n` items, given features correlated with hidden +# item scores. This is the **simplest possible DFL setting**: equivalent to multiclass +# classification, but with an argmax layer instead of softmax. Useful as a minimal sandbox for +# understanding DFL concepts. 
using DecisionFocusedLearningBenchmarks using Plots diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl index 0ef7649..4f57c25 100644 --- a/docs/src/benchmarks/toy/argmax2d.jl +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -1,6 +1,8 @@ # # Argmax on a 2D polytope -# Select the best vertex of a random 2D polytope: predict a 2D cost vector from features, -# then return the vertex maximizing the dot product with it. +# Select the best vertex of a random convex polytope in 2D: predict a cost direction θ from +# features, then return the vertex `v` maximizing `θᵀv`. The 2D setting makes this benchmark +# visual: the cost direction and selected vertex can be plotted directly, and the loss +# landscape can be shown as a contour plot over the 2D θ space. using DecisionFocusedLearningBenchmarks using Plots diff --git a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl index 293616c..8621887 100644 --- a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl +++ b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl @@ -1,14 +1,18 @@ # # Contextual Stochastic Argmax -# Select the best item when utilities are random but correlated with observable context: -# a linear model must learn the mapping from context to expected utilities. +# Select the best item from a set of `n` items with stochastic utilities: each scenario draws +# a different utility vector, but utilities depend on observable context features. This is a +# toy benchmark designed so that a linear model can exactly recover the optimal +# context-to-utility mapping. using DecisionFocusedLearningBenchmarks using Plots b = ContextualStochasticArgmaxBenchmark() -# Stochastic benchmarks need a labeling policy to generate training targets. -# We use the anticipative oracle: given realized scenario ξ it returns the best item. +# `generate_dataset` returns unlabeled samples (`y = nothing`) for this benchmark. +# A `target_policy` must be provided to attach labels. Here we use the anticipative +# oracle: it returns the item with the highest realized utility for each scenario, +# giving one labeled sample per scenario per instance. anticipative = generate_anticipative_solver(b) policy = (ctx, scenarios) -> [ @@ -20,8 +24,8 @@ sample = first(dataset) # ## Observable input # -# At inference time `c_base` and `x_raw` are known (not the realized utility vector ξ). -# `plot_instance` shows the base utilities `c_base`: +# At inference time the model observes `x = [c_base; x_raw]`. `plot_instance` shows both +# components: base utilities `c_base` (left) and context features `x_raw` (right): plot_instance(b, sample) # ## A training sample diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl index cdb9800..b8cfb79 100644 --- a/ext/plots/argmax2d_plots.jl +++ b/ext/plots/argmax2d_plots.jl @@ -39,7 +39,7 @@ function _plot_y!(pl, y) color="#CB3C33", markersize=9, markershape=:square, - label=L"f(\theta)", + label=L"y = \mathrm{argmax}_v\; \theta^\top v", ) end diff --git a/ext/plots/argmax_plots.jl b/ext/plots/argmax_plots.jl index 7886aad..e159398 100644 --- a/ext/plots/argmax_plots.jl +++ b/ext/plots/argmax_plots.jl @@ -21,8 +21,8 @@ end """ $TYPEDSIGNATURES -Plot the features `x`, scores `θ`, and decision `y` in `sample` as heatmaps. -All three share the same item axis (columns). +Plot the features `x` as a heatmap, the scores `θ` as a bar chart, and the +decision `y` as a one-hot heatmap. All three share the same item axis. 
""" function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) x = sample.x # nb_features × n @@ -33,14 +33,13 @@ function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) p1 = Plots.heatmap( x; ylabel="Feature", title="x (features, observable)", xticks=(1:n, fill("", n)) ) - θ_min, θ_max = extrema(θ) - p2 = Plots.heatmap( - reshape(Float64.(θ), 1, n); - ylabel="θ", - title="θ: scores [$(round(θ_min; sigdigits=2)), $(round(θ_max; sigdigits=2))]", - yticks=false, + p2 = Plots.bar( + 1:n, + Float64.(θ); + legend=false, + ylabel="Score", + title="θ (scores)", xticks=(1:n, fill("", n)), - colorbar=false, ) p3 = Plots.heatmap( reshape(Float64.(y), 1, n); @@ -53,6 +52,6 @@ function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) colorbar=false, ) - l = Plots.@layout [a{0.65h}; b{0.175h}; c{0.175h}] - return Plots.plot(p1, p2, p3; layout=l, size=(600, 420), kwargs...) + l = Plots.@layout [a{0.55h}; b{0.3h}; c{0.15h}] + return Plots.plot(p1, p2, p3; layout=l, size=(600, 480), kwargs...) end diff --git a/ext/plots/contextual_stochastic_argmax_plots.jl b/ext/plots/contextual_stochastic_argmax_plots.jl index 601f941..d9d2e63 100644 --- a/ext/plots/contextual_stochastic_argmax_plots.jl +++ b/ext/plots/contextual_stochastic_argmax_plots.jl @@ -1,23 +1,50 @@ has_visualization(::ContextualStochasticArgmaxBenchmark) = true function plot_instance(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) - c_base = sample.c_base # base utilities from context + c_base = sample.c_base # base utilities (first n components of x) + x_raw = sample.x_raw # context features (last d components of x) n = length(c_base) - return Plots.bar( + d = length(x_raw) + + p1 = Plots.bar( 1:n, c_base; legend=false, xlabel="Item", ylabel="Base utility", - title="Instance (base utilities c_base)", + title="c_base (base utilities)", color=:steelblue, - kwargs..., ) + p2 = Plots.bar( + 1:d, + x_raw; + legend=false, + xlabel="Feature", + ylabel="Value", + title="x_raw (context features)", + color=:darkorange, + ) + return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) end function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) - y = sample.y # one-hot vector + x = sample.x # full feature vector [c_base; x_raw] + y = sample.y # one-hot vector + n_x = length(x) n = length(y) + n_c = length(sample.c_base) + + # Color x bars: steelblue for c_base components, darkorange for x_raw components + x_colors = vcat(fill(:steelblue, n_c), fill(:darkorange, n_x - n_c)) + p_x = Plots.bar( + 1:n_x, + x; + color=x_colors, + legend=false, + xlabel="Feature index", + ylabel="Value", + title="x (blue = c_base, orange = x_raw)", + ) # Pick the best available utility vector to display if hasproperty(sample.extra, :scenario) @@ -32,25 +59,16 @@ function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample end p1 = Plots.bar( - 1:n, - u; - legend=false, - xlabel="Item", - ylabel="Utility", - title=u_title, - color=:steelblue, + 1:n, u; legend=false, xlabel="Item", ylabel="Utility", + title=u_title, color=:steelblue, ) colors = [y[i] > 0 ? :firebrick : :steelblue for i in 1:n] p2 = Plots.bar( - 1:n, - u; - color=colors, - legend=false, - xlabel="Item", - ylabel="Utility", + 1:n, u; color=colors, legend=false, xlabel="Item", ylabel="Utility", title="Selected item (red)", ) - return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) 
+ l = Plots.@layout [a{0.35h}; [b c]] + return Plots.plot(p_x, p1, p2; layout=l, size=(800, 500), kwargs...) end diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl index dff56ec..293cdd7 100644 --- a/ext/plots/dynamic_assortment_plots.jl +++ b/ext/plots/dynamic_assortment_plots.jl @@ -10,7 +10,7 @@ function plot_instance(::DynamicAssortmentBenchmark, sample::DataSample; kwargs. legend=false, xlabel="Item", ylabel="Price", - title="Instance (item prices) — step $(length(sample.instance[2]) + 1)", + title="Instance (item prices): step $(length(sample.instance[2]) + 1)", color=:steelblue, kwargs..., ) @@ -27,7 +27,7 @@ function plot_solution(::DynamicAssortmentBenchmark, sample::DataSample; kwargs. legend=false, xlabel="Item", ylabel="Price", - title="Assortment (green = offered) — step $(length(sample.instance[2]) + 1)", + title="Assortment (green = offered): step $(length(sample.instance[2]) + 1)", color=colors, kwargs..., ) diff --git a/ext/plots/portfolio_plots.jl b/ext/plots/portfolio_plots.jl index 77c3a7e..ba6e5f8 100644 --- a/ext/plots/portfolio_plots.jl +++ b/ext/plots/portfolio_plots.jl @@ -1,41 +1,44 @@ has_visualization(::PortfolioOptimizationBenchmark) = true function plot_instance(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) - θ = sample.θ - d = length(θ) + x = sample.x + p = length(x) return Plots.bar( - 1:d, - θ; + 1:p, + Float64.(x); legend=false, - xlabel="Asset", - ylabel="Expected return", - title="Instance (expected returns θ)", + xlabel="Feature", + ylabel="Value", + title="Features x (observable input)", color=:steelblue, + xticks=1:p, kwargs..., ) end function plot_solution(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) + x = sample.x θ = sample.θ y = sample.y + p = length(x) d = length(θ) + + p_x = Plots.bar( + 1:p, Float64.(x); + legend=false, xlabel="Feature", ylabel="Value", + title="x (features, observable)", color=:steelblue, xticks=1:p, + ) p1 = Plots.bar( - 1:d, - θ; - legend=false, - xlabel="Asset", - ylabel="Expected return", - title="Expected returns θ", - color=:steelblue, + 1:d, θ; + legend=false, xlabel="Asset", ylabel="Expected return", + title="θ (expected returns)", color=:steelblue, ) p2 = Plots.bar( - 1:d, - y; - legend=false, - xlabel="Asset", - ylabel="Portfolio weight", - title="Portfolio weights y", - color=:seagreen, + 1:d, y; + legend=false, xlabel="Asset", ylabel="Portfolio weight", + title="y (portfolio weights)", color=:seagreen, ) - return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) + + l = Plots.@layout [a{0.3h}; [b c]] + return Plots.plot(p_x, p1, p2; layout=l, size=(800, 500), kwargs...) end diff --git a/ext/plots/ranking_plots.jl b/ext/plots/ranking_plots.jl index dc50b2d..1c4592c 100644 --- a/ext/plots/ranking_plots.jl +++ b/ext/plots/ranking_plots.jl @@ -1,35 +1,46 @@ has_visualization(::RankingBenchmark) = true function plot_instance(::RankingBenchmark, sample::DataSample; kwargs...) - θ = sample.θ - n = length(θ) - return Plots.bar( - 1:n, - θ; - legend=false, + x = sample.x # nb_features × n + n = size(x, 2) + return Plots.heatmap( + x; xlabel="Item", - ylabel="Cost", - title="Instance (costs θ)", - color=:steelblue, + ylabel="Feature", + title="Features x (observable input)", + xticks=1:n, kwargs..., ) end function plot_solution(::RankingBenchmark, sample::DataSample; kwargs...) 
- θ = sample.θ + x = sample.x # nb_features × n + θ = sample.θ # length n y = sample.y # y[i] = rank of item i (1 = best) n = length(θ) - # Color by rank: rank 1 (best) in dark blue, rank n (worst) in light - palette = Plots.cgrad(:Blues, n; rev=true, categorical=true) - colors = [palette[y[i]] for i in 1:n] - return Plots.bar( + + p1 = Plots.heatmap( + x; ylabel="Feature", title="x (features, observable)", xticks=(1:n, fill("", n)) + ) + p2 = Plots.bar( 1:n, - θ; + Float64.(θ); legend=false, - xlabel="Item", ylabel="Cost", - title="Solution (color = rank, dark = best)", - color=colors, - kwargs..., + title="θ (costs)", + xticks=(1:n, fill("", n)), ) + p3 = Plots.bar( + 1:n, + Float64.(y); + legend=false, + xlabel="Item", + ylabel="Rank", + title="y (rank, lower = better)", + color=:steelblue, + xticks=1:n, + ) + + l = Plots.@layout [a{0.55h}; b{0.225h}; c{0.225h}] + return Plots.plot(p1, p2, p3; layout=l, size=(600, 500), kwargs...) end diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 2a7fcf8..85cb6b8 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -38,40 +38,47 @@ function _grid_matrices(bench::FixedSizeShortestPathBenchmark, θ, y) end function plot_instance(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) - weight_grid, _ = _grid_matrices(bench, sample.θ, falses(length(sample.θ))) - return Plots.heatmap( - weight_grid; - yflip=true, - aspect_ratio=:equal, - title="Edge weights (per vertex)", - colorbar=true, - kwargs..., + rows, cols = bench.grid_size + # Show only the known graph structure (no edge costs) + interior_xs = [c for r in 1:rows for c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols)] + interior_ys = [r for r in 1:rows for c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols)] + pl = Plots.plot(; + xlim=(0.5, cols + 0.5), ylim=(0.5, rows + 0.5), + yflip=true, aspect_ratio=:equal, legend=:topright, + title="Grid graph ($(rows)×$(cols))", + framestyle=:box, grid=false, kwargs..., ) + Plots.scatter!(pl, interior_xs, interior_ys; + color=:lightgray, markersize=8, markerstrokecolor=:gray, + markerstrokewidth=1, label=false) + Plots.scatter!(pl, [1], [1]; + color=:seagreen, markersize=10, markershape=:square, label="source", markerstrokewidth=0) + Plots.scatter!(pl, [cols], [rows]; + color=:crimson, markersize=10, markershape=:square, label="sink", markerstrokewidth=0) + return pl end function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) 
+ x = sample.x + p_feat = length(x) weight_grid, path_grid = _grid_matrices(bench, sample.θ, sample.y) rows, cols = bench.grid_size + p_x = Plots.bar( + 1:p_feat, Float64.(x); + legend=false, xlabel="Feature", ylabel="Value", + title="x (features, observable)", color=:steelblue, xticks=1:p_feat, + ) p1 = Plots.heatmap( weight_grid; - yflip=true, - aspect_ratio=:equal, - title="Edge weights", - colorbar=true, - framestyle=:none, + yflip=true, aspect_ratio=:equal, + title="Edge weights θ", colorbar=true, framestyle=:none, ) - p2 = Plots.heatmap( weight_grid; - yflip=true, - aspect_ratio=:equal, - title="Shortest path", - colorbar=false, - framestyle=:none, - color=:Blues, + yflip=true, aspect_ratio=:equal, + title="Shortest path y", colorbar=false, framestyle=:none, color=:Blues, ) - # Highlight path vertices with scatter path_xs = Int[] path_ys = Int[] for r in 1:rows, c in 1:cols @@ -80,9 +87,8 @@ function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample push!(path_ys, r) end end - Plots.scatter!( - p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false - ) + Plots.scatter!(p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false) - return Plots.plot(p1, p2; layout=(1, 2), size=(700, 320), kwargs...) + l = Plots.@layout [a{0.25h}; [b c]] + return Plots.plot(p_x, p1, p2; layout=l, size=(700, 500), kwargs...) end diff --git a/ext/plots/subset_selection_plots.jl b/ext/plots/subset_selection_plots.jl index 40e778b..cea84a5 100644 --- a/ext/plots/subset_selection_plots.jl +++ b/ext/plots/subset_selection_plots.jl @@ -1,33 +1,43 @@ has_visualization(::SubsetSelectionBenchmark) = true function plot_instance(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) - θ = sample.θ - n = length(θ) + x = sample.x # length n feature vector + n = length(x) return Plots.bar( 1:n, - θ; + Float64.(x); legend=false, xlabel="Item", - ylabel="Value", - title="Instance (values θ)", + ylabel="Feature value", + title="Features x (observable input)", color=:steelblue, + xticks=1:n, kwargs..., ) end function plot_solution(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) - θ = sample.θ + x = sample.x # length n feature vector + θ = sample.θ # length n hidden values y = sample.y # y[i] = true if item i is selected n = length(θ) - colors = [y[i] ? :seagreen : :lightgray for i in 1:n] - return Plots.bar( - 1:n, - θ; - legend=false, - xlabel="Item", - ylabel="Value", - title="Solution (selected items in green)", - color=colors, - kwargs..., + + p1 = Plots.bar( + 1:n, Float64.(x); + legend=false, ylabel="Feature value", title="x (features, observable)", + color=:steelblue, xticks=(1:n, fill("", n)), + ) + p2 = Plots.bar( + 1:n, Float64.(θ); + legend=false, ylabel="Value", title="θ (true values)", + color=:steelblue, xticks=(1:n, fill("", n)), ) + p3 = Plots.heatmap( + reshape(Float64.(y), 1, n); + xlabel="Item", ylabel="y", title="y (selected items)", + yticks=false, xticks=1:n, color=:Greens, colorbar=false, + ) + + l = Plots.@layout [a{0.35h}; b{0.35h}; c{0.3h}] + return Plots.plot(p1, p2, p3; layout=l, size=(600, 480), kwargs...) 
end diff --git a/ext/plots/warcraft_plots.jl b/ext/plots/warcraft_plots.jl index 2029225..805d391 100644 --- a/ext/plots/warcraft_plots.jl +++ b/ext/plots/warcraft_plots.jl @@ -15,8 +15,8 @@ function plot_solution( ::WarcraftBenchmark, sample::DataSample; θ_true=sample.θ, - θ_title="Weights", - y_title="Path", + θ_title="Cell costs θ", + y_title="Path y", kwargs..., ) x = sample.x From f5cce6a06b875422a3597bd2951625e2cbfb14d3 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 28 Apr 2026 18:34:11 +0200 Subject: [PATCH 06/21] formatting --- .../contextual_stochastic_argmax_plots.jl | 16 +++- ext/plots/portfolio_plots.jl | 31 +++++-- ext/plots/shortest_path_plots.jl | 88 ++++++++++++++----- ext/plots/subset_selection_plots.jl | 29 ++++-- 4 files changed, 124 insertions(+), 40 deletions(-) diff --git a/ext/plots/contextual_stochastic_argmax_plots.jl b/ext/plots/contextual_stochastic_argmax_plots.jl index d9d2e63..a83f4da 100644 --- a/ext/plots/contextual_stochastic_argmax_plots.jl +++ b/ext/plots/contextual_stochastic_argmax_plots.jl @@ -59,13 +59,23 @@ function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample end p1 = Plots.bar( - 1:n, u; legend=false, xlabel="Item", ylabel="Utility", - title=u_title, color=:steelblue, + 1:n, + u; + legend=false, + xlabel="Item", + ylabel="Utility", + title=u_title, + color=:steelblue, ) colors = [y[i] > 0 ? :firebrick : :steelblue for i in 1:n] p2 = Plots.bar( - 1:n, u; color=colors, legend=false, xlabel="Item", ylabel="Utility", + 1:n, + u; + color=colors, + legend=false, + xlabel="Item", + ylabel="Utility", title="Selected item (red)", ) diff --git a/ext/plots/portfolio_plots.jl b/ext/plots/portfolio_plots.jl index ba6e5f8..eb4b771 100644 --- a/ext/plots/portfolio_plots.jl +++ b/ext/plots/portfolio_plots.jl @@ -24,19 +24,32 @@ function plot_solution(::PortfolioOptimizationBenchmark, sample::DataSample; kwa d = length(θ) p_x = Plots.bar( - 1:p, Float64.(x); - legend=false, xlabel="Feature", ylabel="Value", - title="x (features, observable)", color=:steelblue, xticks=1:p, + 1:p, + Float64.(x); + legend=false, + xlabel="Feature", + ylabel="Value", + title="x (features, observable)", + color=:steelblue, + xticks=1:p, ) p1 = Plots.bar( - 1:d, θ; - legend=false, xlabel="Asset", ylabel="Expected return", - title="θ (expected returns)", color=:steelblue, + 1:d, + θ; + legend=false, + xlabel="Asset", + ylabel="Expected return", + title="θ (expected returns)", + color=:steelblue, ) p2 = Plots.bar( - 1:d, y; - legend=false, xlabel="Asset", ylabel="Portfolio weight", - title="y (portfolio weights)", color=:seagreen, + 1:d, + y; + legend=false, + xlabel="Asset", + ylabel="Portfolio weight", + title="y (portfolio weights)", + color=:seagreen, ) l = Plots.@layout [a{0.3h}; [b c]] diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 85cb6b8..4787910 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -40,21 +40,55 @@ end function plot_instance(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) 
rows, cols = bench.grid_size # Show only the known graph structure (no edge costs) - interior_xs = [c for r in 1:rows for c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols)] - interior_ys = [r for r in 1:rows for c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols)] + interior_xs = [ + c for r in 1:rows for + c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols) + ] + interior_ys = [ + r for r in 1:rows for + c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols) + ] pl = Plots.plot(; - xlim=(0.5, cols + 0.5), ylim=(0.5, rows + 0.5), - yflip=true, aspect_ratio=:equal, legend=:topright, + xlim=(0.5, cols + 0.5), + ylim=(0.5, rows + 0.5), + yflip=true, + aspect_ratio=:equal, + legend=:topright, title="Grid graph ($(rows)×$(cols))", - framestyle=:box, grid=false, kwargs..., + framestyle=:box, + grid=false, + kwargs..., + ) + Plots.scatter!( + pl, + interior_xs, + interior_ys; + color=:lightgray, + markersize=8, + markerstrokecolor=:gray, + markerstrokewidth=1, + label=false, + ) + Plots.scatter!( + pl, + [1], + [1]; + color=:seagreen, + markersize=10, + markershape=:square, + label="source", + markerstrokewidth=0, + ) + Plots.scatter!( + pl, + [cols], + [rows]; + color=:crimson, + markersize=10, + markershape=:square, + label="sink", + markerstrokewidth=0, ) - Plots.scatter!(pl, interior_xs, interior_ys; - color=:lightgray, markersize=8, markerstrokecolor=:gray, - markerstrokewidth=1, label=false) - Plots.scatter!(pl, [1], [1]; - color=:seagreen, markersize=10, markershape=:square, label="source", markerstrokewidth=0) - Plots.scatter!(pl, [cols], [rows]; - color=:crimson, markersize=10, markershape=:square, label="sink", markerstrokewidth=0) return pl end @@ -65,19 +99,31 @@ function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample rows, cols = bench.grid_size p_x = Plots.bar( - 1:p_feat, Float64.(x); - legend=false, xlabel="Feature", ylabel="Value", - title="x (features, observable)", color=:steelblue, xticks=1:p_feat, + 1:p_feat, + Float64.(x); + legend=false, + xlabel="Feature", + ylabel="Value", + title="x (features, observable)", + color=:steelblue, + xticks=1:p_feat, ) p1 = Plots.heatmap( weight_grid; - yflip=true, aspect_ratio=:equal, - title="Edge weights θ", colorbar=true, framestyle=:none, + yflip=true, + aspect_ratio=:equal, + title="Edge weights θ", + colorbar=true, + framestyle=:none, ) p2 = Plots.heatmap( weight_grid; - yflip=true, aspect_ratio=:equal, - title="Shortest path y", colorbar=false, framestyle=:none, color=:Blues, + yflip=true, + aspect_ratio=:equal, + title="Shortest path y", + colorbar=false, + framestyle=:none, + color=:Blues, ) path_xs = Int[] path_ys = Int[] @@ -87,7 +133,9 @@ function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample push!(path_ys, r) end end - Plots.scatter!(p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false) + Plots.scatter!( + p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false + ) l = Plots.@layout [a{0.25h}; [b c]] return Plots.plot(p_x, p1, p2; layout=l, size=(700, 500), kwargs...) diff --git a/ext/plots/subset_selection_plots.jl b/ext/plots/subset_selection_plots.jl index cea84a5..66b3b66 100644 --- a/ext/plots/subset_selection_plots.jl +++ b/ext/plots/subset_selection_plots.jl @@ -23,19 +23,32 @@ function plot_solution(::SubsetSelectionBenchmark, sample::DataSample; kwargs... 
n = length(θ) p1 = Plots.bar( - 1:n, Float64.(x); - legend=false, ylabel="Feature value", title="x (features, observable)", - color=:steelblue, xticks=(1:n, fill("", n)), + 1:n, + Float64.(x); + legend=false, + ylabel="Feature value", + title="x (features, observable)", + color=:steelblue, + xticks=(1:n, fill("", n)), ) p2 = Plots.bar( - 1:n, Float64.(θ); - legend=false, ylabel="Value", title="θ (true values)", - color=:steelblue, xticks=(1:n, fill("", n)), + 1:n, + Float64.(θ); + legend=false, + ylabel="Value", + title="θ (true values)", + color=:steelblue, + xticks=(1:n, fill("", n)), ) p3 = Plots.heatmap( reshape(Float64.(y), 1, n); - xlabel="Item", ylabel="y", title="y (selected items)", - yticks=false, xticks=1:n, color=:Greens, colorbar=false, + xlabel="Item", + ylabel="y", + title="y (selected items)", + yticks=false, + xticks=1:n, + color=:Greens, + colorbar=false, ) l = Plots.@layout [a{0.35h}; b{0.35h}; c{0.3h}] From d29b5cdc6d8a306614d30948e209d99a2c2dd5ed Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 30 Apr 2026 15:46:23 +0200 Subject: [PATCH 07/21] update --- docs/src/benchmarks/toy/argmax.jl | 4 ++-- docs/src/benchmarks/toy/argmax2d.jl | 2 +- docs/src/index.md | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/toy/argmax.jl index ed83e2c..f7ad4fc 100644 --- a/docs/src/benchmarks/toy/argmax.jl +++ b/docs/src/benchmarks/toy/argmax.jl @@ -1,6 +1,6 @@ # # Argmax # Select the single best item from a set of `n` items, given features correlated with hidden -# item scores. This is the **simplest possible DFL setting**: equivalent to multiclass +# item scores. This is a minimalist DFL setting: equivalent to multiclass # classification, but with an argmax layer instead of softmax. Useful as a minimal sandbox for # understanding DFL concepts. @@ -72,7 +72,7 @@ mean(maximizer(model(s.x)) == s.y for s in dataset) # ```math # \xrightarrow[\text{Features}]{x \in \mathbb{R}^{p \times n}} # \fbox{Linear model $f_w$} -# \xrightarrow[\text{Predicted scores}]{\hat{\theta} \in \mathbb{R}^n} +# \xrightarrow[\text{Predicted scores}]{\theta \in \mathbb{R}^n} # \fbox{argmax} # \xrightarrow[\text{Selection}]{y \in \{0,1\}^n} # ``` diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl index 4f57c25..93ae1ca 100644 --- a/docs/src/benchmarks/toy/argmax2d.jl +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -71,7 +71,7 @@ plot_solution( # ```math # \xrightarrow[\text{Features}]{x} # \fbox{Linear model} -# \xrightarrow{\hat{\theta} \in \mathbb{R}^2} +# \xrightarrow{\theta \in \mathbb{R}^2} # \fbox{Polytope argmax} # \xrightarrow{y} # ``` diff --git a/docs/src/index.md b/docs/src/index.md index 6704100..ad5bd73 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -32,12 +32,12 @@ Where: ## Package Overview -**DecisionFocusedLearningBenchmarks.jl** provides a collection of benchmark problems for evaluating decision-focused learning algorithms. The package offers: +**DecisionFocusedLearningBenchmarks.jl** provides a collection of benchmark problems for evaluating decision-focused learning algorithms. 
The package contains: -- **Collection of benchmark problems** spanning diverse applications +- A **collection of benchmark problems** spanning diverse applications - **Common tools** for creating datasets, statistical models, and optimization algorithms -- **Generic interface** for building custom benchmarks -- Compatibility with [InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) and the whole [JuliaDecisionFocusedLearning](https://github.com/JuliaDecisionFocusedLearning) ecosystem +- A **generic interface** for building custom benchmarks +- **Compatibility** with [InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) and the whole [JuliaDecisionFocusedLearning](https://github.com/JuliaDecisionFocusedLearning) ecosystem ## Benchmark Categories From 240bb753fb3d9c691bf924f53142aad858a5d8b9 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 30 Apr 2026 16:25:05 +0200 Subject: [PATCH 08/21] DataSample copy constructor --- docs/src/benchmarks/toy/argmax.jl | 4 ++-- src/Utils/data_sample.jl | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/toy/argmax.jl index f7ad4fc..e8b1453 100644 --- a/docs/src/benchmarks/toy/argmax.jl +++ b/docs/src/benchmarks/toy/argmax.jl @@ -8,7 +8,7 @@ using DecisionFocusedLearningBenchmarks using Plots using Statistics -b = ArgmaxBenchmark(; instance_dim=10, nb_features=5, seed=0) +b = ArgmaxBenchmark(; seed=0) # ## Observable input # @@ -39,7 +39,7 @@ maximizer = generate_maximizer(b) # one-hot argmax θ_pred = model(sample.x) y_pred = maximizer(θ_pred) # -plot_solution(b, DataSample(; x=sample.x, θ=θ_pred, y=y_pred, sample.context...)) +plot_solution(b, DataSample(sample; θ=θ_pred, y=y_pred)) # The goal of training is to find parameters that maximize accuracy. # Current accuracy on the dataset: diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl index 1147761..e854d0e 100644 --- a/src/Utils/data_sample.jl +++ b/src/Utils/data_sample.jl @@ -82,6 +82,16 @@ function DataSample(; x=nothing, θ=nothing, y=nothing, extra=NamedTuple(), kwar return DataSample(x, θ, y, context, extra) end + +""" +$TYPEDSIGNATURES + +Copy constructor for `DataSample` with optional overrides. +""" +function DataSample(sample::DataSample; x=sample.x, θ=sample.θ, y=sample.y, extra=sample.extra, context=sample.context, kwargs...) + return DataSample(x=x, θ=θ, y=y, extra=extra, context..., kwargs...) 
+end + """ $TYPEDSIGNATURES From 133959dd3661b0367f2c09942e13cd1f1eec03d2 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 30 Apr 2026 16:44:17 +0200 Subject: [PATCH 09/21] Plotting coverage --- docs/src/benchmarks/toy/argmax2d.jl | 8 ++------ src/Utils/data_sample.jl | 13 ++++++++++--- test/argmax.jl | 11 +++++++++++ test/contextual_stochastic_argmax.jl | 17 +++++++++++++++++ test/dynamic_assortment.jl | 20 ++++++++++++++++++++ test/fixed_size_shortest_path.jl | 11 +++++++++++ test/maintenance.jl | 20 ++++++++++++++++++++ test/portfolio_optimization.jl | 11 +++++++++++ test/ranking.jl | 11 +++++++++++ test/subset_selection.jl | 11 +++++++++++ 10 files changed, 124 insertions(+), 9 deletions(-) diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl index 93ae1ca..eb2f3b1 100644 --- a/docs/src/benchmarks/toy/argmax2d.jl +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -37,12 +37,8 @@ maximizer = generate_maximizer(b) # vertex maximizing θᵀv over polyto # A randomly initialized policy predicts an arbitrary cost direction: θ_pred = model(sample.x) -plot_solution( - b, - DataSample(; - sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred; sample.context...) - ), -) +y_pred = maximizer(θ_pred; sample.context...) +plot_solution(b, DataSample(sample; θ=θ_pred, y=y_pred)) # --- # ## Problem Description diff --git a/src/Utils/data_sample.jl b/src/Utils/data_sample.jl index e854d0e..1a7002d 100644 --- a/src/Utils/data_sample.jl +++ b/src/Utils/data_sample.jl @@ -82,14 +82,21 @@ function DataSample(; x=nothing, θ=nothing, y=nothing, extra=NamedTuple(), kwar return DataSample(x, θ, y, context, extra) end - """ $TYPEDSIGNATURES Copy constructor for `DataSample` with optional overrides. """ -function DataSample(sample::DataSample; x=sample.x, θ=sample.θ, y=sample.y, extra=sample.extra, context=sample.context, kwargs...) - return DataSample(x=x, θ=θ, y=y, extra=extra, context..., kwargs...) +function DataSample( + sample::DataSample; + x=sample.x, + θ=sample.θ, + y=sample.y, + extra=sample.extra, + context=sample.context, + kwargs..., +) + return DataSample(; x=x, θ=θ, y=y, extra=extra, context..., kwargs...) 
end """ diff --git a/test/argmax.jl b/test/argmax.jl index d772c4d..0b05531 100644 --- a/test/argmax.jl +++ b/test/argmax.jl @@ -33,4 +33,15 @@ y = maximizer(θ) @test length(y) == instance_dim end + + @testset "Plots" begin + using Plots + @test has_visualization(b) + fig1 = plot_instance(b, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot + end end diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl index 1e5e59a..38bcfa9 100644 --- a/test/contextual_stochastic_argmax.jl +++ b/test/contextual_stochastic_argmax.jl @@ -113,3 +113,20 @@ end s = DataSample(; x=randn(Float32, 8), y=maximizer(randn(Float32, 5))) @test_throws Exception objective_value(b, s, s.y) end + +@testset "ContextualStochasticArgmax - Plots" begin + using DecisionFocusedLearningBenchmarks + using Plots + + b = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) + policies = generate_baseline_policies(b) + dataset = generate_dataset(b, 2; nb_scenarios=2, target_policy=policies.saa) + + @test has_visualization(b) + fig1 = plot_instance(b, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot +end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 93c4f1e..7b4075a 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -356,3 +356,23 @@ end @test length(y) == DAP.item_count(b) @test sum(y) == DAP.assortment_size(b) end + +@testset "DynamicAssortment - Plots" begin + using DecisionFocusedLearningBenchmarks + using Plots + + b = DynamicAssortmentBenchmark(; N=4, d=2, K=2, max_steps=5, exogenous=true) + envs = generate_environments(b, 2; seed=0) + policies = generate_baseline_policies(b) + _, traj = evaluate_policy!(policies[1], envs) + + @test has_visualization(b) + fig1 = plot_instance(b, traj[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, traj[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, traj[1], traj[2].y) + @test fig3 isa Plots.Plot + fig4 = plot_trajectory(b, traj) + @test fig4 isa Plots.Plot +end diff --git a/test/fixed_size_shortest_path.jl b/test/fixed_size_shortest_path.jl index eacdd64..1ca1d1c 100644 --- a/test/fixed_size_shortest_path.jl +++ b/test/fixed_size_shortest_path.jl @@ -32,4 +32,15 @@ y = maximizer(θ) @test length(y) == length(y_true) end + + @testset "Plots" begin + using Plots + @test has_visualization(b) + fig1 = plot_instance(b, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot + end end diff --git a/test/maintenance.jl b/test/maintenance.jl index a2a9983..3adab18 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -225,3 +225,23 @@ end θ = fill(-1.0, 10) @test maximizer(θ) == falses(10) end + +@testset "Maintenance - Plots" begin + using DecisionFocusedLearningBenchmarks + using Plots + + b = MaintenanceBenchmark() + envs = generate_environments(b, 2; seed=0) + policies = generate_baseline_policies(b) + _, traj = evaluate_policy!(policies[1], envs) + + @test has_visualization(b) + fig1 = plot_instance(b, traj[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, traj[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, traj[1], traj[2].y) + @test fig3 isa Plots.Plot + fig4 = 
plot_trajectory(b, traj) + @test fig4 isa Plots.Plot +end diff --git a/test/portfolio_optimization.jl b/test/portfolio_optimization.jl index b436c81..a951054 100644 --- a/test/portfolio_optimization.jl +++ b/test/portfolio_optimization.jl @@ -29,4 +29,15 @@ gap = compute_gap(b, dataset[1:5], model, maximizer) @test isfinite(gap) + + @testset "Plots" begin + using Plots + @test has_visualization(b) + fig1 = plot_instance(b, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot + end end diff --git a/test/ranking.jl b/test/ranking.jl index b3c2a3b..59f0d46 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -34,4 +34,15 @@ gap = compute_gap(b, dataset[1:5], model, maximizer) @test isfinite(gap) @test gap >= 0 + + @testset "Plots" begin + using Plots + @test has_visualization(b) + fig1 = plot_instance(b, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot + end end diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 90d3150..4de6183 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -40,4 +40,15 @@ gap = compute_gap(b, dataset[1:5], model, maximizer) @test isfinite(gap) @test gap >= 0 + + @testset "Plots" begin + using Plots + @test has_visualization(b_identity) + fig1 = plot_instance(b_identity, dataset[1]) + @test fig1 isa Plots.Plot + fig2 = plot_solution(b_identity, dataset[1]) + @test fig2 isa Plots.Plot + fig3 = plot_solution(b_identity, dataset[1], dataset[2].y) + @test fig3 isa Plots.Plot + end end From 34df24c90f5771bea93953fcf40bc7fec9059af1 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Thu, 30 Apr 2026 18:40:18 +0200 Subject: [PATCH 10/21] Breaking change: rename plot_solution to plot_sample --- docs/src/benchmarks/dynamic/dvsp.jl | 2 +- .../benchmarks/dynamic/dynamic_assortment.jl | 2 +- docs/src/benchmarks/dynamic/maintenance.jl | 2 +- .../static/fixed_size_shortest_path.jl | 6 +-- .../static/portfolio_optimization.jl | 8 ++-- docs/src/benchmarks/static/ranking.jl | 6 +-- .../src/benchmarks/static/subset_selection.jl | 6 +-- docs/src/benchmarks/static/warcraft.jl | 6 +-- docs/src/benchmarks/stochastic/vsp.jl | 6 +-- docs/src/benchmarks/toy/argmax.jl | 4 +- docs/src/benchmarks/toy/argmax2d.jl | 4 +- .../toy/contextual_stochastic_argmax.jl | 13 +++--- docs/src/custom_benchmarks.md | 2 +- docs/src/tutorials/warcraft_tutorial.jl | 8 ++-- docs/src/using_benchmarks.md | 8 ++-- ext/DFLBenchmarksPlotsExt.jl | 10 ++--- ext/plots/argmax2d_plots.jl | 4 +- ext/plots/argmax_plots.jl | 2 +- .../contextual_stochastic_argmax_plots.jl | 42 +++++-------------- ext/plots/dvs_plots.jl | 2 +- ext/plots/dynamic_assortment_plots.jl | 4 +- ext/plots/maintenance_plots.jl | 4 +- ext/plots/portfolio_plots.jl | 2 +- ext/plots/ranking_plots.jl | 2 +- ext/plots/shortest_path_plots.jl | 2 +- ext/plots/subset_selection_plots.jl | 2 +- ext/plots/svs_plots.jl | 4 +- ext/plots/warcraft_plots.jl | 2 +- src/DecisionFocusedLearningBenchmarks.jl | 2 +- src/Utils/Utils.jl | 2 +- src/Utils/interface/abstract_benchmark.jl | 6 +-- src/Utils/interface/static_benchmark.jl | 4 +- test/argmax.jl | 4 +- test/argmax_2d.jl | 4 +- test/contextual_stochastic_argmax.jl | 6 ++- test/dynamic_assortment.jl | 4 +- test/dynamic_vsp_plots.jl | 4 +- test/fixed_size_shortest_path.jl | 4 +- test/maintenance.jl | 4 +- 
test/portfolio_optimization.jl | 4 +- test/ranking.jl | 4 +- test/subset_selection.jl | 4 +- test/vsp.jl | 2 +- test/warcraft.jl | 4 +- 44 files changed, 103 insertions(+), 124 deletions(-) diff --git a/docs/src/benchmarks/dynamic/dvsp.jl b/docs/src/benchmarks/dynamic/dvsp.jl index bf3aa13..bbc87fe 100644 --- a/docs/src/benchmarks/dynamic/dvsp.jl +++ b/docs/src/benchmarks/dynamic/dvsp.jl @@ -30,7 +30,7 @@ plot_instance(b, trajectory[1]) # - `reward`: negative travel cost incurred at this step # # One step with dispatched routes: -plot_solution(b, trajectory[1]) +plot_sample(b, trajectory[1]) # Multiple steps side by side: customers accumulate and routes change over time: plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) diff --git a/docs/src/benchmarks/dynamic/dynamic_assortment.jl b/docs/src/benchmarks/dynamic/dynamic_assortment.jl index e74369d..d230c44 100644 --- a/docs/src/benchmarks/dynamic/dynamic_assortment.jl +++ b/docs/src/benchmarks/dynamic/dynamic_assortment.jl @@ -29,7 +29,7 @@ plot_instance(b, trajectory[1]) # - `reward`: price of the purchased item (0 if no purchase) # # One step with the offered assortment highlighted (green = offered): -plot_solution(b, trajectory[1]) +plot_sample(b, trajectory[1]) # A few steps side by side (prices are fixed; assortment composition changes over time): plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) diff --git a/docs/src/benchmarks/dynamic/maintenance.jl b/docs/src/benchmarks/dynamic/maintenance.jl index dc07a5e..7b542f0 100644 --- a/docs/src/benchmarks/dynamic/maintenance.jl +++ b/docs/src/benchmarks/dynamic/maintenance.jl @@ -28,7 +28,7 @@ plot_instance(b, trajectory[1]) # - `reward`: negative cost (maintenance and failure costs) at this step # # One step with maintenance decisions (green = maintained, red = failed): -plot_solution(b, trajectory[1]) +plot_sample(b, trajectory[1]) # A few steps side by side showing degradation evolving over time: plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) diff --git a/docs/src/benchmarks/static/fixed_size_shortest_path.jl b/docs/src/benchmarks/static/fixed_size_shortest_path.jl index c8d43ae..b7d170b 100644 --- a/docs/src/benchmarks/static/fixed_size_shortest_path.jl +++ b/docs/src/benchmarks/static/fixed_size_shortest_path.jl @@ -23,7 +23,7 @@ plot_instance(b, sample) # - `y`: path indicator vector (`y[e] = 1` if edge `e` is on the optimal path) # # Top: feature vector x. Bottom left: edge costs θ. 
Bottom right: optimal path y (white dots): -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -34,7 +34,7 @@ maximizer = generate_maximizer(b) # Dijkstra shortest path on the grid g # A randomly initialized policy predicts arbitrary costs, yielding a near-straight path: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) # Optimality gap on the dataset (0 = optimal, higher is worse): compute_gap(b, dataset, model, maximizer) @@ -70,7 +70,7 @@ compute_gap(b, dataset, model, maximizer) # ```math # \xrightarrow[\text{Features}]{x \in \mathbb{R}^p} # \fbox{Linear model} -# \xrightarrow[\text{Predicted costs}]{\hat{\theta} \in \mathbb{R}^E} +# \xrightarrow[\text{Predicted costs}]{\theta \in \mathbb{R}^E} # \fbox{Dijkstra / Bellman-Ford} # \xrightarrow[\text{Path}]{y \in \{0,1\}^E} # ``` diff --git a/docs/src/benchmarks/static/portfolio_optimization.jl b/docs/src/benchmarks/static/portfolio_optimization.jl index 794b71d..d85bcf4 100644 --- a/docs/src/benchmarks/static/portfolio_optimization.jl +++ b/docs/src/benchmarks/static/portfolio_optimization.jl @@ -22,7 +22,7 @@ plot_instance(b, sample) # - `y`: optimal portfolio weights solving the Markowitz QP given `θ` # # Top: feature vector x. Bottom left: true returns θ. Bottom right: optimal weights y: -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -33,7 +33,7 @@ maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) # A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) # Optimality gap on the dataset (0 = optimal, higher is worse): compute_gap(b, dataset, model, maximizer) @@ -43,11 +43,11 @@ compute_gap(b, dataset, model, maximizer) # # A **Markowitz portfolio optimization** problem where asset expected returns are unknown. 
# Given contextual features ``x \in \mathbb{R}^p``, the learner predicts returns -# ``\hat{\theta} \in \mathbb{R}^d`` and solves: +# ``\theta \in \mathbb{R}^d`` and solves: # # ```math # \begin{aligned} -# \max_{y} \quad & \hat{\theta}^\top y \\ +# \max_{y} \quad & \theta^\top y \\ # \text{s.t.} \quad & y^\top \Sigma y \leq \gamma \\ # & \mathbf{1}^\top y \leq 1 \\ # & y \geq 0 diff --git a/docs/src/benchmarks/static/ranking.jl b/docs/src/benchmarks/static/ranking.jl index a536a5d..c5c7092 100644 --- a/docs/src/benchmarks/static/ranking.jl +++ b/docs/src/benchmarks/static/ranking.jl @@ -24,7 +24,7 @@ plot_instance(b, sample) # - `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the lowest cost) # # The full training triple (features, true costs, and derived ranking): -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -35,7 +35,7 @@ maximizer = generate_maximizer(b) # ordinal ranking via sortperm # A randomly initialized policy produces an arbitrary ranking: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) # Optimality gap on the dataset (0 = optimal, higher is worse): compute_gap(b, dataset, model, maximizer) @@ -64,7 +64,7 @@ compute_gap(b, dataset, model, maximizer) # ```math # \xrightarrow[\text{Features}]{x} # \fbox{Linear model} -# \xrightarrow{\hat{\theta}} +# \xrightarrow{\theta} # \fbox{ranking} # \xrightarrow{y} # ``` diff --git a/docs/src/benchmarks/static/subset_selection.jl b/docs/src/benchmarks/static/subset_selection.jl index 0ac7904..1bd7c2b 100644 --- a/docs/src/benchmarks/static/subset_selection.jl +++ b/docs/src/benchmarks/static/subset_selection.jl @@ -22,7 +22,7 @@ plot_instance(b, sample) # - `y`: selection indicator (`y[i] = 1` for the `k` highest-value items, 0 otherwise) # # The full training triple (features, hidden values, and selection): -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -33,7 +33,7 @@ maximizer = generate_maximizer(b) # top-k selection # A randomly initialized policy selects items with no relation to their true values: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) # Optimality gap on the dataset (0 = optimal, higher is worse): compute_gap(b, dataset, model, maximizer) @@ -65,7 +65,7 @@ compute_gap(b, dataset, model, maximizer) # ```math # \xrightarrow[\text{Features}]{x} # \fbox{Linear model} -# \xrightarrow{\hat{\theta}} +# \xrightarrow{\theta} # \fbox{top-k} # \xrightarrow{y} # ``` diff --git a/docs/src/benchmarks/static/warcraft.jl b/docs/src/benchmarks/static/warcraft.jl index dc98354..0bbb5eb 100644 --- a/docs/src/benchmarks/static/warcraft.jl +++ b/docs/src/benchmarks/static/warcraft.jl @@ -21,7 +21,7 @@ plot_instance(b, sample) # - `y`: optimal path indicator (`y[i,j] = 1` if cell `(i,j)` is on the path) # # Left: terrain image. Middle: true costs θ. 
Right: optimal path y: -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -32,7 +32,7 @@ maximizer = generate_maximizer(b) # Dijkstra shortest path on the 12×12 # An untrained CNN produces a near-uniform cost map, yielding a near-straight path: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) # Optimality gap on this sample (0 = optimal, higher is worse): compute_gap(b, [sample], model, maximizer) @@ -71,7 +71,7 @@ compute_gap(b, [sample], model, maximizer) # ```math # \xrightarrow[\text{Terrain image}]{x \in \mathbb{R}^{12 \times 12 \times 3}} # \fbox{ResNet18 CNN} -# \xrightarrow[\text{Cell costs}]{\hat{\theta} \in \mathbb{R}^{12 \times 12}} +# \xrightarrow[\text{Cell costs}]{\theta \in \mathbb{R}^{12 \times 12}} # \fbox{Dijkstra} # \xrightarrow[\text{Path}]{y \in \{0,1\}^{12 \times 12}} # ``` diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/vsp.jl index 22ae547..1fc4155 100644 --- a/docs/src/benchmarks/stochastic/vsp.jl +++ b/docs/src/benchmarks/stochastic/vsp.jl @@ -36,7 +36,7 @@ maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) # The untrained model predicts random edge costs; the resulting schedule is arbitrary: θ_pred = model(sample.x) y_pred = maximizer(θ_pred; sample.context...) -plot_solution( +plot_sample( b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra) ) @@ -109,13 +109,13 @@ plot_solution( # ```math # \xrightarrow[\text{Features}]{x \in \mathbb{R}^{20}} # \fbox{Linear model $\varphi_w$} -# \xrightarrow[\text{Predicted cost}]{\hat{c}} +# \xrightarrow[\text{Predicted cost}]{c} # \fbox{Deterministic VSP solver} # \xrightarrow[\text{Routes}]{y} # ``` # # By training end-to-end with the deterministic solver, the linear model learns adjusted -# costs ``\hat{c}`` that implicitly account for expected stochastic delays, while keeping +# costs ``c`` that implicitly account for expected stochastic delays, while keeping # the fast deterministic solver at inference time. # # **Model:** `Chain(Dense(20 -> 1; bias=false), vec)`: predicts one adjusted cost per edge. diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/toy/argmax.jl index e8b1453..ae00f8e 100644 --- a/docs/src/benchmarks/toy/argmax.jl +++ b/docs/src/benchmarks/toy/argmax.jl @@ -26,7 +26,7 @@ plot_instance(b, sample) # - `y`: optimal one-hot decision derived from `θ` # # The full training triple (features, true scores, and optimal decision): -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -39,7 +39,7 @@ maximizer = generate_maximizer(b) # one-hot argmax θ_pred = model(sample.x) y_pred = maximizer(θ_pred) # -plot_solution(b, DataSample(sample; θ=θ_pred, y=y_pred)) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # The goal of training is to find parameters that maximize accuracy. 
# Current accuracy on the dataset: diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/toy/argmax2d.jl index eb2f3b1..7bbabd3 100644 --- a/docs/src/benchmarks/toy/argmax2d.jl +++ b/docs/src/benchmarks/toy/argmax2d.jl @@ -26,7 +26,7 @@ plot_instance(b, sample) # - `instance` (in `context`): polytope vertices (observable problem structure) # # The full training triple (polytope, cost direction θ, optimal vertex y): -plot_solution(b, sample) +plot_sample(b, sample) # ## Untrained policy @@ -38,7 +38,7 @@ maximizer = generate_maximizer(b) # vertex maximizing θᵀv over polyto # A randomly initialized policy predicts an arbitrary cost direction: θ_pred = model(sample.x) y_pred = maximizer(θ_pred; sample.context...) -plot_solution(b, DataSample(sample; θ=θ_pred, y=y_pred)) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # --- # ## Problem Description diff --git a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl index 8621887..32971be 100644 --- a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl +++ b/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl @@ -39,8 +39,9 @@ plot_instance(b, sample) # - `y`: optimal item for the realized scenario ξ (one-hot; anticipative oracle label) # - `extra.scenario`: realized utility vector ξ (available only during training) # -# Left: realized scenario ξ. Right: selected item (red): -plot_solution(b, sample) +# Top: feature vector x. Bottom: realized scenario ξ acting as the cost vector, +# with the anticipative-optimal item in red: +plot_sample(b, DataSample(sample; θ=sample.scenario)) # ## Untrained policy @@ -50,9 +51,9 @@ model = generate_statistical_model(b) # linear map: features → predicted e maximizer = generate_maximizer(b) # one-hot argmax # A randomly initialized policy selects items with no relation to their expected utilities. -# Left: predicted utilities θ̂. Right: selected item (red): +# Top: feature vector x. Bottom: predicted utilities θ̂ with the selected item in red: θ_pred = model(sample.x) -plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +plot_sample(b, DataSample(sample; θ=θ_pred, y=maximizer(θ_pred))) # --- # ## Problem Description @@ -76,7 +77,7 @@ plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maxim # y^* = \mathrm{argmax}(\xi) # ``` # -# A linear model ``\hat{\theta} = [I \mid W] \cdot x`` can exactly recover the optimal +# A linear model ``\theta = [I \mid W] \cdot x`` can exactly recover the optimal # solution in expectation. # # ## Key Parameters @@ -96,7 +97,7 @@ plot_solution(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maxim # ```math # \xrightarrow[\text{Features}]{x = [c_\text{base}; x_\text{raw}]} # \fbox{Linear model} -# \xrightarrow{\hat{\theta} \in \mathbb{R}^n} +# \xrightarrow{\theta \in \mathbb{R}^n} # \fbox{argmax} # \xrightarrow{y} # ``` diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md index 968d9e6..831895f 100644 --- a/docs/src/custom_benchmarks.md +++ b/docs/src/custom_benchmarks.md @@ -76,7 +76,7 @@ objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64 has_visualization(bench::MyBenchmark) -> Bool # default: false; return true when plot methods are implemented/available plot_instance(bench::MyBenchmark, sample::DataSample; kwargs...) -plot_solution(bench::MyBenchmark, sample::DataSample; kwargs...) 
+plot_sample(bench::MyBenchmark, sample::DataSample; kwargs...) ``` --- diff --git a/docs/src/tutorials/warcraft_tutorial.jl b/docs/src/tutorials/warcraft_tutorial.jl index b801d7a..4b1045f 100644 --- a/docs/src/tutorials/warcraft_tutorial.jl +++ b/docs/src/tutorials/warcraft_tutorial.jl @@ -33,8 +33,8 @@ y_true = sample.y # `context` is not used in this benchmark (no solver kwargs needed), so it is empty: isempty(sample.context) -# For some benchmarks, we provide the following plotting method [`plot_solution`](@ref) to visualize the data: -plot_solution(b, sample) +# For some benchmarks, we provide the following plotting method [`plot_sample`](@ref) to visualize the data: +plot_sample(b, sample) # We can see here the terrain image, the true terrain weights, and the true shortest path avoiding the high cost cells. # ## Building a pipeline @@ -51,7 +51,7 @@ maximizer = generate_maximizer(b; dijkstra=true) # In the case o fthe Warcraft benchmark, the method has an additional keyword argument to chose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm. y = maximizer(θ) # As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path. -plot_solution(b, DataSample(; x, θ, y)) +plot_sample(b, DataSample(; x, θ, y)) # We can evaluate the current pipeline performance using the optimality gap metric: starting_gap = compute_gap(b, test_dataset, model, maximizer) @@ -85,7 +85,7 @@ final_gap = compute_gap(b, test_dataset, model, maximizer) # θ = model(x) y = maximizer(θ) -plot_solution(b, DataSample(; x, θ, y)) +plot_sample(b, DataSample(; x, θ, y)) using Test #src @test final_gap < starting_gap #src diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md index d9ab6fc..bc394bd 100644 --- a/docs/src/using_benchmarks.md +++ b/docs/src/using_benchmarks.md @@ -191,8 +191,8 @@ sample = dataset[1] has_visualization(bench) # true plot_instance(bench, sample) # problem geometry only -plot_solution(bench, sample) # sample.y overlaid on the instance -plot_solution(bench, sample, y) # convenience 3-arg form: override y before plotting +plot_sample(bench, sample) # sample.y overlaid on the instance +plot_sample(bench, sample, y) # convenience 3-arg form: override y before plotting # Dynamic benchmarks only traj = generate_anticipative_solver(bench)(env) @@ -203,7 +203,7 @@ gif(anim, "episode.gif") - `has_visualization(bench)`: returns `true` for benchmarks that implement plot support (if Plots is loaded). - `plot_instance(bench, sample; kwargs...)`: renders the problem geometry without any solution. -- `plot_solution(bench, sample; kwargs...)`: renders `sample.y` overlaid on the instance. -- `plot_solution(bench, sample, y; kwargs...)`: 3-arg convenience form that overrides `y` before plotting. +- `plot_sample(bench, sample; kwargs...)`: renders `sample.y` overlaid on the instance. +- `plot_sample(bench, sample, y; kwargs...)`: 3-arg convenience form that overrides `y` before plotting. - `plot_trajectory(bench, traj; kwargs...)`: dynamic benchmarks only; produces a grid of per-epoch subplots. - `animate_trajectory(bench, traj; kwargs...)`: dynamic benchmarks only, returns a `Plots.Animation` that can be saved with `gif(anim, "file.gif")`. 
diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index c7e47b9..a727d85 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -5,7 +5,7 @@ using DocStringExtensions: TYPEDSIGNATURES using LaTeXStrings: @L_str using Plots import DecisionFocusedLearningBenchmarks: - has_visualization, plot_instance, plot_solution, plot_trajectory, animate_trajectory + has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory include("plots/argmax_plots.jl") include("plots/argmax2d_plots.jl") @@ -21,13 +21,13 @@ include("plots/dynamic_assortment_plots.jl") include("plots/maintenance_plots.jl") """ - plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) + plot_sample(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) Reconstruct a new sample with `y` overridden and delegate to the 2-arg -[`plot_solution`](@ref). Only available when `Plots` is loaded. +[`plot_sample`](@ref). Only available when `Plots` is loaded. """ -function plot_solution(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) - return plot_solution( +function plot_sample(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) + return plot_sample( bench, DataSample(; sample.context..., x=sample.x, θ=sample.θ, y=y, extra=sample.extra); kwargs..., diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl index b8cfb79..8454fb8 100644 --- a/ext/plots/argmax2d_plots.jl +++ b/ext/plots/argmax2d_plots.jl @@ -51,14 +51,14 @@ function plot_instance(::Argmax2DBenchmark, sample::DataSample; kwargs...) return pl end -function plot_solution(::Argmax2DBenchmark, sample::DataSample; kwargs...) +function plot_sample(::Argmax2DBenchmark, sample::DataSample; kwargs...) pl = _init_plot(; kwargs...) _plot_polytope!(pl, sample.instance) _plot_objective!(pl, sample.θ) return _plot_y!(pl, sample.y) end -function plot_solution(::Argmax2DBenchmark, sample::DataSample, y; θ=sample.θ, kwargs...) +function plot_sample(::Argmax2DBenchmark, sample::DataSample, y; θ=sample.θ, kwargs...) pl = _init_plot(; kwargs...) _plot_polytope!(pl, sample.instance) _plot_objective!(pl, θ) diff --git a/ext/plots/argmax_plots.jl b/ext/plots/argmax_plots.jl index e159398..94a104f 100644 --- a/ext/plots/argmax_plots.jl +++ b/ext/plots/argmax_plots.jl @@ -24,7 +24,7 @@ $TYPEDSIGNATURES Plot the features `x` as a heatmap, the scores `θ` as a bar chart, and the decision `y` as a one-hot heatmap. All three share the same item axis. """ -function plot_solution(::ArgmaxBenchmark, sample::DataSample; kwargs...) +function plot_sample(::ArgmaxBenchmark, sample::DataSample; kwargs...) x = sample.x # nb_features × n θ = sample.θ # length n y = sample.y # one-hot, length n diff --git a/ext/plots/contextual_stochastic_argmax_plots.jl b/ext/plots/contextual_stochastic_argmax_plots.jl index a83f4da..ac0b85c 100644 --- a/ext/plots/contextual_stochastic_argmax_plots.jl +++ b/ext/plots/contextual_stochastic_argmax_plots.jl @@ -27,14 +27,14 @@ function plot_instance(::ContextualStochasticArgmaxBenchmark, sample::DataSample return Plots.plot(p1, p2; layout=(1, 2), size=(800, 300), kwargs...) end -function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) - x = sample.x # full feature vector [c_base; x_raw] - y = sample.y # one-hot vector +function plot_sample(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) 
+ x = sample.x + θ = sample.θ + y = sample.y n_x = length(x) - n = length(y) + n = length(θ) n_c = length(sample.c_base) - # Color x bars: steelblue for c_base components, darkorange for x_raw components x_colors = vcat(fill(:steelblue, n_c), fill(:darkorange, n_x - n_c)) p_x = Plots.bar( 1:n_x, @@ -46,39 +46,17 @@ function plot_solution(::ContextualStochasticArgmaxBenchmark, sample::DataSample title="x (blue = c_base, orange = x_raw)", ) - # Pick the best available utility vector to display - if hasproperty(sample.extra, :scenario) - u = sample.extra.scenario - u_title = "Realized scenario ξ" - elseif hasproperty(sample, :θ) && !isnothing(sample.θ) - u = sample.θ - u_title = "Predicted utilities θ̂" - else - u = sample.c_base - u_title = "Base utilities c_base" - end - - p1 = Plots.bar( - 1:n, - u; - legend=false, - xlabel="Item", - ylabel="Utility", - title=u_title, - color=:steelblue, - ) - colors = [y[i] > 0 ? :firebrick : :steelblue for i in 1:n] - p2 = Plots.bar( + p_θ = Plots.bar( 1:n, - u; + θ; color=colors, legend=false, xlabel="Item", ylabel="Utility", - title="Selected item (red)", + title="θ (selected item in red)", ) - l = Plots.@layout [a{0.35h}; [b c]] - return Plots.plot(p_x, p1, p2; layout=l, size=(800, 500), kwargs...) + l = Plots.@layout [a{0.4h}; b] + return Plots.plot(p_x, p_θ; layout=l, size=(700, 500), kwargs...) end diff --git a/ext/plots/dvs_plots.jl b/ext/plots/dvs_plots.jl index 0b61a5e..4bed8a8 100644 --- a/ext/plots/dvs_plots.jl +++ b/ext/plots/dvs_plots.jl @@ -200,7 +200,7 @@ function plot_instance( return plot_state(sample.instance; kwargs...) end -function plot_solution( +function plot_sample( bench::DynamicVehicleSchedulingBenchmark, sample::DataSample; kwargs... ) return plot_routes(sample.instance, sample.y; reward=sample.reward, kwargs...) diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl index 293cdd7..b165774 100644 --- a/ext/plots/dynamic_assortment_plots.jl +++ b/ext/plots/dynamic_assortment_plots.jl @@ -16,7 +16,7 @@ function plot_instance(::DynamicAssortmentBenchmark, sample::DataSample; kwargs. ) end -function plot_solution(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) +function plot_sample(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) prices = sample.instance[1][1, :] .* 10 y = sample.y # BitVector, selected items N = length(prices) @@ -43,7 +43,7 @@ function plot_trajectory( n = min(length(trajectory), max_steps) rows = ceil(Int, n / cols) steps = round.(Int, range(1, length(trajectory); length=n)) - plots = [plot_solution(bench, trajectory[t]) for t in steps] + plots = [plot_sample(bench, trajectory[t]) for t in steps] return Plots.plot( plots...; layout=(rows, cols), size=(cols * 300, rows * 250), kwargs... ) diff --git a/ext/plots/maintenance_plots.jl b/ext/plots/maintenance_plots.jl index 3a95e82..1ae8514 100644 --- a/ext/plots/maintenance_plots.jl +++ b/ext/plots/maintenance_plots.jl @@ -18,7 +18,7 @@ function plot_instance(bench::MaintenanceBenchmark, sample::DataSample; kwargs.. ) end -function plot_solution(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) +function plot_sample(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) 
state = sample.instance y = sample.y # BitVector, maintained components N = length(state) @@ -47,7 +47,7 @@ function plot_trajectory( n = min(length(trajectory), max_steps) rows = ceil(Int, n / cols) steps = round.(Int, range(1, length(trajectory); length=n)) - plots = [plot_solution(bench, trajectory[t]) for t in steps] + plots = [plot_sample(bench, trajectory[t]) for t in steps] return Plots.plot( plots...; layout=(rows, cols), size=(cols * 300, rows * 250), kwargs... ) diff --git a/ext/plots/portfolio_plots.jl b/ext/plots/portfolio_plots.jl index eb4b771..fe5d463 100644 --- a/ext/plots/portfolio_plots.jl +++ b/ext/plots/portfolio_plots.jl @@ -16,7 +16,7 @@ function plot_instance(::PortfolioOptimizationBenchmark, sample::DataSample; kwa ) end -function plot_solution(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) +function plot_sample(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) x = sample.x θ = sample.θ y = sample.y diff --git a/ext/plots/ranking_plots.jl b/ext/plots/ranking_plots.jl index 1c4592c..bf5886c 100644 --- a/ext/plots/ranking_plots.jl +++ b/ext/plots/ranking_plots.jl @@ -13,7 +13,7 @@ function plot_instance(::RankingBenchmark, sample::DataSample; kwargs...) ) end -function plot_solution(::RankingBenchmark, sample::DataSample; kwargs...) +function plot_sample(::RankingBenchmark, sample::DataSample; kwargs...) x = sample.x # nb_features × n θ = sample.θ # length n y = sample.y # y[i] = rank of item i (1 = best) diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 4787910..4d4d34e 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -92,7 +92,7 @@ function plot_instance(bench::FixedSizeShortestPathBenchmark, sample::DataSample return pl end -function plot_solution(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) +function plot_sample(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) x = sample.x p_feat = length(x) weight_grid, path_grid = _grid_matrices(bench, sample.θ, sample.y) diff --git a/ext/plots/subset_selection_plots.jl b/ext/plots/subset_selection_plots.jl index 66b3b66..b4529ea 100644 --- a/ext/plots/subset_selection_plots.jl +++ b/ext/plots/subset_selection_plots.jl @@ -16,7 +16,7 @@ function plot_instance(::SubsetSelectionBenchmark, sample::DataSample; kwargs... ) end -function plot_solution(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) +function plot_sample(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) x = sample.x # length n feature vector θ = sample.θ # length n hidden values y = sample.y # y[i] = true if item i is selected diff --git a/ext/plots/svs_plots.jl b/ext/plots/svs_plots.jl index 9a6161e..fd73b77 100644 --- a/ext/plots/svs_plots.jl +++ b/ext/plots/svs_plots.jl @@ -63,9 +63,7 @@ function plot_instance( return fig end -function plot_solution( - ::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs... -) +function plot_sample(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) @assert hasproperty(sample.instance, :city) "Sample does not contain city information." (; tasks, district_width, width) = sample.instance.city ticks = 0:district_width:width diff --git a/ext/plots/warcraft_plots.jl b/ext/plots/warcraft_plots.jl index 805d391..04d5d7e 100644 --- a/ext/plots/warcraft_plots.jl +++ b/ext/plots/warcraft_plots.jl @@ -11,7 +11,7 @@ function plot_instance(::WarcraftBenchmark, sample::DataSample; kwargs...) 
) end -function plot_solution( +function plot_sample( ::WarcraftBenchmark, sample::DataSample; θ_true=sample.θ, diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index 3ef3448..fe2ee12 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -82,7 +82,7 @@ export is_exogenous, is_endogenous export is_minimization_problem export objective_value -export has_visualization, plot_instance, plot_solution, plot_trajectory, animate_trajectory +export has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory export compute_gap # Export all benchmarks diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index e9eb16e..fd5232f 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -42,7 +42,7 @@ export generate_baseline_policies export generate_anticipative_solver, generate_parametric_anticipative_solver export is_minimization_problem -export has_visualization, plot_instance, plot_solution, plot_trajectory, animate_trajectory +export has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory export compute_gap export grid_graph, get_path, path_to_matrix export neg_tensor, squeeze_last_dims, average_tensor diff --git a/src/Utils/interface/abstract_benchmark.jl b/src/Utils/interface/abstract_benchmark.jl index bedf527..2e420ff 100644 --- a/src/Utils/interface/abstract_benchmark.jl +++ b/src/Utils/interface/abstract_benchmark.jl @@ -59,7 +59,7 @@ function generate_baseline_policies end """ has_visualization(::AbstractBenchmark) -> Bool -Return `true` if `plot_instance` and `plot_solution` are implemented for this benchmark +Return `true` if `plot_instance` and `plot_sample` are implemented for this benchmark (requires `Plots` to be loaded). Default is `false`. """ has_visualization(::AbstractBenchmark) = false @@ -72,11 +72,11 @@ Plot the problem instance (no solution). Only available when `Plots` is loaded. function plot_instance end """ - plot_solution(bench::AbstractBenchmark, sample::DataSample; kwargs...) + plot_sample(bench::AbstractBenchmark, sample::DataSample; kwargs...) Plot the instance with `sample.y` overlaid. Only available when `Plots` is loaded. """ -function plot_solution end +function plot_sample end """ objective_value(bench::AbstractBenchmark, sample::DataSample, y) -> Real diff --git a/src/Utils/interface/static_benchmark.jl b/src/Utils/interface/static_benchmark.jl index d809e05..8943fac 100644 --- a/src/Utils/interface/static_benchmark.jl +++ b/src/Utils/interface/static_benchmark.jl @@ -28,7 +28,7 @@ Also implement: - [`objective_value`](@ref)`(bench, sample, y)`: must be implemented by every static benchmark # Optional methods (no default, require `Plots` to be loaded) -- [`plot_instance`](@ref), [`plot_solution`](@ref) +- [`plot_instance`](@ref), [`plot_sample`](@ref) - [`generate_baseline_policies`](@ref) """ abstract type AbstractStaticBenchmark <: AbstractBenchmark end @@ -56,7 +56,7 @@ end generate_dataset(::AbstractStaticBenchmark, dataset_size::Int; target_policy=nothing, kwargs...) -> Vector{<:DataSample} Generate a `Vector` of [`DataSample`](@ref) of length `dataset_size` for given benchmark. -Content of the dataset can be visualized using [`plot_solution`](@ref), when it applies. +Content of the dataset can be visualized using [`plot_sample`](@ref), when it applies. By default, it uses [`generate_sample`](@ref) to create each sample in the dataset, and passes any keyword arguments to it. 
`target_policy` is applied if provided, it is called on each sample diff --git a/test/argmax.jl b/test/argmax.jl index 0b05531..59dfd46 100644 --- a/test/argmax.jl +++ b/test/argmax.jl @@ -39,9 +39,9 @@ @test has_visualization(b) fig1 = plot_instance(b, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, dataset[1]) + fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, dataset[1], dataset[2].y) @test fig3 isa Plots.Plot end end diff --git a/test/argmax_2d.jl b/test/argmax_2d.jl index e3bd6ff..71e5d6b 100644 --- a/test/argmax_2d.jl +++ b/test/argmax_2d.jl @@ -17,11 +17,11 @@ @test gap >= 0 @test has_visualization(b) - figure = plot_solution(b, dataset[1]) + figure = plot_sample(b, dataset[1]) @test figure isa Plots.Plot figure2 = plot_instance(b, dataset[1]) @test figure2 isa Plots.Plot - figure3 = plot_solution(b, dataset[1], dataset[2].y) + figure3 = plot_sample(b, dataset[1], dataset[2].y) @test figure3 isa Plots.Plot for (i, sample) in enumerate(dataset) diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl index 38bcfa9..d2004da 100644 --- a/test/contextual_stochastic_argmax.jl +++ b/test/contextual_stochastic_argmax.jl @@ -121,12 +121,14 @@ end b = ContextualStochasticArgmaxBenchmark(; n=5, d=3, seed=0) policies = generate_baseline_policies(b) dataset = generate_dataset(b, 2; nb_scenarios=2, target_policy=policies.saa) + model = generate_statistical_model(b; seed=0) + sample = DataSample(dataset[1]; θ=model(dataset[1].x)) @test has_visualization(b) fig1 = plot_instance(b, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, dataset[1]) + fig2 = plot_sample(b, sample) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, sample, dataset[2].y) @test fig3 isa Plots.Plot end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 7b4075a..3c20e05 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -369,9 +369,9 @@ end @test has_visualization(b) fig1 = plot_instance(b, traj[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, traj[1]) + fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, traj[1], traj[2].y) + fig3 = plot_sample(b, traj[1], traj[2].y) @test fig3 isa Plots.Plot fig4 = plot_trajectory(b, traj) @test fig4 isa Plots.Plot diff --git a/test/dynamic_vsp_plots.jl b/test/dynamic_vsp_plots.jl index 32cbc4a..bd70546 100644 --- a/test/dynamic_vsp_plots.jl +++ b/test/dynamic_vsp_plots.jl @@ -17,11 +17,11 @@ fig2 = plot_trajectory(b, y) @test fig2 isa Plots.Plot - # Test plot_solution via baseline policy + # Test plot_sample via baseline policy policies = generate_baseline_policies(b) lazy = policies[1] _, d = evaluate_policy!(lazy, env) - fig3 = plot_solution(b, d[1]) + fig3 = plot_sample(b, d[1]) @test fig3 isa Plots.Plot # Test animate_trajectory — returns Animation, save separately with gif() diff --git a/test/fixed_size_shortest_path.jl b/test/fixed_size_shortest_path.jl index 1ca1d1c..8d9c935 100644 --- a/test/fixed_size_shortest_path.jl +++ b/test/fixed_size_shortest_path.jl @@ -38,9 +38,9 @@ @test has_visualization(b) fig1 = plot_instance(b, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, dataset[1]) + fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, dataset[1], dataset[2].y) @test fig3 isa Plots.Plot end end diff 
--git a/test/maintenance.jl b/test/maintenance.jl index 3adab18..990d210 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -238,9 +238,9 @@ end @test has_visualization(b) fig1 = plot_instance(b, traj[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, traj[1]) + fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, traj[1], traj[2].y) + fig3 = plot_sample(b, traj[1], traj[2].y) @test fig3 isa Plots.Plot fig4 = plot_trajectory(b, traj) @test fig4 isa Plots.Plot diff --git a/test/portfolio_optimization.jl b/test/portfolio_optimization.jl index a951054..88c9b6d 100644 --- a/test/portfolio_optimization.jl +++ b/test/portfolio_optimization.jl @@ -35,9 +35,9 @@ @test has_visualization(b) fig1 = plot_instance(b, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, dataset[1]) + fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, dataset[1], dataset[2].y) @test fig3 isa Plots.Plot end end diff --git a/test/ranking.jl b/test/ranking.jl index 59f0d46..d0c7ed4 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -40,9 +40,9 @@ @test has_visualization(b) fig1 = plot_instance(b, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b, dataset[1]) + fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, dataset[1], dataset[2].y) @test fig3 isa Plots.Plot end end diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 4de6183..76fe95f 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -46,9 +46,9 @@ @test has_visualization(b_identity) fig1 = plot_instance(b_identity, dataset[1]) @test fig1 isa Plots.Plot - fig2 = plot_solution(b_identity, dataset[1]) + fig2 = plot_sample(b_identity, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_solution(b_identity, dataset[1], dataset[2].y) + fig3 = plot_sample(b_identity, dataset[1], dataset[2].y) @test fig3 isa Plots.Plot end end diff --git a/test/vsp.jl b/test/vsp.jl index 5a18429..56931fd 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -51,7 +51,7 @@ # Plots work unchanged figure_1 = plot_instance(b, saa_dataset[1]) @test figure_1 isa Plots.Plot - figure_2 = plot_solution(b, saa_dataset[1]) + figure_2 = plot_sample(b, saa_dataset[1]) @test figure_2 isa Plots.Plot maximizer = generate_maximizer(b) diff --git a/test/warcraft.jl b/test/warcraft.jl index 94a4678..e23a6e5 100644 --- a/test/warcraft.jl +++ b/test/warcraft.jl @@ -14,11 +14,11 @@ dijkstra_maximizer = generate_maximizer(b; dijkstra=true) @test has_visualization(b) - figure = plot_solution(b, dataset[1]) + figure = plot_sample(b, dataset[1]) @test figure isa Plots.Plot figure2 = plot_instance(b, dataset[1]) @test figure2 isa Plots.Plot - figure3 = plot_solution(b, dataset[1], dataset[2].y) + figure3 = plot_sample(b, dataset[1], dataset[2].y) @test figure3 isa Plots.Plot gap = compute_gap(b, dataset, model, dijkstra_maximizer) @test gap >= 0 From e701b9c162ab723b348860a7361ab7b2a1813e9a Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 09:21:54 +0200 Subject: [PATCH 11/21] fix: typos in tutorial --- docs/src/tutorials/warcraft_tutorial.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/tutorials/warcraft_tutorial.jl b/docs/src/tutorials/warcraft_tutorial.jl index 4b1045f..b3a2368 100644 --- a/docs/src/tutorials/warcraft_tutorial.jl +++ b/docs/src/tutorials/warcraft_tutorial.jl @@ -22,7 +22,7 @@ dataset = 
 generate_dataset(b, 50);
 # Subdatasets can be created through regular slicing:
 train_dataset, test_dataset = dataset[1:45], dataset[46:50]
-# And getting an individual sample will return a [`DataSample`](@ref) with four fields: `x`, `info`, `θ`, and `y`:
+# And getting an individual sample will return a [`DataSample`](@ref) with five fields: `x`, `θ`, `y`, `context`, and `extra`:
 sample = test_dataset[1]
 # `x` correspond to the input features, i.e. the input image (3D array) in the Warcraft benchmark case:
 x = sample.x
@@ -32,6 +32,8 @@ x = sample.x
 y_true = sample.y
 # `context` is not used in this benchmark (no solver kwargs needed), so it is empty:
 isempty(sample.context)
+# `extra` is also not used in this benchmark, so it is empty as well:
+isempty(sample.extra)
 # For some benchmarks, we provide the following plotting method [`plot_sample`](@ref) to visualize the data:
 plot_sample(b, sample)
@@ -48,7 +50,7 @@ model = generate_statistical_model(b)
 # Finally, the [`generate_maximizer`](@ref) method can be used to generate a combinatorial optimization algorithm that takes the predicted cell weights as input and returns the corresponding shortest path:
 maximizer = generate_maximizer(b; dijkstra=true)
-# In the case o fthe Warcraft benchmark, the method has an additional keyword argument to chose the algorithm to use: Dijkstra's algorithm or Bellman-Ford algorithm.
+# In the case of the Warcraft benchmark, the method has an additional keyword argument to choose the algorithm to use: Dijkstra's algorithm or the Bellman-Ford algorithm.
 y = maximizer(θ)
 # As we can see, currently the pipeline predicts random noise as cell weights, and therefore the maximizer returns a straight line path.
 plot_sample(b, DataSample(; x, θ, y))

From bb36515e031e32293e4e40955f8608ab435da1ed Mon Sep 17 00:00:00 2001
From: BatyLeo
Date: Tue, 5 May 2026 09:27:30 +0200
Subject: [PATCH 12/21] docs: add installation instructions in README and index.md

---
 README.md         | 7 +++++++
 docs/src/index.md | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/README.md b/README.md
index 42fc0da..aff309e 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,13 @@ maximizer = generate_maximizer(bench)
 For the full list of benchmarks, the common interface, and detailed usage examples, refer to the [documentation](https://JuliaDecisionFocusedLearning.github.io/DecisionFocusedLearningBenchmarks.jl/stable/).
+## Installation
+
+```julia
+using Pkg
+Pkg.add("DecisionFocusedLearningBenchmarks")
+```
+
 ## Related Packages
 
 This package is part of the [JuliaDecisionFocusedLearning](https://github.com/JuliaDecisionFocusedLearning) organization, and built to be compatible with other packages in the ecosystem:
diff --git a/docs/src/index.md b/docs/src/index.md
index ad5bd73..4ffb150 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -66,6 +66,13 @@ Multi-stage sequential decision-making problems:
 
 ## Getting Started
 
+First, make sure to install the package from the Julia registry:
+
+```julia
+using Pkg
+Pkg.add("DecisionFocusedLearningBenchmarks")
+```
+
 In a few lines of code, you can create benchmark instances, generate datasets, initialize learning components, and evaluate performance, using the same syntax across all benchmarks:
 
 ```julia

From 668d4309e4d63e98ce1ca84cf36350a4c0fdee76 Mon Sep 17 00:00:00 2001
From: BatyLeo
Date: Tue, 5 May 2026 09:35:48 +0200
Subject: [PATCH 13/21] docs: remove toy category, and reorder benchmarks by numbering file names

---
 ...assortment.jl => 01_dynamic_assortment.jl} |   0
 .../dynamic/01_dynamic_assortment.md          | 155 +++++++++++++++++
 .../{maintenance.jl => 02_maintenance.jl}     |   0
 docs/src/benchmarks/dynamic/02_maintenance.md | 144 ++++++++++++++++
 .../dynamic/{dvsp.jl => 03_dvsp.jl}           |   0
 docs/src/benchmarks/dynamic/03_dvsp.md        | 161 ++++++++++++++++++
 .../{toy/argmax.jl => static/01_argmax.jl}    |   0
 docs/src/benchmarks/static/01_argmax.md       | 115 +++++++++++++
 .../argmax2d.jl => static/02_argmax2d.jl}     |   0
 docs/src/benchmarks/static/02_argmax2d.md     | 104 +++++++++++
 .../static/{ranking.jl => 03_ranking.jl}      |   0
 docs/src/benchmarks/static/03_ranking.md      | 104 +++++++++++
 ...zation.jl => 04_portfolio_optimization.jl} |   0
 .../static/04_portfolio_optimization.md       | 118 +++++++++++++
 ...et_selection.jl => 05_subset_selection.jl} |   0
 .../benchmarks/static/05_subset_selection.md  | 109 ++++++++++++
 ...path.jl => 06_fixed_size_shortest_path.jl} |   0
 .../static/06_fixed_size_shortest_path.md     | 115 +++++++++++++
 .../static/{warcraft.jl => 07_warcraft.jl}    |   0
 docs/src/benchmarks/static/07_warcraft.md     | 119 +++++++++++++
 .../01_contextual_stochastic_argmax.jl}       |   0
 .../01_contextual_stochastic_argmax.md        | 138 +++++++++++++++
 .../stochastic/{vsp.jl => 02_vsp.jl}          |   0
 docs/src/benchmarks/stochastic/02_vsp.md      | 151 ++++++++++++++++
 24 files changed, 1533 insertions(+)
 rename docs/src/benchmarks/dynamic/{dynamic_assortment.jl => 01_dynamic_assortment.jl} (100%)
 create mode 100644 docs/src/benchmarks/dynamic/01_dynamic_assortment.md
 rename docs/src/benchmarks/dynamic/{maintenance.jl => 02_maintenance.jl} (100%)
 create mode 100644 docs/src/benchmarks/dynamic/02_maintenance.md
 rename docs/src/benchmarks/dynamic/{dvsp.jl => 03_dvsp.jl} (100%)
 create mode 100644 docs/src/benchmarks/dynamic/03_dvsp.md
 rename docs/src/benchmarks/{toy/argmax.jl => static/01_argmax.jl} (100%)
 create mode 100644 docs/src/benchmarks/static/01_argmax.md
 rename docs/src/benchmarks/{toy/argmax2d.jl => static/02_argmax2d.jl} (100%)
 create mode 100644 docs/src/benchmarks/static/02_argmax2d.md
 rename docs/src/benchmarks/static/{ranking.jl => 03_ranking.jl} (100%)
 create mode 100644 docs/src/benchmarks/static/03_ranking.md
 rename docs/src/benchmarks/static/{portfolio_optimization.jl => 04_portfolio_optimization.jl} (100%)
 create mode 100644 docs/src/benchmarks/static/04_portfolio_optimization.md
 rename docs/src/benchmarks/static/{subset_selection.jl => 05_subset_selection.jl} (100%)
 create mode 100644
docs/src/benchmarks/static/05_subset_selection.md rename docs/src/benchmarks/static/{fixed_size_shortest_path.jl => 06_fixed_size_shortest_path.jl} (100%) create mode 100644 docs/src/benchmarks/static/06_fixed_size_shortest_path.md rename docs/src/benchmarks/static/{warcraft.jl => 07_warcraft.jl} (100%) create mode 100644 docs/src/benchmarks/static/07_warcraft.md rename docs/src/benchmarks/{toy/contextual_stochastic_argmax.jl => stochastic/01_contextual_stochastic_argmax.jl} (100%) create mode 100644 docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md rename docs/src/benchmarks/stochastic/{vsp.jl => 02_vsp.jl} (100%) create mode 100644 docs/src/benchmarks/stochastic/02_vsp.md diff --git a/docs/src/benchmarks/dynamic/dynamic_assortment.jl b/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl similarity index 100% rename from docs/src/benchmarks/dynamic/dynamic_assortment.jl rename to docs/src/benchmarks/dynamic/01_dynamic_assortment.jl diff --git a/docs/src/benchmarks/dynamic/01_dynamic_assortment.md b/docs/src/benchmarks/dynamic/01_dynamic_assortment.md new file mode 100644 index 0000000..27820a2 --- /dev/null +++ b/docs/src/benchmarks/dynamic/01_dynamic_assortment.md @@ -0,0 +1,155 @@ +```@meta +EditURL = "01_dynamic_assortment.jl" +``` + +# Dynamic Assortment +Select which K items to offer at each step to maximize revenue: customer preferences +evolve dynamically based on purchase history (hype and saturation effects). + +````@example 01_dynamic_assortment +using DecisionFocusedLearningBenchmarks +using Plots + +b = DynamicAssortmentBenchmark() +```` + +## Observable input + +Generate one environment and roll it out with the greedy policy to collect a sample +trajectory. At each step the agent observes item prices, hype levels, saturation, and +purchase history: + +````@example 01_dynamic_assortment +policies = generate_baseline_policies(b) +env = generate_environments(b, 1)[1] +_, trajectory = evaluate_policy!(policies.greedy, env) +```` + +The observable state at step 1: item prices (fixed across steps): + +````@example 01_dynamic_assortment +plot_instance(b, trajectory[1]) +```` + +## A training sample + +Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +- `x`: `(d+8) × N` feature matrix per step (prices, hype, saturation, history, time) +- `θ`: predicted utility score per item +- `y`: offered assortment at this step (BitVector of length N, true = offered) +- `instance`: full state tuple (features matrix, purchase history) +- `reward`: price of the purchased item (0 if no purchase) + +One step with the offered assortment highlighted (green = offered): + +````@example 01_dynamic_assortment +plot_sample(b, trajectory[1]) +```` + +A few steps side by side (prices are fixed; assortment composition changes over time): + +````@example 01_dynamic_assortment +plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) +```` + +## DFL pipeline components + +The DFL agent chains two components: a neural network predicting utility scores per item: + +````@example 01_dynamic_assortment +model = generate_statistical_model(b) # MLP: state features → predicted utility per item +```` + +and a maximizer offering the K items with the highest predicted utilities: + +````@example 01_dynamic_assortment +maximizer = generate_maximizer(b) # top-K selection by predicted utility +```` + +At each step, the model maps the current state (prices, hype, saturation, history) to a +utility score per item. The maximizer selects the K items with the highest scores. 
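+
+As a rough, non-executed sketch, one decision of this untrained pipeline could look as
+follows (assuming, as described above, that the step's feature matrix is available as
+`trajectory[1].x`, and that this maximizer needs no extra keyword context):
+
+```julia
+step = trajectory[1]         # one step of the rolled-out trajectory
+θ_pred = model(step.x)       # one predicted utility score per item
+a_pred = maximizer(θ_pred)   # BitVector: the K items with highest predicted utility
+sum(a_pred)                  # number of offered items, equal to K
+```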
+ +--- +## Problem Description + +### Overview + +In the **Dynamic Assortment problem**, a retailer has ``N`` items and must select +``K`` to offer at each time step. Customer preferences evolve based on purchase history +through **hype** (recent purchases increase demand) and **saturation** (repeated +purchases slightly decrease demand). + +### Mathematical Formulation + +**State** ``s_t = (p, f, h_t, \sigma_t, t, \mathcal{H}_t)`` where: +- ``p``: fixed item prices +- ``f``: static item features +- ``h_t, \sigma_t``: current hype and saturation levels +- ``t``: current time step +- ``\mathcal{H}_t``: purchase history (last 5 purchases) + +**Action:** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| = K`` + +**Customer choice** (multinomial logit): +```math +\mathbb{P}(i \mid a_t, s_t) = \frac{\exp(\theta_i(s_t))}{\sum_{j \in a_t} \exp(\theta_j(s_t)) + 1} +``` + +**Transition dynamics:** +- Hype: ``h_{t+1}^{(i)} = h_t^{(i)} \times m^{(i)}`` where the multiplier reflects recent purchases +- Saturation: increases by ×1.01 for the purchased item + +**Reward:** ``r(s_t, a_t) = p_{i^\star}`` (price of the purchased item, 0 if no purchase) + +**Objective:** +```math +\max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right] +``` + +## Key Components + +### [`DynamicAssortmentBenchmark`](@ref) + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `N` | Number of items in catalog | 20 | +| `d` | Static feature dimension per item | 2 | +| `K` | Assortment size | 4 | +| `max_steps` | Steps per episode | 80 | +| `exogenous` | Whether dynamics are exogenous | `false` | + +### State Observation + +Agents observe a ``(d+8) \times N`` normalized feature matrix per step containing: +current prices, hype, saturation, static features, change in hype/saturation from +previous step and from initial state, and normalized time step. + +## Baseline Policies + +| Policy | Description | +|--------|-------------| +| Expert | Brute-force enumeration of all ``\binom{N}{K}`` subsets; optimal but slow | +| Greedy | Selects the ``K`` items with highest prices | + +## DFL Policy + +```math +\xrightarrow[\text{State}]{s_t} +\fbox{Neural network $\varphi_w$} +\xrightarrow[\text{Utilities}]{\theta \in \mathbb{R}^N} +\fbox{Top-K} +\xrightarrow[\text{Assortment}]{a_t} +``` + +**Model:** `Chain(Dense(d+8 → 5), Dense(5 → 1), vec)`: predicts one utility score +per item from the current state features. + +**Maximizer:** `TopKMaximizer(K)`: selects the top ``K`` items by predicted utility. + +!!! note "Reference" + [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/dynamic/maintenance.jl b/docs/src/benchmarks/dynamic/02_maintenance.jl similarity index 100% rename from docs/src/benchmarks/dynamic/maintenance.jl rename to docs/src/benchmarks/dynamic/02_maintenance.jl diff --git a/docs/src/benchmarks/dynamic/02_maintenance.md b/docs/src/benchmarks/dynamic/02_maintenance.md new file mode 100644 index 0000000..89a7902 --- /dev/null +++ b/docs/src/benchmarks/dynamic/02_maintenance.md @@ -0,0 +1,144 @@ +```@meta +EditURL = "02_maintenance.jl" +``` + +# Maintenance +Decide which components to maintain at each step to minimize failure and maintenance costs: +components degrade stochastically and the agent has limited maintenance capacity. 
+ +````@example 02_maintenance +using DecisionFocusedLearningBenchmarks +using Plots + +b = MaintenanceBenchmark(; N=5, K=2) # 5 components, maintain up to 2 per step +```` + +## Observable input + +Generate one environment and roll it out with the greedy policy to collect a sample +trajectory. At each step the agent observes the degradation level of each component: + +````@example 02_maintenance +policies = generate_baseline_policies(b) +env = generate_environments(b, 1)[1] +_, trajectory = evaluate_policy!(policies.greedy, env) +```` + +The observable state at step 1: degradation levels per component (1 = new, n = failed): + +````@example 02_maintenance +plot_instance(b, trajectory[1]) +```` + +## A training sample + +Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +- `x`: degradation state vector (values in `1..n` per component) +- `θ`: urgency score per component (predicted by model) +- `y`: which components are maintained at this step (BitVector of length N) +- `instance`: degradation state vector +- `reward`: negative cost (maintenance and failure costs) at this step + +One step with maintenance decisions (green = maintained, red = failed): + +````@example 02_maintenance +plot_sample(b, trajectory[1]) +```` + +A few steps side by side showing degradation evolving over time: + +````@example 02_maintenance +plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) +```` + +## DFL pipeline components + +The DFL agent chains two components: a neural network predicting urgency scores per component: + +````@example 02_maintenance +model = generate_statistical_model(b) # two-layer MLP: degradation state → urgency scores +```` + +and a maximizer selecting the most urgent components for maintenance: + +````@example 02_maintenance +maximizer = generate_maximizer(b) # top-K selection among components with positive scores +```` + +At each step, the model maps the current degradation state to an urgency score per component. +The maximizer selects up to K components with the highest positive scores for maintenance. + +--- +## Problem Description + +### Overview + +In the **Maintenance benchmark**, a system has ``N`` identical components, each with +``n`` discrete degradation states (1 = new, ``n`` = failed). At each step, the agent +can maintain up to ``K`` components. Maintained components are reset to state 1. +Unmaintained components degrade stochastically. + +### Mathematical Formulation + +**State** ``s_t \in \{1,\ldots,n\}^N``: degradation level of each component. 
+ +**Action** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| \leq K`` + +**Transition dynamics:** For each component ``i``: +- If maintained: ``s_{t+1}^i = 1`` +- If not maintained: ``s_{t+1}^i = \min(s_t^i + 1, n)`` with probability ``p``, else ``s_t^i`` + +**Cost:** +```math +c(s_t, a_t) = c_m \cdot |a_t| + c_f \cdot \#\{i : s_t^i = n\} +``` + +**Objective:** +```math +\min_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T c(s_t, \pi(s_t))\right] +``` + +## Key Components + +### [`MaintenanceBenchmark`](@ref) + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `N` | Number of components | 2 | +| `K` | Max simultaneous maintenance operations | 1 | +| `n` | Degradation levels per component | 3 | +| `p` | Degradation probability per step | 0.2 | +| `c_f` | Failure cost per failed component | 10.0 | +| `c_m` | Maintenance cost per maintained component | 3.0 | +| `max_steps` | Steps per episode | 80 | + +### Instance Generation + +Each instance has random starting degradation states uniformly drawn from ``\{1,\ldots,n\}``. + +## Baseline Policies + +| Policy | Description | +|--------|-------------| +| Greedy | Maintains components in the last degradation state before failure, up to capacity | + +## DFL Policy + +```math +\xrightarrow[\text{State}]{s_t \in \{1,\ldots,n\}^N} +\fbox{Neural network $\varphi_w$} +\xrightarrow[\text{Scores}]{\theta \in \mathbb{R}^N} +\fbox{Top-K (positive)} +\xrightarrow[\text{Maintenance}]{a_t} +``` + +**Model:** `Chain(Dense(N → N), Dense(N → N), vec)`: two-layer MLP predicting one +urgency score per component. + +**Maximizer:** `TopKPositiveMaximizer(K)`: selects the ``K`` components with the +highest positive scores for maintenance. + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/dynamic/dvsp.jl b/docs/src/benchmarks/dynamic/03_dvsp.jl similarity index 100% rename from docs/src/benchmarks/dynamic/dvsp.jl rename to docs/src/benchmarks/dynamic/03_dvsp.jl diff --git a/docs/src/benchmarks/dynamic/03_dvsp.md b/docs/src/benchmarks/dynamic/03_dvsp.md new file mode 100644 index 0000000..0bb16e5 --- /dev/null +++ b/docs/src/benchmarks/dynamic/03_dvsp.md @@ -0,0 +1,161 @@ +```@meta +EditURL = "03_dvsp.jl" +``` + +# Dynamic Vehicle Scheduling +Dispatch vehicles to customers arriving over time: at each step the agent decides which +customers to serve now and which to postpone, minimizing total travel cost. + +````@example 03_dvsp +using DecisionFocusedLearningBenchmarks +using Plots + +b = DynamicVehicleSchedulingBenchmark() +```` + +## Observable input + +Generate one environment and roll it out with the greedy policy to collect a sample +trajectory. 
At each step the agent observes customer positions, start times, and which +customers have reached their dispatch deadline: + +````@example 03_dvsp +policies = generate_baseline_policies(b) +env = generate_environments(b, 1)[1] +_, trajectory = evaluate_policy!(policies.greedy, env) +```` + +The observable state at step 1: depot (green square), must-dispatch customers +(red stars; deadline reached), postponable customers (blue triangles): + +````@example 03_dvsp +plot_instance(b, trajectory[1]) +```` + +## A training sample + +Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: +- `x`: 27-dimensional feature vector per customer (schedule slack, travel times, reachability) +- `θ`: prize per customer (predicted by the model; used as optimization input) +- `y`: routes dispatched at this step +- `instance`: full DVSP state (customer positions, deadlines, current epoch) +- `reward`: negative travel cost incurred at this step + +One step with dispatched routes: + +````@example 03_dvsp +plot_sample(b, trajectory[1]) +```` + +Multiple steps side by side: customers accumulate and routes change over time: + +````@example 03_dvsp +plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) +```` + +## DFL pipeline components + +The DFL agent chains two components: a neural network predicting a prize per customer: + +````@example 03_dvsp +model = generate_statistical_model(b) # Dense(27 → 1) per customer: state features → prize +```` + +and a maximizer selecting routes that balance collected prizes against travel costs: + +````@example 03_dvsp +maximizer = generate_maximizer(b) # prize-collecting VSP solver +```` + +At each step, the model assigns a prize to each postponable customer. The solver then +selects routes maximizing collected prizes minus travel costs, deciding which customers +to serve now and which to defer. + +--- +## Problem Description + +### Overview + +In the **Dynamic Vehicle Scheduling Problem (DVSP)**, a fleet operator must decide at +each time step which customers to serve immediately and which to postpone. The goal is +to serve all customers by end of the planning horizon while minimizing total travel time. + +The problem is characterized by: +- **Exogenous noise**: customer arrivals are stochastic and follow a fixed distribution +- **Combinatorial action space**: routes are built over a large set of customers + +### Mathematical Formulation + +**State** ``s_t = (R_t, D_t, t)`` where: +- ``R_t``: pending customers, each with coordinates, start time, service time +- ``D_t``: must-dispatch customers (cannot be postponed further) +- ``t``: current time step + +**Action** ``a_t``: a set of vehicle routes ``\{r_1, r_2, \ldots, r_k\}``, each starting +and ending at the depot, satisfying time constraints. 
+
+**Reward:**
+```math
+r(s_t, a_t) = -\sum_{r \in a_t} \sum_{(i,j) \in r} d_{ij}
+```
+
+**Objective:**
+```math
+\max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right]
+```
+
+## Key Components
+
+### [`DynamicVehicleSchedulingBenchmark`](@ref)
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `max_requests_per_epoch` | Maximum new customers per time step | 10 |
+| `Δ_dispatch` | Time delay between decision and dispatch | 1.0 |
+| `epoch_duration` | Duration of each time step | 1.0 |
+| `two_dimensional_features` | Use 2D instead of full 27D features | `false` |
+
+### Features
+
+**Full features (27D per customer):** start/end times, depot travel times, slack,
+reachability ratios, quantile-based travel times to other customers.
+
+**2D features:** travel time from depot + mean travel time to others.
+
+## Baseline Policies
+
+| Policy | Description |
+|--------|-------------|
+| Lazy | Postpones all possible customers; serves only must-dispatch |
+| Greedy | Serves all pending customers immediately |
+
+## DFL Policy
+
+```math
+\xrightarrow[\text{State}]{s_t}
+\fbox{Neural network $\varphi_w$}
+\xrightarrow[\text{Prizes}]{\theta}
+\fbox{Prize-collecting VSP}
+\xrightarrow[\text{Routes}]{a_t}
+```
+
+The neural network predicts a prize ``\theta_i`` for each postponable customer.
+The prize-collecting VSP solver then maximizes collected prizes minus travel costs:
+```math
+\max_{a_t \in \mathcal{A}(s_t)} \sum_{r \in a_t} \left(\sum_{i \in r} \theta_i - \sum_{(i,j) \in r} d_{ij}\right)
+```
+
+**Model:**
+- 2D features: `Dense(2 → 1)` applied independently per customer
+- Full features: `Dense(27 → 1)` applied independently per customer
+
+!!! note "Reference"
+    This problem is a simplified version of the
+    [EURO-NeurIPS challenge 2022](https://euro-neurips-vrp-2022.challenges.ortec.com/),
+    and is solved using DFL in [Combinatorial Optimization enriched Machine Learning to solve the
+    Dynamic Vehicle Routing Problem with Time Windows](https://arxiv.org/abs/2304.00789).
+
+---
+
+*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
+
diff --git a/docs/src/benchmarks/toy/argmax.jl b/docs/src/benchmarks/static/01_argmax.jl
similarity index 100%
rename from docs/src/benchmarks/toy/argmax.jl
rename to docs/src/benchmarks/static/01_argmax.jl
diff --git a/docs/src/benchmarks/static/01_argmax.md b/docs/src/benchmarks/static/01_argmax.md
new file mode 100644
index 0000000..3cb4c08
--- /dev/null
+++ b/docs/src/benchmarks/static/01_argmax.md
@@ -0,0 +1,115 @@
+```@meta
+EditURL = "01_argmax.jl"
+```
+
+# Argmax
+Select the single best item from a set of `n` items, given features correlated with hidden
+item scores. This is a minimalist DFL setting: equivalent to multiclass
+classification, but with an argmax layer instead of softmax. It serves as a simple
+sandbox for understanding DFL concepts.
+ +````@example 01_argmax +using DecisionFocusedLearningBenchmarks +using Plots +using Statistics + +b = ArgmaxBenchmark(; seed=0) +```` + +## Observable input + +At inference time the decision-maker observes only a feature matrix `x` +(rows = features, columns = items): + +````@example 01_argmax +dataset = generate_dataset(b, 100; seed=0) +sample = first(dataset) +plot_instance(b, sample) +```` + +## A training sample + +Each sample is a labeled triple `(x, θ, y)`: +- `x`: feature matrix (observable at train and test time) +- `θ`: true item scores (training supervision only, hidden at test time) +- `y`: optimal one-hot decision derived from `θ` + +The full training triple (features, true scores, and optimal decision): + +````@example 01_argmax +plot_sample(b, sample) +```` + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting scores from features: + +````@example 01_argmax +model = generate_statistical_model(b) # linear map: features → predicted scores +```` + +and a maximizer turning those scores into a decision: + +````@example 01_argmax +maximizer = generate_maximizer(b) # one-hot argmax +```` + +A randomly initialized policy makes essentially random decisions: + +````@example 01_argmax +θ_pred = model(sample.x) +y_pred = maximizer(θ_pred) +```` + +````@example 01_argmax +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) +```` + +The goal of training is to find parameters that maximize accuracy. +Current accuracy on the dataset: + +````@example 01_argmax +mean(maximizer(model(s.x)) == s.y for s in dataset) +```` + +--- +## Problem Description + +In the **Argmax benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is +observed. A hidden linear encoder maps ``x`` to a score vector +``\theta = \text{encoder}(x) \in \mathbb{R}^n``. The task is to select the item with +the highest score: +```math +y = \mathrm{argmax}(\theta) +``` +The solution ``y`` is encoded as a one-hot vector. +The score vector ``\theta`` is never observed (only features ``x`` are available). +The DFL pipeline trains a model ``f_w`` so that ``\mathrm{argmax}(f_w(x))`` matches +``\mathrm{argmax}(\theta)`` at decision time. + +## Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `instance_dim` | Number of items | 10 | +| `nb_features` | Feature dimension `p` | 5 | + +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x \in \mathbb{R}^{p \times n}} +\fbox{Linear model $f_w$} +\xrightarrow[\text{Predicted scores}]{\theta \in \mathbb{R}^n} +\fbox{argmax} +\xrightarrow[\text{Selection}]{y \in \{0,1\}^n} +``` + +**Model:** `Chain(Dense(nb_features → 1; bias=false), vec)`: a single linear layer +predicting one score per item. + +**Maximizer:** `one_hot_argmax`: returns a one-hot vector at the argmax index. 
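+
+As a minimal sketch of that behavior (an illustrative helper, not the package's
+`one_hot_argmax` implementation):
+
+```julia
+# Return a one-hot vector with a 1 at the index of the largest score.
+function one_hot(θ::AbstractVector)
+    y = zeros(length(θ))
+    y[argmax(θ)] = 1.0
+    return y
+end
+
+one_hot([0.2, 1.5, -0.3])  # → [0.0, 1.0, 0.0]
+```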
+
+---
+
+*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*
+
diff --git a/docs/src/benchmarks/toy/argmax2d.jl b/docs/src/benchmarks/static/02_argmax2d.jl
similarity index 100%
rename from docs/src/benchmarks/toy/argmax2d.jl
rename to docs/src/benchmarks/static/02_argmax2d.jl
diff --git a/docs/src/benchmarks/static/02_argmax2d.md b/docs/src/benchmarks/static/02_argmax2d.md
new file mode 100644
index 0000000..4bab0ed
--- /dev/null
+++ b/docs/src/benchmarks/static/02_argmax2d.md
@@ -0,0 +1,104 @@
+```@meta
+EditURL = "02_argmax2d.jl"
+```
+
+# Argmax on a 2D polytope
+Select the best vertex of a random convex polytope in 2D: predict a cost direction θ from
+features, then return the vertex `v` maximizing `θᵀv`. The 2D setting makes this benchmark
+visual: the cost direction and selected vertex can be plotted directly, and the loss
+landscape can be shown as a contour plot over the 2D θ space.
+
+````@example 02_argmax2d
+using DecisionFocusedLearningBenchmarks
+using Plots
+
+b = Argmax2DBenchmark(; seed=0)
+````
+
+## Observable input
+
+At inference time the decision-maker observes the feature vector `x` and the polytope shape,
+but not the hidden cost direction `θ`:
+
+````@example 02_argmax2d
+dataset = generate_dataset(b, 50; seed=0)
+sample = first(dataset)
+plot_instance(b, sample)
+````
+
+## A training sample
+
+Each sample is a labeled triple `(x, θ, y)`:
+- `x`: feature vector (observable at train and test time)
+- `θ`: 2D cost direction (training supervision only, hidden at test time)
+- `y`: polytope vertex maximizing `θᵀv` (optimal decision)
+- `instance` (in `context`): polytope vertices (observable problem structure)
+
+The full training triple (polytope, cost direction θ, optimal vertex y):
+
+````@example 02_argmax2d
+plot_sample(b, sample)
+````
+
+## Untrained policy
+
+A DFL policy chains two components: a statistical model predicting a 2D cost direction:
+
+````@example 02_argmax2d
+model = generate_statistical_model(b)  # linear map: features → 2D cost vector
+````
+
+and a maximizer selecting the best polytope vertex for that direction:
+
+````@example 02_argmax2d
+maximizer = generate_maximizer(b)  # vertex maximizing θᵀv over polytope vertices
+````
+
+A randomly initialized policy predicts an arbitrary cost direction:
+
+````@example 02_argmax2d
+θ_pred = model(sample.x)
+y_pred = maximizer(θ_pred; sample.context...)
+plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred))
+````
+
+---
+## Problem Description
+
+In the **Argmax2D benchmark**, each instance defines a random convex polytope
+``\mathcal{Y}(x) = \mathrm{conv}(v_1, \ldots, v_m)`` in ``\mathbb{R}^2``.
+A hidden encoder maps features ``x \in \mathbb{R}^p`` to a 2D cost vector
+``\theta \in \mathbb{R}^2``. The task is to find the polytope vertex maximizing
+the dot product:
+```math
+y^* = \mathrm{argmax}_{v \in \mathcal{Y}(x)} \; \theta^\top v
+```
+
+This is a toy 2D combinatorial optimization problem useful for visualizing
+how well a model learns the cost direction.
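+
+As an illustrative sketch of this selection rule (a hypothetical helper, not the
+package maximizer):
+
+```julia
+# Pick the vertex with the largest dot product with the cost direction θ.
+best_vertex(θ, vertices) = argmax(v -> θ' * v, vertices)
+
+θ = [1.0, 0.5]
+vertices = [[1.0, 0.0], [0.0, 1.0], [-1.0, 0.0]]
+best_vertex(θ, vertices)  # → [1.0, 0.0]
+```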
+

## Key Parameters

| Parameter | Description | Default |
|-----------|-------------|---------|
| `nb_features` | Feature dimension `p` | 5 |
| `polytope_vertex_range` | Number of polytope vertices (list; one value drawn at random per instance) | `[6]` |

## DFL Policy

```math
\xrightarrow[\text{Features}]{x}
\fbox{Linear model}
\xrightarrow{\theta \in \mathbb{R}^2}
\fbox{Polytope argmax}
\xrightarrow{y}
```

**Model:** `Dense(nb_features → 2; bias=false)` — predicts a 2D cost direction.

**Maximizer:** finds the vertex of the instance polytope with maximum dot product with θ.

---

*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*

diff --git a/docs/src/benchmarks/static/ranking.jl b/docs/src/benchmarks/static/03_ranking.jl
similarity index 100%
rename from docs/src/benchmarks/static/ranking.jl
rename to docs/src/benchmarks/static/03_ranking.jl
diff --git a/docs/src/benchmarks/static/03_ranking.md b/docs/src/benchmarks/static/03_ranking.md
new file mode 100644
index 0000000..5c2ced2
--- /dev/null
+++ b/docs/src/benchmarks/static/03_ranking.md
@@ -0,0 +1,104 @@
```@meta
EditURL = "03_ranking.jl"
```

# Ranking
Rank a set of items. Each item has a hidden score, correlated with observable input
features. The goal is to learn to sort items by their hidden scores, using observable
features alone.

````@example 03_ranking
using DecisionFocusedLearningBenchmarks
using Plots

b = RankingBenchmark()
````

## Observable input

At inference time the decision-maker observes only the feature matrix `x`
(rows = features, columns = items):

````@example 03_ranking
dataset = generate_dataset(b, 50; seed=0)
sample = first(dataset)
plot_instance(b, sample)
````

## A training sample

Each sample is a labeled triple `(x, θ, y)`:
- `x`: feature matrix (rows = features, columns = items; observable at train and test time)
- `θ`: true item costs (training supervision only, hidden at test time)
- `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the lowest cost)

The full training triple (features, true costs, and derived ranking):

````@example 03_ranking
plot_sample(b, sample)
````

## Untrained policy

A DFL policy chains two components: a statistical model predicting item scores:

````@example 03_ranking
model = generate_statistical_model(b) # linear map: features → predicted costs
````

and a maximizer ranking items by those scores:

````@example 03_ranking
maximizer = generate_maximizer(b) # ordinal ranking via sortperm
````

A randomly initialized policy produces an arbitrary ranking:

````@example 03_ranking
θ_pred = model(sample.x)
plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred)))
````

Optimality gap on the dataset (0 = optimal, higher is worse):

````@example 03_ranking
compute_gap(b, dataset, model, maximizer)
````

---
## Problem Description

In the **Ranking benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is
observed. A hidden linear encoder maps ``x`` to a cost vector
``\theta \in \mathbb{R}^n``. The task is to compute the ordinal ranking of the items
by cost:
```math
y_i = \mathrm{rank}(\theta_i \mid \theta_1, \ldots, \theta_n)
```
where ``y_i = 1`` means item ``i`` has the lowest cost.
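
A minimal sketch of this ranking operation in plain Julia, assuming the
`invperm(sortperm(θ))` convention used by the maximizer described below
(`ordinal_ranks` is an illustrative name):

```julia
# Rank items by cost: rank 1 goes to the cheapest item.
ordinal_ranks(θ::AbstractVector) = invperm(sortperm(θ))

ordinal_ranks([0.3, 0.1, 0.7])  # returns [2, 1, 3]
```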
+ +## Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `instance_dim` | Number of items to rank | 10 | +| `nb_features` | Feature dimension `p` | 5 | + +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x} +\fbox{Linear model} +\xrightarrow{\theta} +\fbox{ranking} +\xrightarrow{y} +``` + +**Model:** `Chain(Dense(nb_features → 1; bias=false), vec)` — predicts one score per item. + +**Maximizer:** `ranking(θ)` — returns a vector of ordinal ranks via `invperm(sortperm(θ))`. + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/static/portfolio_optimization.jl b/docs/src/benchmarks/static/04_portfolio_optimization.jl similarity index 100% rename from docs/src/benchmarks/static/portfolio_optimization.jl rename to docs/src/benchmarks/static/04_portfolio_optimization.jl diff --git a/docs/src/benchmarks/static/04_portfolio_optimization.md b/docs/src/benchmarks/static/04_portfolio_optimization.md new file mode 100644 index 0000000..f0e4c15 --- /dev/null +++ b/docs/src/benchmarks/static/04_portfolio_optimization.md @@ -0,0 +1,118 @@ +```@meta +EditURL = "04_portfolio_optimization.jl" +``` + +# Portfolio Optimization +Allocate wealth across assets to maximize expected return subject to a risk constraint: +asset returns are unknown and must be predicted from contextual features. + +````@example 04_portfolio_optimization +using DecisionFocusedLearningBenchmarks +using Plots + +b = PortfolioOptimizationBenchmark() +```` + +## Observable input + +At inference time the decision-maker observes only the contextual feature vector `x`: + +````@example 04_portfolio_optimization +dataset = generate_dataset(b, 20; seed=0) +sample = first(dataset) +plot_instance(b, sample) +```` + +## A training sample + +Each sample is a labeled triple `(x, θ, y)`: +- `x`: contextual feature vector (observable at train and test time) +- `θ`: true expected asset returns (training supervision only, hidden at test time) +- `y`: optimal portfolio weights solving the Markowitz QP given `θ` + +Top: feature vector x. Bottom left: true returns θ. Bottom right: optimal weights y: + +````@example 04_portfolio_optimization +plot_sample(b, sample) +```` + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting expected asset returns: + +````@example 04_portfolio_optimization +model = generate_statistical_model(b) # linear map: features → predicted returns +```` + +and a maximizer allocating the optimal portfolio given those returns: + +````@example 04_portfolio_optimization +maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) +```` + +A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: + +````@example 04_portfolio_optimization +θ_pred = model(sample.x) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +```` + +Optimality gap on the dataset (0 = optimal, higher is worse): + +````@example 04_portfolio_optimization +compute_gap(b, dataset, model, maximizer) +```` + +--- +## Problem Description + +A **Markowitz portfolio optimization** problem where asset expected returns are unknown. 
+Given contextual features ``x \in \mathbb{R}^p``, the learner predicts returns +``\theta \in \mathbb{R}^d`` and solves: + +```math +\begin{aligned} +\max_{y} \quad & \theta^\top y \\ +\text{s.t.} \quad & y^\top \Sigma y \leq \gamma \\ +& \mathbf{1}^\top y \leq 1 \\ +& y \geq 0 +\end{aligned} +``` + +where ``\Sigma`` is the asset covariance matrix and ``\gamma`` is the risk budget. +The solver uses [Ipopt.jl](https://github.com/jump-dev/Ipopt.jl) via JuMP. + +## Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `d` | Number of assets | 50 | +| `p` | Feature dimension | 5 | +| `deg` | Polynomial degree for data generation | 1 | +| `ν` | Noise hyperparameter | 1.0 | + +Data is generated following the process in +[Mandi et al., 2023](https://arxiv.org/abs/2307.13565). + +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x \in \mathbb{R}^p} +\fbox{Linear model} +\xrightarrow[\text{Predicted returns}]{\hat{\theta} \in \mathbb{R}^d} +\fbox{QP solver (Ipopt)} +\xrightarrow[\text{Portfolio}]{y \in \mathbb{R}^d} +``` + +**Model:** `Dense(p → d)` — predicts one expected return per asset. + +**Maximizer:** Ipopt QP solver enforcing the variance and budget constraints. + +!!! note "Reference" + Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities. + [arXiv:2307.13565](https://arxiv.org/abs/2307.13565) + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/static/subset_selection.jl b/docs/src/benchmarks/static/05_subset_selection.jl similarity index 100% rename from docs/src/benchmarks/static/subset_selection.jl rename to docs/src/benchmarks/static/05_subset_selection.jl diff --git a/docs/src/benchmarks/static/05_subset_selection.md b/docs/src/benchmarks/static/05_subset_selection.md new file mode 100644 index 0000000..a865f74 --- /dev/null +++ b/docs/src/benchmarks/static/05_subset_selection.md @@ -0,0 +1,109 @@ +```@meta +EditURL = "05_subset_selection.jl" +``` + +# Subset Selection +Select the `k` most valuable items from a set of `n`: items with unknown values +must be identified from observable features alone. 
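
The decision rule itself is a plain top-`k` selection, detailed at the end of this page.
As a minimal sketch, assuming a `partialsortperm`-based selection (`select_top_k` is an
illustrative name, not the package's `top_k`):

```julia
# Select the k items with the largest predicted values.
function select_top_k(θ::AbstractVector, k::Integer)
    y = falses(length(θ))
    y[partialsortperm(θ, 1:k; rev=true)] .= true
    return y
end

select_top_k([0.2, 0.9, 0.1, 0.5], 2)  # items 2 and 4 selected
```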
+ +````@example 05_subset_selection +using DecisionFocusedLearningBenchmarks +using Plots + +b = SubsetSelectionBenchmark(; identity_mapping=false) +```` + +## Observable input + +At inference time the decision-maker observes only the feature vector `x`: + +````@example 05_subset_selection +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) +```` + +## A training sample + +Each sample is a labeled triple `(x, θ, y)`: +- `x`: item feature vector (observable at train and test time) +- `θ`: true item values, derived from `x` via a hidden encoder (training supervision only) +- `y`: selection indicator (`y[i] = 1` for the `k` highest-value items, 0 otherwise) + +The full training triple (features, hidden values, and selection): + +````@example 05_subset_selection +plot_sample(b, sample) +```` + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting item scores: + +````@example 05_subset_selection +model = generate_statistical_model(b) # linear map: features → predicted item scores +```` + +and a maximizer selecting the top-k items by those scores: + +````@example 05_subset_selection +maximizer = generate_maximizer(b) # top-k selection +```` + +A randomly initialized policy selects items with no relation to their true values: + +````@example 05_subset_selection +θ_pred = model(sample.x) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +```` + +Optimality gap on the dataset (0 = optimal, higher is worse): + +````@example 05_subset_selection +compute_gap(b, dataset, model, maximizer) +```` + +--- +## Problem Description + +In the **Subset Selection benchmark**, ``n`` items have unknown values ``\theta_i``. +A feature vector ``x \in \mathbb{R}^n`` is observed (identity mapping by default). +The task is to select the ``k`` items with the highest values: +```math +y = \mathrm{top}_k(\theta) +``` +where ``y \in \{0,1\}^n`` with exactly ``k`` ones. + +## Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `n` | Total number of items | 25 | +| `k` | Number of items to select | 5 | +| `identity_mapping` | Use identity as the hidden mapping | `true` | + +When `identity_mapping=true`, features equal item values directly (`x = θ`). +When `false`, a random linear layer is used as the hidden mapping. + +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x} +\fbox{Linear model} +\xrightarrow{\theta} +\fbox{top-k} +\xrightarrow{y} +``` + +**Model:** `Dense(n → n; bias=false)` — predicts a score per item. + +**Maximizer:** `top_k(θ, k)` — returns a boolean vector with `true` at the `k` +highest-scoring positions. + +!!! 
note "Reference" + Setting from [Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities](https://arxiv.org/abs/2307.13565) + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/static/fixed_size_shortest_path.jl b/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl similarity index 100% rename from docs/src/benchmarks/static/fixed_size_shortest_path.jl rename to docs/src/benchmarks/static/06_fixed_size_shortest_path.jl diff --git a/docs/src/benchmarks/static/06_fixed_size_shortest_path.md b/docs/src/benchmarks/static/06_fixed_size_shortest_path.md new file mode 100644 index 0000000..db7ea96 --- /dev/null +++ b/docs/src/benchmarks/static/06_fixed_size_shortest_path.md @@ -0,0 +1,115 @@ +```@meta +EditURL = "06_fixed_size_shortest_path.jl" +``` + +# Shortest Path +Find the cheapest path from the top-left to the bottom-right of a grid graph: +edge costs are unknown and must be predicted from instance features. + +````@example 06_fixed_size_shortest_path +using DecisionFocusedLearningBenchmarks +using Plots + +b = FixedSizeShortestPathBenchmark() +```` + +## Observable input + +At inference time the decision-maker observes the feature vector `x` and the fixed grid +structure (source top-left, sink bottom-right): + +````@example 06_fixed_size_shortest_path +dataset = generate_dataset(b, 50; seed=0) +sample = first(dataset) +plot_instance(b, sample) +```` + +## A training sample + +Each sample is a labeled triple `(x, θ, y)`: +- `x`: instance feature vector (observable at train and test time) +- `θ`: true edge costs (training supervision only, hidden at test time) +- `y`: path indicator vector (`y[e] = 1` if edge `e` is on the optimal path) + +Top: feature vector x. Bottom left: edge costs θ. Bottom right: optimal path y (white dots): + +````@example 06_fixed_size_shortest_path +plot_sample(b, sample) +```` + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting edge costs: + +````@example 06_fixed_size_shortest_path +model = generate_statistical_model(b) # linear map: features → predicted edge costs +```` + +and a maximizer finding the shortest path given those costs: + +````@example 06_fixed_size_shortest_path +maximizer = generate_maximizer(b) # Dijkstra shortest path on the grid graph +```` + +A randomly initialized policy predicts arbitrary costs, yielding a near-straight path: + +````@example 06_fixed_size_shortest_path +θ_pred = model(sample.x) +plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +```` + +Optimality gap on the dataset (0 = optimal, higher is worse): + +````@example 06_fixed_size_shortest_path +compute_gap(b, dataset, model, maximizer) +```` + +--- +## Problem Description + +A **fixed-size grid shortest path** problem. The graph is a directed acyclic grid of +size ``(\text{rows} \times \text{cols})``, with edges pointing right and downward. +Edge costs ``\theta \in \mathbb{R}^E`` are unknown; only a feature vector +``x \in \mathbb{R}^p`` is observed. The task is to find the minimum-cost path from +vertex 1 (top-left) to vertex ``V`` (bottom-right): +```math +y^* = \mathrm{argmin}_{y \in \mathcal{P}} \; \theta^\top y +``` +where ``y \in \{0,1\}^E`` indicates selected edges and ``\mathcal{P}`` is the set of +valid source-to-sink paths. + +Data is generated following the process in +[Mandi et al., 2023](https://arxiv.org/abs/2307.13565). 
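
Because the grid DAG only contains rightward and downward edges, the optimization above
can be solved by a short dynamic program. A minimal sketch, assuming edge costs are
stored in two matrices `right` and `down` (an illustrative encoding, not the package's
Graphs.jl representation):

```julia
# V[i, j] = cheapest cost from cell (i, j) to the bottom-right corner.
function grid_shortest_path_cost(right::AbstractMatrix, down::AbstractMatrix)
    rows, cols = size(down, 1) + 1, size(right, 2) + 1
    V = fill(Inf, rows, cols)
    V[rows, cols] = 0.0
    for i in rows:-1:1, j in cols:-1:1
        j < cols && (V[i, j] = min(V[i, j], right[i, j] + V[i, j + 1]))
        i < rows && (V[i, j] = min(V[i, j], down[i, j] + V[i + 1, j]))
    end
    return V[1, 1]  # optimal source-to-sink cost
end

grid_shortest_path_cost(rand(5, 4), rand(4, 5))  # a 5×5 grid
```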
+

## Key Parameters

| Parameter | Description | Default |
|-----------|-------------|---------|
| `grid_size` | Grid dimensions `(rows, cols)` | `(5, 5)` |
| `p` | Feature dimension | 5 |
| `deg` | Polynomial degree for cost generation | 1 |
| `ν` | Multiplicative noise level (0 = no noise) | 0.0 |

## DFL Policy

```math
\xrightarrow[\text{Features}]{x \in \mathbb{R}^p}
\fbox{Linear model}
\xrightarrow[\text{Predicted costs}]{\theta \in \mathbb{R}^E}
\fbox{Dijkstra / Bellman-Ford}
\xrightarrow[\text{Path}]{y \in \{0,1\}^E}
```

**Model:** `Chain(Dense(p → E))` — predicts one cost per edge.

**Maximizer:** Dijkstra (default) or Bellman-Ford on negated weights to find the
longest (maximum-weight) path.

!!! note "Reference"
    Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities.
    [arXiv:2307.13565](https://arxiv.org/abs/2307.13565)

---

*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*

diff --git a/docs/src/benchmarks/static/warcraft.jl b/docs/src/benchmarks/static/07_warcraft.jl
similarity index 100%
rename from docs/src/benchmarks/static/warcraft.jl
rename to docs/src/benchmarks/static/07_warcraft.jl
diff --git a/docs/src/benchmarks/static/07_warcraft.md b/docs/src/benchmarks/static/07_warcraft.md
new file mode 100644
index 0000000..c246cc9
--- /dev/null
+++ b/docs/src/benchmarks/static/07_warcraft.md
@@ -0,0 +1,119 @@
```@meta
EditURL = "07_warcraft.jl"
```

# Warcraft
Find the cheapest path on a 12×12 terrain map: cell travel costs are unknown and must
be inferred from the RGB terrain image using a neural network.

````@example 07_warcraft
using DecisionFocusedLearningBenchmarks
using Plots

b = WarcraftBenchmark()
````

## Observable input

At inference time the decision-maker observes only the terrain image `x` (not the costs `θ`):

````@example 07_warcraft
sample = generate_dataset(b, 1)[1]
plot_instance(b, sample)
````

## A training sample

Each sample is a labeled triple `(x, θ, y)`:
- `x`: terrain image (96×96×3 RGB array covering the 12×12 cell grid; observable at train and test time)
- `θ`: true cell travel costs (training supervision only, hidden at test time)
- `y`: optimal path indicator (`y[i,j] = 1` if cell `(i,j)` is on the path)

Left: terrain image. Middle: true costs θ. Right: optimal path y:

````@example 07_warcraft
plot_sample(b, sample)
````

## Untrained policy

A DFL policy chains two components: a CNN predicting cell travel costs from the terrain image:

````@example 07_warcraft
model = generate_statistical_model(b) # ResNet18 CNN: terrain image → 12×12 cost map
````

and a maximizer finding the shortest path given those costs:

````@example 07_warcraft
maximizer = generate_maximizer(b) # Dijkstra shortest path on the 12×12 grid
````

An untrained CNN produces a near-uniform cost map, yielding a near-straight path:

````@example 07_warcraft
θ_pred = model(sample.x)
plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred)))
````

Optimality gap on this sample (0 = optimal, higher is worse):

````@example 07_warcraft
compute_gap(b, [sample], model, maximizer)
````

---
## Problem Description

In the **Warcraft benchmark**, each instance is a 12×12 grid representing a Warcraft
terrain map. Each cell has an unknown travel cost depending on its terrain type (forest,
mountain, water, etc.).
The task is to find the path from the top-left to the
bottom-right corner that minimizes total travel cost.

Formally, let ``\theta_{ij}`` be the (unknown) cost of cell ``(i,j)`` and
``y_{ij} \in \{0,1\}`` indicate whether cell ``(i,j)`` is on the path. The objective is:
```math
y^* = \mathrm{argmin}_{y \in \mathcal{P}} \sum_{(i,j)} \theta_{ij} \, y_{ij}
```
where ``\mathcal{P}`` is the set of valid grid paths (4-connected, source to sink).

The dataset contains 10 000 labeled terrain images from the Warcraft II tileset.
It is downloaded automatically on first use via
[DataDeps.jl](https://github.com/oxinabox/DataDeps.jl).

## Key Components

**[`WarcraftBenchmark`](@ref)** has no parameters.

| Method | Description |
|--------|-------------|
| `generate_dataset(b, n)` | Downloads and loads `n` terrain images with true costs and paths |
| `generate_statistical_model(b)` | ResNet18 CNN (first 5 layers + adaptive maxpool + neg) |
| `generate_maximizer(b; dijkstra=true)` | Dijkstra or Bellman-Ford shortest path |

## DFL Policy

```math
\xrightarrow[\text{Terrain image}]{x \in \mathbb{R}^{96 \times 96 \times 3}}
\fbox{ResNet18 CNN}
\xrightarrow[\text{Cell costs}]{\theta \in \mathbb{R}^{12 \times 12}}
\fbox{Dijkstra}
\xrightarrow[\text{Path}]{y \in \{0,1\}^{12 \times 12}}
```

The CNN maps terrain pixel values to predicted cell costs, which are then passed to a
shortest-path solver. Training end-to-end with
[InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) teaches
the network to produce costs that lead to good paths, not just accurate cost estimates.

!!! tip
    See the [Warcraft tutorial](../../warcraft_tutorial.md) for a complete end-to-end training
    example using `PerturbedMultiplicative` and `FenchelYoungLoss`.

!!! note "Reference"
    Vlastelica et al. (2020), Differentiation of Blackbox Combinatorial Solvers, ICLR 2020.

---

*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*

diff --git a/docs/src/benchmarks/toy/contextual_stochastic_argmax.jl b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl
similarity index 100%
rename from docs/src/benchmarks/toy/contextual_stochastic_argmax.jl
rename to docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl
diff --git a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md
new file mode 100644
index 0000000..7caf43d
--- /dev/null
+++ b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md
@@ -0,0 +1,138 @@
```@meta
EditURL = "01_contextual_stochastic_argmax.jl"
```

# Contextual Stochastic Argmax
Select the best item from a set of `n` items with stochastic utilities: each scenario draws
a different utility vector, but utilities depend on observable context features. This is a
toy benchmark designed so that a linear model can exactly recover the optimal
context-to-utility mapping.

````@example 01_contextual_stochastic_argmax
using DecisionFocusedLearningBenchmarks
using Plots

b = ContextualStochasticArgmaxBenchmark()
````

`generate_dataset` returns unlabeled samples (`y = nothing`) for this benchmark.
A `target_policy` must be provided to attach labels. Here we use the anticipative
oracle: it returns the item with the highest realized utility for each scenario,
giving one labeled sample per scenario per instance.
+ +````@example 01_contextual_stochastic_argmax +anticipative = generate_anticipative_solver(b) +policy = + (ctx, scenarios) -> [ + DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) + for ξ in scenarios + ] +dataset = generate_dataset(b, 20; target_policy=policy, seed=0) +sample = first(dataset) +```` + +## Observable input + +At inference time the model observes `x = [c_base; x_raw]`. `plot_instance` shows both +components: base utilities `c_base` (left) and context features `x_raw` (right): + +````@example 01_contextual_stochastic_argmax +plot_instance(b, sample) +```` + +## A training sample + +Stochastic benchmarks have no single ground-truth label: the optimal item depends on +which utility scenario is realized. We label each sample with the anticipative oracle, +which returns the best item given the realized scenario ξ. + +Each labeled sample contains: +- `x`: feature vector `[c_base; x_raw]` (observable at train and test time) +- `y`: optimal item for the realized scenario ξ (one-hot; anticipative oracle label) +- `extra.scenario`: realized utility vector ξ (available only during training) + +Top: feature vector x. Bottom: realized scenario ξ acting as the cost vector, +with the anticipative-optimal item in red: + +````@example 01_contextual_stochastic_argmax +plot_sample(b, DataSample(sample; θ=sample.scenario)) +```` + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting expected item utilities: + +````@example 01_contextual_stochastic_argmax +model = generate_statistical_model(b) # linear map: features → predicted expected utilities +```` + +and a maximizer selecting the item with the highest predicted utility: + +````@example 01_contextual_stochastic_argmax +maximizer = generate_maximizer(b) # one-hot argmax +```` + +A randomly initialized policy selects items with no relation to their expected utilities. +Top: feature vector x. Bottom: predicted utilities θ̂ with the selected item in red: + +````@example 01_contextual_stochastic_argmax +θ_pred = model(sample.x) +plot_sample(b, DataSample(sample; θ=θ_pred, y=maximizer(θ_pred))) +```` + +--- +## Problem Description + +### Overview + +In the **Contextual Stochastic Argmax benchmark**, ``n`` items have random utilities +that depend on observable context. Per instance: +- ``c_\text{base} \sim U[0,1]^n``: base utilities (stored in `context`) +- ``x_\text{raw} \sim \mathcal{N}(0, I_d)``: observable context features +- Full features: ``x = [c_\text{base}; x_\text{raw}] \in \mathbb{R}^{n+d}`` + +The realized utility (scenario) is drawn as: +```math +\xi = c_\text{base} + W \, x_\text{raw} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, \sigma^2 I) +``` +where ``W \in \mathbb{R}^{n \times d}`` is a fixed unknown perturbation matrix. + +The task is to select the item with the highest realized utility: +```math +y^* = \mathrm{argmax}(\xi) +``` + +A linear model ``\theta = [I \mid W] \cdot x`` can exactly recover the optimal +solution in expectation. + +## Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `n` | Number of items | 10 | +| `d` | Context feature dimension | 5 | +| `noise_std` | Noise standard deviation σ | 0.1 | + +## Baseline Policies + +- **SAA**: selects the item with highest mean utility over available scenarios. 
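
A minimal sketch of this SAA rule, assuming scenarios are given as a vector of realized
utility vectors (`saa_item` is an illustrative name, not the package's policy):

```julia
using Statistics

# Average utilities across scenarios, then pick the best item in expectation.
saa_item(scenarios::AbstractVector) = argmax(mean(scenarios))

scenarios = [randn(10) for _ in 1:50]  # 50 sampled utility vectors, n = 10 items
saa_item(scenarios)
```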
+ +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x = [c_\text{base}; x_\text{raw}]} +\fbox{Linear model} +\xrightarrow{\theta \in \mathbb{R}^n} +\fbox{argmax} +\xrightarrow{y} +``` + +**Model:** `Dense(n+d → n; bias=false)` — can in principle recover the exact mapping +``[I \mid W]`` from training data. + +**Maximizer:** `one_hot_argmax`. + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + diff --git a/docs/src/benchmarks/stochastic/vsp.jl b/docs/src/benchmarks/stochastic/02_vsp.jl similarity index 100% rename from docs/src/benchmarks/stochastic/vsp.jl rename to docs/src/benchmarks/stochastic/02_vsp.jl diff --git a/docs/src/benchmarks/stochastic/02_vsp.md b/docs/src/benchmarks/stochastic/02_vsp.md new file mode 100644 index 0000000..1cf2512 --- /dev/null +++ b/docs/src/benchmarks/stochastic/02_vsp.md @@ -0,0 +1,151 @@ +```@meta +EditURL = "02_vsp.jl" +``` + +# Stochastic Vehicle Scheduling +Assign vehicles to cover a set of tasks while minimizing costs under stochastic delays: +the DFL agent learns to predict adjusted costs that implicitly hedge against uncertainty. + +````@example 02_vsp +using DecisionFocusedLearningBenchmarks +using Plots + +b = StochasticVehicleSchedulingBenchmark() +```` + +## Observable input + +Each instance is a city with task locations and scheduled times. Task spatial positions +and scheduled times are observable at inference time. +`store_city=true` is required to visualize the map (not needed for training): + +````@example 02_vsp +sample = generate_dataset(b, 1; store_city=true)[1] +plot_instance(b, sample) +```` + +## A training sample + +Each sample is a labeled triple `(x, θ, y)`: +- `x`: 20-dimensional feature vector per edge, encoding schedule slack and travel times +- `θ`: adjusted edge costs (training supervision only, hidden at test time) +- `y`: binary assignment (`y[(u,v)] = 1` if a vehicle travels edge `(u, v)` in the schedule) + +Unlike static benchmarks, `y` labels are not available by default and must be attached +via a `target_policy` (e.g., the deterministic VSP solver). Routes are visualized +in the untrained policy section below. + +## Untrained policy + +A DFL policy chains two components: a statistical model predicting adjusted edge costs: + +````@example 02_vsp +model = generate_statistical_model(b) # linear map: task features -> adjusted edge costs +```` + +and a maximizer solving the deterministic VSP given those costs: + +````@example 02_vsp +maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) +```` + +The untrained model predicts random edge costs; the resulting schedule is arbitrary: + +````@example 02_vsp +θ_pred = model(sample.x) +y_pred = maximizer(θ_pred; sample.context...) +plot_sample( + b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra) +) +```` + +--- +## Problem Description + +### Overview + +In the **Vehicle Scheduling Problem (VSP)**, we consider a set of tasks ``V``. Each +task ``v \in V`` has a scheduled beginning time ``t_v^b`` and end time ``t_v^e``, with +``t_v^e > t_v^b``. We denote ``t^{tr}_{(u,v)}`` the travel time from task ``u`` to task +``v``. A task ``v`` can follow ``u`` only if: +```math +t_v^b \geq t_u^e + t^{tr}_{(u,v)} +``` + +An instance of VSP can be modeled as an acyclic directed graph where nodes are tasks +and edges represent feasible successions. A solution is a set of disjoint paths such +that all tasks are fulfilled exactly once to minimize total costs. 
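
The edges of this graph come from a simple feasibility test; a one-line sketch
(argument names are illustrative, not the package's task type):

```julia
# Task v can directly follow task u iff a vehicle can reach it in time.
can_follow(t_end_u, travel_uv, t_begin_v) = t_begin_v >= t_end_u + travel_uv

can_follow(10.0, 2.5, 13.0)  # true: 0.5 units of slack remain
```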
+ +In the **Stochastic VSP (StoVSP)**, after the scheduling decision is set, random delays +propagate along vehicle tours. The objective becomes minimizing base costs plus expected +total delay costs over scenarios. + +### Mathematical Formulation + +**Variables:** Let ``y_{u,v} \in \{0,1\}`` indicate if a vehicle performs task ``v`` +immediately after task ``u``. + +**Delay Propagation:** For each task ``v`` in scenario ``s``: +- ``\gamma_v^s``: intrinsic delay of task ``v`` +- ``d_v^s``: total accumulated delay +- ``\delta_{u,v}^s = t_v^b - (t_u^e + t^{tr}_{(u,v)})``: slack time + +```math +d_v^s = \gamma_v^s + \max(d_u^s - \delta_{u,v}^s,\; 0) +``` + +**Objective:** +```math +\min_{y} \; \sum_{(u,v)} c_{u,v} \, y_{u,v} + \mathbb{E}_{s \in S}\!\left[\sum_v C_d \, d_v^s\right] +``` + +## Key Components + +### [`StochasticVehicleSchedulingBenchmark`](@ref) + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `nb_tasks` | Number of tasks per instance | 25 | +| `nb_scenarios` | Number of scenarios for objective evaluation | 10 | + +### Instance Generation + +Each instance simulates a geographic city with depots and task locations. Tasks have +realistic scheduled start/end times. Scenarios are random intrinsic delays ``\gamma`` +drawn from a Log-Normal distribution. Feature vectors are 20-dimensional. + +## Baseline Policies + +| Policy | Description | +|--------|-------------| +| `svs_deterministic_policy` | Solves the deterministic VSP, ignoring delays | +| `svs_saa_policy` | SAA via column generation over ``K`` scenarios | +| `svs_saa_mip_policy` | Exact SAA via compact MIP formulation | +| `svs_local_search_policy` | Heuristic local search over sampled scenarios | + +## DFL Policy + +```math +\xrightarrow[\text{Features}]{x \in \mathbb{R}^{20}} +\fbox{Linear model $\varphi_w$} +\xrightarrow[\text{Predicted cost}]{c} +\fbox{Deterministic VSP solver} +\xrightarrow[\text{Routes}]{y} +``` + +By training end-to-end with the deterministic solver, the linear model learns adjusted +costs ``c`` that implicitly account for expected stochastic delays, while keeping +the fast deterministic solver at inference time. + +**Model:** `Chain(Dense(20 -> 1; bias=false), vec)`: predicts one adjusted cost per edge. + +**Maximizer:** `StochasticVehicleSchedulingMaximizer`: HiGHS MIP solver on the +deterministic VSP instance. + +!!! 
note "Reference" + [Learning to Approximate Industrial Problems by Operations Research Classic Problems](https://hal.science/hal-02396091/document) + +--- + +*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* + From f65748300d08ff6ee00c7a05ffe6e2b5e1c48a18 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 09:40:12 +0200 Subject: [PATCH 14/21] docs: split API into public methods and internals --- docs/make.jl | 1 - docs/src/api.md | 80 ++------- .../dynamic/01_dynamic_assortment.md | 155 ----------------- docs/src/benchmarks/dynamic/02_maintenance.md | 144 ---------------- docs/src/benchmarks/dynamic/03_dvsp.md | 161 ------------------ docs/src/benchmarks/static/01_argmax.md | 115 ------------- docs/src/benchmarks/static/02_argmax2d.md | 104 ----------- docs/src/benchmarks/static/03_ranking.md | 104 ----------- .../static/04_portfolio_optimization.md | 118 ------------- .../benchmarks/static/05_subset_selection.md | 109 ------------ .../static/06_fixed_size_shortest_path.md | 115 ------------- docs/src/benchmarks/static/07_warcraft.md | 119 ------------- .../01_contextual_stochastic_argmax.md | 138 --------------- docs/src/benchmarks/stochastic/02_vsp.md | 151 ---------------- 14 files changed, 18 insertions(+), 1596 deletions(-) delete mode 100644 docs/src/benchmarks/dynamic/01_dynamic_assortment.md delete mode 100644 docs/src/benchmarks/dynamic/02_maintenance.md delete mode 100644 docs/src/benchmarks/dynamic/03_dvsp.md delete mode 100644 docs/src/benchmarks/static/01_argmax.md delete mode 100644 docs/src/benchmarks/static/02_argmax2d.md delete mode 100644 docs/src/benchmarks/static/03_ranking.md delete mode 100644 docs/src/benchmarks/static/04_portfolio_optimization.md delete mode 100644 docs/src/benchmarks/static/05_subset_selection.md delete mode 100644 docs/src/benchmarks/static/06_fixed_size_shortest_path.md delete mode 100644 docs/src/benchmarks/static/07_warcraft.md delete mode 100644 docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md delete mode 100644 docs/src/benchmarks/stochastic/02_vsp.md diff --git a/docs/make.jl b/docs/make.jl index b33f305..5b851fd 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,7 +10,6 @@ tutorial_files = readdir(tutorial_dir) md_tutorial_files = [split(file, ".")[1] * ".md" for file in tutorial_files] categories = [ - "Toy problems" => "toy", "Static problems" => "static", "Stochastic problems" => "stochastic", "Dynamic problems" => "dynamic", diff --git a/docs/src/api.md b/docs/src/api.md index 0cff868..d615913 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -7,11 +7,6 @@ Modules = [DecisionFocusedLearningBenchmarks.Utils] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Utils] -Public = false -``` - ## Argmax2D ```@autodocs @@ -19,11 +14,6 @@ Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] -Public = false -``` - ## Argmax ```@autodocs @@ -31,11 +21,6 @@ Modules = [DecisionFocusedLearningBenchmarks.Argmax] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Argmax] -Public = false -``` - ## Contextual Stochastic Argmax ```@autodocs @@ -43,11 +28,6 @@ Modules = [DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax] -Public = false -``` - ## Dynamic Vehicle Scheduling ```@autodocs @@ -55,11 +35,6 @@ Modules = 
[DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] -Public = false -``` - ## Dynamic Assortment ```@autodocs @@ -67,11 +42,6 @@ Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] -Public = false -``` - ## Fixed-size shortest path ```@autodocs @@ -79,11 +49,6 @@ Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] -Public = false -``` - ## Maintenance ```@autodocs @@ -91,11 +56,6 @@ Modules = [DecisionFocusedLearningBenchmarks.Maintenance] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Maintenance] -Public = false -``` - ## Portfolio Optimization ```@autodocs @@ -103,11 +63,6 @@ Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] -Public = false -``` - ## Ranking ```@autodocs @@ -115,11 +70,6 @@ Modules = [DecisionFocusedLearningBenchmarks.Ranking] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Ranking] -Public = false -``` - ## Subset selection ```@autodocs @@ -127,11 +77,6 @@ Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] -Public = false -``` - ## Stochastic Vehicle Scheduling ```@autodocs @@ -139,11 +84,6 @@ Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] -Public = false -``` - ## Warcraft ```@autodocs @@ -151,7 +91,23 @@ Modules = [DecisionFocusedLearningBenchmarks.Warcraft] Private = false ``` -```@autodocs -Modules = [DecisionFocusedLearningBenchmarks.Warcraft] +## Internals + +```@autodocs +Modules = [ + DecisionFocusedLearningBenchmarks.Utils, + DecisionFocusedLearningBenchmarks.Argmax, + DecisionFocusedLearningBenchmarks.Argmax2D, + DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax, + DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling, + DecisionFocusedLearningBenchmarks.DynamicAssortment, + DecisionFocusedLearningBenchmarks.FixedSizeShortestPath, + DecisionFocusedLearningBenchmarks.Maintenance, + DecisionFocusedLearningBenchmarks.PortfolioOptimization, + DecisionFocusedLearningBenchmarks.Ranking, + DecisionFocusedLearningBenchmarks.SubsetSelection, + DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling, + DecisionFocusedLearningBenchmarks.Warcraft, +] Public = false ``` diff --git a/docs/src/benchmarks/dynamic/01_dynamic_assortment.md b/docs/src/benchmarks/dynamic/01_dynamic_assortment.md deleted file mode 100644 index 27820a2..0000000 --- a/docs/src/benchmarks/dynamic/01_dynamic_assortment.md +++ /dev/null @@ -1,155 +0,0 @@ -```@meta -EditURL = "01_dynamic_assortment.jl" -``` - -# Dynamic Assortment -Select which K items to offer at each step to maximize revenue: customer preferences -evolve dynamically based on purchase history (hype and saturation effects). 
- -````@example 01_dynamic_assortment -using DecisionFocusedLearningBenchmarks -using Plots - -b = DynamicAssortmentBenchmark() -```` - -## Observable input - -Generate one environment and roll it out with the greedy policy to collect a sample -trajectory. At each step the agent observes item prices, hype levels, saturation, and -purchase history: - -````@example 01_dynamic_assortment -policies = generate_baseline_policies(b) -env = generate_environments(b, 1)[1] -_, trajectory = evaluate_policy!(policies.greedy, env) -```` - -The observable state at step 1: item prices (fixed across steps): - -````@example 01_dynamic_assortment -plot_instance(b, trajectory[1]) -```` - -## A training sample - -Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: -- `x`: `(d+8) × N` feature matrix per step (prices, hype, saturation, history, time) -- `θ`: predicted utility score per item -- `y`: offered assortment at this step (BitVector of length N, true = offered) -- `instance`: full state tuple (features matrix, purchase history) -- `reward`: price of the purchased item (0 if no purchase) - -One step with the offered assortment highlighted (green = offered): - -````@example 01_dynamic_assortment -plot_sample(b, trajectory[1]) -```` - -A few steps side by side (prices are fixed; assortment composition changes over time): - -````@example 01_dynamic_assortment -plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) -```` - -## DFL pipeline components - -The DFL agent chains two components: a neural network predicting utility scores per item: - -````@example 01_dynamic_assortment -model = generate_statistical_model(b) # MLP: state features → predicted utility per item -```` - -and a maximizer offering the K items with the highest predicted utilities: - -````@example 01_dynamic_assortment -maximizer = generate_maximizer(b) # top-K selection by predicted utility -```` - -At each step, the model maps the current state (prices, hype, saturation, history) to a -utility score per item. The maximizer selects the K items with the highest scores. - ---- -## Problem Description - -### Overview - -In the **Dynamic Assortment problem**, a retailer has ``N`` items and must select -``K`` to offer at each time step. Customer preferences evolve based on purchase history -through **hype** (recent purchases increase demand) and **saturation** (repeated -purchases slightly decrease demand). 
- -### Mathematical Formulation - -**State** ``s_t = (p, f, h_t, \sigma_t, t, \mathcal{H}_t)`` where: -- ``p``: fixed item prices -- ``f``: static item features -- ``h_t, \sigma_t``: current hype and saturation levels -- ``t``: current time step -- ``\mathcal{H}_t``: purchase history (last 5 purchases) - -**Action:** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| = K`` - -**Customer choice** (multinomial logit): -```math -\mathbb{P}(i \mid a_t, s_t) = \frac{\exp(\theta_i(s_t))}{\sum_{j \in a_t} \exp(\theta_j(s_t)) + 1} -``` - -**Transition dynamics:** -- Hype: ``h_{t+1}^{(i)} = h_t^{(i)} \times m^{(i)}`` where the multiplier reflects recent purchases -- Saturation: increases by ×1.01 for the purchased item - -**Reward:** ``r(s_t, a_t) = p_{i^\star}`` (price of the purchased item, 0 if no purchase) - -**Objective:** -```math -\max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right] -``` - -## Key Components - -### [`DynamicAssortmentBenchmark`](@ref) - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `N` | Number of items in catalog | 20 | -| `d` | Static feature dimension per item | 2 | -| `K` | Assortment size | 4 | -| `max_steps` | Steps per episode | 80 | -| `exogenous` | Whether dynamics are exogenous | `false` | - -### State Observation - -Agents observe a ``(d+8) \times N`` normalized feature matrix per step containing: -current prices, hype, saturation, static features, change in hype/saturation from -previous step and from initial state, and normalized time step. - -## Baseline Policies - -| Policy | Description | -|--------|-------------| -| Expert | Brute-force enumeration of all ``\binom{N}{K}`` subsets; optimal but slow | -| Greedy | Selects the ``K`` items with highest prices | - -## DFL Policy - -```math -\xrightarrow[\text{State}]{s_t} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Utilities}]{\theta \in \mathbb{R}^N} -\fbox{Top-K} -\xrightarrow[\text{Assortment}]{a_t} -``` - -**Model:** `Chain(Dense(d+8 → 5), Dense(5 → 1), vec)`: predicts one utility score -per item from the current state features. - -**Maximizer:** `TopKMaximizer(K)`: selects the top ``K`` items by predicted utility. - -!!! note "Reference" - [Structured Reinforcement Learning for Combinatorial Decision-Making](https://arxiv.org/abs/2505.19053) - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/dynamic/02_maintenance.md b/docs/src/benchmarks/dynamic/02_maintenance.md deleted file mode 100644 index 89a7902..0000000 --- a/docs/src/benchmarks/dynamic/02_maintenance.md +++ /dev/null @@ -1,144 +0,0 @@ -```@meta -EditURL = "02_maintenance.jl" -``` - -# Maintenance -Decide which components to maintain at each step to minimize failure and maintenance costs: -components degrade stochastically and the agent has limited maintenance capacity. - -````@example 02_maintenance -using DecisionFocusedLearningBenchmarks -using Plots - -b = MaintenanceBenchmark(; N=5, K=2) # 5 components, maintain up to 2 per step -```` - -## Observable input - -Generate one environment and roll it out with the greedy policy to collect a sample -trajectory. 
At each step the agent observes the degradation level of each component: - -````@example 02_maintenance -policies = generate_baseline_policies(b) -env = generate_environments(b, 1)[1] -_, trajectory = evaluate_policy!(policies.greedy, env) -```` - -The observable state at step 1: degradation levels per component (1 = new, n = failed): - -````@example 02_maintenance -plot_instance(b, trajectory[1]) -```` - -## A training sample - -Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: -- `x`: degradation state vector (values in `1..n` per component) -- `θ`: urgency score per component (predicted by model) -- `y`: which components are maintained at this step (BitVector of length N) -- `instance`: degradation state vector -- `reward`: negative cost (maintenance and failure costs) at this step - -One step with maintenance decisions (green = maintained, red = failed): - -````@example 02_maintenance -plot_sample(b, trajectory[1]) -```` - -A few steps side by side showing degradation evolving over time: - -````@example 02_maintenance -plot_trajectory(b, trajectory[1:min(4, length(trajectory))]) -```` - -## DFL pipeline components - -The DFL agent chains two components: a neural network predicting urgency scores per component: - -````@example 02_maintenance -model = generate_statistical_model(b) # two-layer MLP: degradation state → urgency scores -```` - -and a maximizer selecting the most urgent components for maintenance: - -````@example 02_maintenance -maximizer = generate_maximizer(b) # top-K selection among components with positive scores -```` - -At each step, the model maps the current degradation state to an urgency score per component. -The maximizer selects up to K components with the highest positive scores for maintenance. - ---- -## Problem Description - -### Overview - -In the **Maintenance benchmark**, a system has ``N`` identical components, each with -``n`` discrete degradation states (1 = new, ``n`` = failed). At each step, the agent -can maintain up to ``K`` components. Maintained components are reset to state 1. -Unmaintained components degrade stochastically. - -### Mathematical Formulation - -**State** ``s_t \in \{1,\ldots,n\}^N``: degradation level of each component. - -**Action** ``a_t \subseteq \{1,\ldots,N\}`` with ``|a_t| \leq K`` - -**Transition dynamics:** For each component ``i``: -- If maintained: ``s_{t+1}^i = 1`` -- If not maintained: ``s_{t+1}^i = \min(s_t^i + 1, n)`` with probability ``p``, else ``s_t^i`` - -**Cost:** -```math -c(s_t, a_t) = c_m \cdot |a_t| + c_f \cdot \#\{i : s_t^i = n\} -``` - -**Objective:** -```math -\min_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T c(s_t, \pi(s_t))\right] -``` - -## Key Components - -### [`MaintenanceBenchmark`](@ref) - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `N` | Number of components | 2 | -| `K` | Max simultaneous maintenance operations | 1 | -| `n` | Degradation levels per component | 3 | -| `p` | Degradation probability per step | 0.2 | -| `c_f` | Failure cost per failed component | 10.0 | -| `c_m` | Maintenance cost per maintained component | 3.0 | -| `max_steps` | Steps per episode | 80 | - -### Instance Generation - -Each instance has random starting degradation states uniformly drawn from ``\{1,\ldots,n\}``. 
- -## Baseline Policies - -| Policy | Description | -|--------|-------------| -| Greedy | Maintains components in the last degradation state before failure, up to capacity | - -## DFL Policy - -```math -\xrightarrow[\text{State}]{s_t \in \{1,\ldots,n\}^N} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Scores}]{\theta \in \mathbb{R}^N} -\fbox{Top-K (positive)} -\xrightarrow[\text{Maintenance}]{a_t} -``` - -**Model:** `Chain(Dense(N → N), Dense(N → N), vec)`: two-layer MLP predicting one -urgency score per component. - -**Maximizer:** `TopKPositiveMaximizer(K)`: selects the ``K`` components with the -highest positive scores for maintenance. - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/dynamic/03_dvsp.md b/docs/src/benchmarks/dynamic/03_dvsp.md deleted file mode 100644 index 0bb16e5..0000000 --- a/docs/src/benchmarks/dynamic/03_dvsp.md +++ /dev/null @@ -1,161 +0,0 @@ -```@meta -EditURL = "03_dvsp.jl" -``` - -# Dynamic Vehicle Scheduling -Dispatch vehicles to customers arriving over time: at each step the agent decides which -customers to serve now and which to postpone, minimizing total travel cost. - -````@example 03_dvsp -using DecisionFocusedLearningBenchmarks -using Plots - -b = DynamicVehicleSchedulingBenchmark() -```` - -## Observable input - -Generate one environment and roll it out with the greedy policy to collect a sample -trajectory. At each step the agent observes customer positions, start times, and which -customers have reached their dispatch deadline: - -````@example 03_dvsp -policies = generate_baseline_policies(b) -env = generate_environments(b, 1)[1] -_, trajectory = evaluate_policy!(policies.greedy, env) -```` - -The observable state at step 1: depot (green square), must-dispatch customers -(red stars; deadline reached), postponable customers (blue triangles): - -````@example 03_dvsp -plot_instance(b, trajectory[1]) -```` - -## A training sample - -Each step in a trajectory is a labeled tuple `(x, θ, y)` plus state and reward: -- `x`: 27-dimensional feature vector per customer (schedule slack, travel times, reachability) -- `θ`: prize per customer (predicted by the model; used as optimization input) -- `y`: routes dispatched at this step -- `instance`: full DVSP state (customer positions, deadlines, current epoch) -- `reward`: negative travel cost incurred at this step - -One step with dispatched routes: - -````@example 03_dvsp -plot_sample(b, trajectory[1]) -```` - -Multiple steps side by side: customers accumulate and routes change over time: - -````@example 03_dvsp -plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) -```` - -## DFL pipeline components - -The DFL agent chains two components: a neural network predicting a prize per customer: - -````@example 03_dvsp -model = generate_statistical_model(b) # Dense(27 → 1) per customer: state features → prize -```` - -and a maximizer selecting routes that balance collected prizes against travel costs: - -````@example 03_dvsp -maximizer = generate_maximizer(b) # prize-collecting VSP solver -```` - -At each step, the model assigns a prize to each postponable customer. The solver then -selects routes maximizing collected prizes minus travel costs, deciding which customers -to serve now and which to defer. - ---- -## Problem Description - -### Overview - -In the **Dynamic Vehicle Scheduling Problem (DVSP)**, a fleet operator must decide at -each time step which customers to serve immediately and which to postpone. 
The goal is -to serve all customers by end of the planning horizon while minimizing total travel time. - -The problem is characterized by: -- **Exogenous noise**: customer arrivals are stochastic and follow a fixed distribution -- **Combinatorial action space**: routes are built over a large set of customers - -### Mathematical Formulation - -**State** ``s_t = (R_t, D_t, t)`` where: -- ``R_t``: pending customers, each with coordinates, start time, service time -- ``D_t``: must-dispatch customers (cannot be postponed further) -- ``t``: current time step - -**Action** ``a_t``: a set of vehicle routes ``\{r_1, r_2, \ldots, r_k\}``, each starting -and ending at the depot, satisfying time constraints. - -**Reward:** -```math -r(s_t, a_t) = -\sum_{r \in a_t} \sum_{(i,j) \in r} d_{ij} -``` - -**Objective:** -```math -\max_\pi \; \mathbb{E}\!\left[\sum_{t=1}^T r(s_t, \pi(s_t))\right] -``` - -## Key Components - -### [`DynamicVehicleSchedulingBenchmark`](@ref) - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `max_requests_per_epoch` | Maximum new customers per time step | 10 | -| `Δ_dispatch` | Time delay between decision and dispatch | 1.0 | -| `epoch_duration` | Duration of each time step | 1.0 | -| `two_dimensional_features` | Use 2D instead of full 27D features | `false` | - -### Features - -**Full features (27D per customer):** start/end times, depot travel times, slack, -reachability ratios, quantile-based travel times to other customers. - -**2D features:** travel time from depot + mean travel time to others. - -## Baseline Policies - -| Policy | Description | -|--------|-------------| -| Lazy | Postpones all possible customers; serves only must-dispatch | -| Greedy | Serves all pending customers immediately | - -## DFL Policy - -```math -\xrightarrow[\text{State}]{s_t} -\fbox{Neural network $\varphi_w$} -\xrightarrow[\text{Prizes}]{\theta} -\fbox{Prize-collecting VSP} -\xrightarrow[\text{Routes}]{a_t} -``` - -The neural network predicts a prize ``\theta_i`` for each postponable customer. -The prize-collecting VSP solver then maximizes collected prizes minus travel costs: -```math -\max_{a_t \in \mathcal{A}(s_t)} \sum_{r \in a_t} \left(\sum_{i \in r} \theta_i - \sum_{(i,j) \in r} d_{ij}\right) -``` - -**Model:** -- 2D features: `Dense(2 → 1)` applied independently per customer -- Full features: `Dense(27 → 1)` applied independently per customer - -!!! note "Reference" - This problem is a simplified version of the - [EURO-NeurIPS challenge 2022](https://euro-neurips-vrp-2022.challenges.ortec.com/), - and solved using DFL in [Combinatorial Optimization enriched Machine Learning to solve the - Dynamic Vehicle Routing Problem with Time Windows](https://arxiv.org/abs/2304.00789). - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/01_argmax.md b/docs/src/benchmarks/static/01_argmax.md deleted file mode 100644 index 3cb4c08..0000000 --- a/docs/src/benchmarks/static/01_argmax.md +++ /dev/null @@ -1,115 +0,0 @@ -```@meta -EditURL = "01_argmax.jl" -``` - -# Argmax -Select the single best item from a set of `n` items, given features correlated with hidden -item scores. This is a minimalist DFL setting: equivalent to multiclass -classification, but with an argmax layer instead of softmax. Useful as a minimal sandbox for -understanding DFL concepts. 
- -````@example 01_argmax -using DecisionFocusedLearningBenchmarks -using Plots -using Statistics - -b = ArgmaxBenchmark(; seed=0) -```` - -## Observable input - -At inference time the decision-maker observes only a feature matrix `x` -(rows = features, columns = items): - -````@example 01_argmax -dataset = generate_dataset(b, 100; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: feature matrix (observable at train and test time) -- `θ`: true item scores (training supervision only, hidden at test time) -- `y`: optimal one-hot decision derived from `θ` - -The full training triple (features, true scores, and optimal decision): - -````@example 01_argmax -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting scores from features: - -````@example 01_argmax -model = generate_statistical_model(b) # linear map: features → predicted scores -```` - -and a maximizer turning those scores into a decision: - -````@example 01_argmax -maximizer = generate_maximizer(b) # one-hot argmax -```` - -A randomly initialized policy makes essentially random decisions: - -````@example 01_argmax -θ_pred = model(sample.x) -y_pred = maximizer(θ_pred) -```` - -````@example 01_argmax -plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -```` - -The goal of training is to find parameters that maximize accuracy. -Current accuracy on the dataset: - -````@example 01_argmax -mean(maximizer(model(s.x)) == s.y for s in dataset) -```` - ---- -## Problem Description - -In the **Argmax benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is -observed. A hidden linear encoder maps ``x`` to a score vector -``\theta = \text{encoder}(x) \in \mathbb{R}^n``. The task is to select the item with -the highest score: -```math -y = \mathrm{argmax}(\theta) -``` -The solution ``y`` is encoded as a one-hot vector. -The score vector ``\theta`` is never observed (only features ``x`` are available). -The DFL pipeline trains a model ``f_w`` so that ``\mathrm{argmax}(f_w(x))`` matches -``\mathrm{argmax}(\theta)`` at decision time. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `instance_dim` | Number of items | 10 | -| `nb_features` | Feature dimension `p` | 5 | - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x \in \mathbb{R}^{p \times n}} -\fbox{Linear model $f_w$} -\xrightarrow[\text{Predicted scores}]{\theta \in \mathbb{R}^n} -\fbox{argmax} -\xrightarrow[\text{Selection}]{y \in \{0,1\}^n} -``` - -**Model:** `Chain(Dense(nb_features → 1; bias=false), vec)`: a single linear layer -predicting one score per item. - -**Maximizer:** `one_hot_argmax`: returns a one-hot vector at the argmax index. - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/02_argmax2d.md b/docs/src/benchmarks/static/02_argmax2d.md deleted file mode 100644 index 4bab0ed..0000000 --- a/docs/src/benchmarks/static/02_argmax2d.md +++ /dev/null @@ -1,104 +0,0 @@ -```@meta -EditURL = "02_argmax2d.jl" -``` - -# Argmax on a 2D polytope -Select the best vertex of a random convex polytope in 2D: predict a cost direction θ from -features, then return the vertex `v` maximizing `θᵀv`. 
The 2D setting makes this benchmark -visual: the cost direction and selected vertex can be plotted directly, and the loss -landscape can be shown as a contour plot over the 2D θ space. - -````@example 02_argmax2d -using DecisionFocusedLearningBenchmarks -using Plots - -b = Argmax2DBenchmark(; seed=0) -```` - -## Observable input - -At inference time the decision-maker observes the feature vector `x` and the polytope shape, -but not the cost direction hidden `θ`: - -````@example 02_argmax2d -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: feature vector (observable at train and test time) -- `θ`: 2D cost direction (training supervision only, hidden at test time) -- `y`: polytope vertex maximizing `θᵀv` (optimal decision) -- `instance` (in `context`): polytope vertices (observable problem structure) - -The full training triple (polytope, cost direction θ, optimal vertex y): - -````@example 02_argmax2d -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting a 2D cost direction: - -````@example 02_argmax2d -model = generate_statistical_model(b) # linear map: features → 2D cost vector -```` - -and a maximizer selecting the best polytope vertex for that direction: - -````@example 02_argmax2d -maximizer = generate_maximizer(b) # vertex maximizing θᵀv over polytope vertices -```` - -A randomly initialized policy predicts an arbitrary cost direction: - -````@example 02_argmax2d -θ_pred = model(sample.x) -y_pred = maximizer(θ_pred; sample.context...) -plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -```` - ---- -## Problem Description - -In the **Argmax2D benchmark**, each instance defines a random convex polytope -``\mathcal{Y}(x) = \mathrm{conv}(v_1, \ldots, v_m)`` in ``\mathbb{R}^2``. -A hidden encoder maps features ``x \in \mathbb{R}^p`` to a 2D cost vector -``\theta \in \mathbb{R}^2``. The task is to find the polytope vertex maximizing -the dot product: -```math -y^* = \mathrm{argmax}_{v \in \mathcal{Y}(x)} \; \theta^\top v -``` - -This is a toy 2D combinatorial optimization problem useful for visualizing -how well a model learns the cost direction. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `nb_features` | Feature dimension `p` | 5 | -| `polytope_vertex_range` | Number of polytope vertices (list; one value drawn at random per instance) | `[6]` | - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x} -\fbox{Linear model} -\xrightarrow{\theta \in \mathbb{R}^2} -\fbox{Polytope argmax} -\xrightarrow{y} -``` - -**Model:** `Dense(nb_features → 2; bias=false)` — predicts a 2D cost direction. - -**Maximizer:** finds the vertex of the instance polytope with maximum dot product with θ. - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/03_ranking.md b/docs/src/benchmarks/static/03_ranking.md deleted file mode 100644 index 5c2ced2..0000000 --- a/docs/src/benchmarks/static/03_ranking.md +++ /dev/null @@ -1,104 +0,0 @@ -```@meta -EditURL = "03_ranking.jl" -``` - -# Ranking -Rank a set of items. Each item has a hidden score, correlated with observable input -features. The goal is to learn to sort items by their hidden scores, using observable -features alone. 
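-
-As a rough illustration (a sketch assuming the `invperm(sortperm(θ))` rule quoted at the
-end of this page, not the package's own source), the ranking maximizer fits in one line:
-
-````julia
-# Illustrative sketch of the `ranking` maximizer: rank 1 = lowest cost.
-ranking_sketch(θ::AbstractVector) = invperm(sortperm(θ))
-
-ranking_sketch([0.3, -1.2, 0.9])  # -> [2, 1, 3]: item 2 gets rank 1 (lowest cost)
-````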
- -````@example 03_ranking -using DecisionFocusedLearningBenchmarks -using Plots - -b = RankingBenchmark() -```` - -## Observable input - -At inference time the decision-maker observes only the feature matrix `x` -(rows = features, columns = items): - -````@example 03_ranking -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: feature matrix (rows = features, columns = items; observable at train and test time) -- `θ`: true item costs (training supervision only, hidden at test time) -- `y`: ordinal ranks derived from `θ` (`y[i] = 1` means item `i` has the lowest cost) - -The full training triple (features, true costs, and derived ranking): - -````@example 03_ranking -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting item scores: - -````@example 03_ranking -model = generate_statistical_model(b) # linear map: features → predicted costs -```` - -and a maximizer ranking items by those scores: - -````@example 03_ranking -maximizer = generate_maximizer(b) # ordinal ranking via sortperm -```` - -A randomly initialized policy produces an arbitrary ranking: - -````@example 03_ranking -θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) -```` - -Optimality gap on the dataset (0 = optimal, higher is worse): - -````@example 03_ranking -compute_gap(b, dataset, model, maximizer) -```` - ---- -## Problem Description - -In the **Ranking benchmark**, a feature matrix ``x \in \mathbb{R}^{p \times n}`` is -observed. A hidden linear encoder maps ``x`` to a cost vector -``\theta \in \mathbb{R}^n``. The task is to compute the ordinal ranking of the items -by cost: -```math -y_i = \mathrm{rank}(\theta_i \mid \theta_1, \ldots, \theta_n) -``` -where ``y_i = 1`` means item ``i`` has the highest cost. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `instance_dim` | Number of items to rank | 10 | -| `nb_features` | Feature dimension `p` | 5 | - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x} -\fbox{Linear model} -\xrightarrow{\theta} -\fbox{ranking} -\xrightarrow{y} -``` - -**Model:** `Chain(Dense(nb_features → 1; bias=false), vec)` — predicts one score per item. - -**Maximizer:** `ranking(θ)` — returns a vector of ordinal ranks via `invperm(sortperm(θ))`. - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/04_portfolio_optimization.md b/docs/src/benchmarks/static/04_portfolio_optimization.md deleted file mode 100644 index f0e4c15..0000000 --- a/docs/src/benchmarks/static/04_portfolio_optimization.md +++ /dev/null @@ -1,118 +0,0 @@ -```@meta -EditURL = "04_portfolio_optimization.jl" -``` - -# Portfolio Optimization -Allocate wealth across assets to maximize expected return subject to a risk constraint: -asset returns are unknown and must be predicted from contextual features. 
- -````@example 04_portfolio_optimization -using DecisionFocusedLearningBenchmarks -using Plots - -b = PortfolioOptimizationBenchmark() -```` - -## Observable input - -At inference time the decision-maker observes only the contextual feature vector `x`: - -````@example 04_portfolio_optimization -dataset = generate_dataset(b, 20; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: contextual feature vector (observable at train and test time) -- `θ`: true expected asset returns (training supervision only, hidden at test time) -- `y`: optimal portfolio weights solving the Markowitz QP given `θ` - -Top: feature vector x. Bottom left: true returns θ. Bottom right: optimal weights y: - -````@example 04_portfolio_optimization -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting expected asset returns: - -````@example 04_portfolio_optimization -model = generate_statistical_model(b) # linear map: features → predicted returns -```` - -and a maximizer allocating the optimal portfolio given those returns: - -````@example 04_portfolio_optimization -maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) -```` - -A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: - -````@example 04_portfolio_optimization -θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) -```` - -Optimality gap on the dataset (0 = optimal, higher is worse): - -````@example 04_portfolio_optimization -compute_gap(b, dataset, model, maximizer) -```` - ---- -## Problem Description - -A **Markowitz portfolio optimization** problem where asset expected returns are unknown. -Given contextual features ``x \in \mathbb{R}^p``, the learner predicts returns -``\theta \in \mathbb{R}^d`` and solves: - -```math -\begin{aligned} -\max_{y} \quad & \theta^\top y \\ -\text{s.t.} \quad & y^\top \Sigma y \leq \gamma \\ -& \mathbf{1}^\top y \leq 1 \\ -& y \geq 0 -\end{aligned} -``` - -where ``\Sigma`` is the asset covariance matrix and ``\gamma`` is the risk budget. -The solver uses [Ipopt.jl](https://github.com/jump-dev/Ipopt.jl) via JuMP. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `d` | Number of assets | 50 | -| `p` | Feature dimension | 5 | -| `deg` | Polynomial degree for data generation | 1 | -| `ν` | Noise hyperparameter | 1.0 | - -Data is generated following the process in -[Mandi et al., 2023](https://arxiv.org/abs/2307.13565). - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x \in \mathbb{R}^p} -\fbox{Linear model} -\xrightarrow[\text{Predicted returns}]{\hat{\theta} \in \mathbb{R}^d} -\fbox{QP solver (Ipopt)} -\xrightarrow[\text{Portfolio}]{y \in \mathbb{R}^d} -``` - -**Model:** `Dense(p → d)` — predicts one expected return per asset. - -**Maximizer:** Ipopt QP solver enforcing the variance and budget constraints. - -!!! note "Reference" - Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities. 
- [arXiv:2307.13565](https://arxiv.org/abs/2307.13565) - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/05_subset_selection.md b/docs/src/benchmarks/static/05_subset_selection.md deleted file mode 100644 index a865f74..0000000 --- a/docs/src/benchmarks/static/05_subset_selection.md +++ /dev/null @@ -1,109 +0,0 @@ -```@meta -EditURL = "05_subset_selection.jl" -``` - -# Subset Selection -Select the `k` most valuable items from a set of `n`: items with unknown values -must be identified from observable features alone. - -````@example 05_subset_selection -using DecisionFocusedLearningBenchmarks -using Plots - -b = SubsetSelectionBenchmark(; identity_mapping=false) -```` - -## Observable input - -At inference time the decision-maker observes only the feature vector `x`: - -````@example 05_subset_selection -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: item feature vector (observable at train and test time) -- `θ`: true item values, derived from `x` via a hidden encoder (training supervision only) -- `y`: selection indicator (`y[i] = 1` for the `k` highest-value items, 0 otherwise) - -The full training triple (features, hidden values, and selection): - -````@example 05_subset_selection -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting item scores: - -````@example 05_subset_selection -model = generate_statistical_model(b) # linear map: features → predicted item scores -```` - -and a maximizer selecting the top-k items by those scores: - -````@example 05_subset_selection -maximizer = generate_maximizer(b) # top-k selection -```` - -A randomly initialized policy selects items with no relation to their true values: - -````@example 05_subset_selection -θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) -```` - -Optimality gap on the dataset (0 = optimal, higher is worse): - -````@example 05_subset_selection -compute_gap(b, dataset, model, maximizer) -```` - ---- -## Problem Description - -In the **Subset Selection benchmark**, ``n`` items have unknown values ``\theta_i``. -A feature vector ``x \in \mathbb{R}^n`` is observed (identity mapping by default). -The task is to select the ``k`` items with the highest values: -```math -y = \mathrm{top}_k(\theta) -``` -where ``y \in \{0,1\}^n`` with exactly ``k`` ones. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `n` | Total number of items | 25 | -| `k` | Number of items to select | 5 | -| `identity_mapping` | Use identity as the hidden mapping | `true` | - -When `identity_mapping=true`, features equal item values directly (`x = θ`). -When `false`, a random linear layer is used as the hidden mapping. - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x} -\fbox{Linear model} -\xrightarrow{\theta} -\fbox{top-k} -\xrightarrow{y} -``` - -**Model:** `Dense(n → n; bias=false)` — predicts a score per item. - -**Maximizer:** `top_k(θ, k)` — returns a boolean vector with `true` at the `k` -highest-scoring positions. - -!!! 
note "Reference" - Setting from [Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities](https://arxiv.org/abs/2307.13565) - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/06_fixed_size_shortest_path.md b/docs/src/benchmarks/static/06_fixed_size_shortest_path.md deleted file mode 100644 index db7ea96..0000000 --- a/docs/src/benchmarks/static/06_fixed_size_shortest_path.md +++ /dev/null @@ -1,115 +0,0 @@ -```@meta -EditURL = "06_fixed_size_shortest_path.jl" -``` - -# Shortest Path -Find the cheapest path from the top-left to the bottom-right of a grid graph: -edge costs are unknown and must be predicted from instance features. - -````@example 06_fixed_size_shortest_path -using DecisionFocusedLearningBenchmarks -using Plots - -b = FixedSizeShortestPathBenchmark() -```` - -## Observable input - -At inference time the decision-maker observes the feature vector `x` and the fixed grid -structure (source top-left, sink bottom-right): - -````@example 06_fixed_size_shortest_path -dataset = generate_dataset(b, 50; seed=0) -sample = first(dataset) -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: instance feature vector (observable at train and test time) -- `θ`: true edge costs (training supervision only, hidden at test time) -- `y`: path indicator vector (`y[e] = 1` if edge `e` is on the optimal path) - -Top: feature vector x. Bottom left: edge costs θ. Bottom right: optimal path y (white dots): - -````@example 06_fixed_size_shortest_path -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting edge costs: - -````@example 06_fixed_size_shortest_path -model = generate_statistical_model(b) # linear map: features → predicted edge costs -```` - -and a maximizer finding the shortest path given those costs: - -````@example 06_fixed_size_shortest_path -maximizer = generate_maximizer(b) # Dijkstra shortest path on the grid graph -```` - -A randomly initialized policy predicts arbitrary costs, yielding a near-straight path: - -````@example 06_fixed_size_shortest_path -θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) -```` - -Optimality gap on the dataset (0 = optimal, higher is worse): - -````@example 06_fixed_size_shortest_path -compute_gap(b, dataset, model, maximizer) -```` - ---- -## Problem Description - -A **fixed-size grid shortest path** problem. The graph is a directed acyclic grid of -size ``(\text{rows} \times \text{cols})``, with edges pointing right and downward. -Edge costs ``\theta \in \mathbb{R}^E`` are unknown; only a feature vector -``x \in \mathbb{R}^p`` is observed. The task is to find the minimum-cost path from -vertex 1 (top-left) to vertex ``V`` (bottom-right): -```math -y^* = \mathrm{argmin}_{y \in \mathcal{P}} \; \theta^\top y -``` -where ``y \in \{0,1\}^E`` indicates selected edges and ``\mathcal{P}`` is the set of -valid source-to-sink paths. - -Data is generated following the process in -[Mandi et al., 2023](https://arxiv.org/abs/2307.13565). 
- -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `grid_size` | Grid dimensions `(rows, cols)` | `(5, 5)` | -| `p` | Feature dimension | 5 | -| `deg` | Polynomial degree for cost generation | 1 | -| `ν` | Multiplicative noise level (0 = no noise) | 0.0 | - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x \in \mathbb{R}^p} -\fbox{Linear model} -\xrightarrow[\text{Predicted costs}]{\theta \in \mathbb{R}^E} -\fbox{Dijkstra / Bellman-Ford} -\xrightarrow[\text{Path}]{y \in \{0,1\}^E} -``` - -**Model:** `Chain(Dense(p → E))` — predicts one cost per edge. - -**Maximizer:** Dijkstra (default) or Bellman-Ford on negated weights to find the -longest (maximum-weight) path. - -!!! note "Reference" - Mandi et al. (2023), Decision-Focused Learning: Foundations, State of the Art, Benchmark and Future Opportunities. - [arXiv:2307.13565](https://arxiv.org/abs/2307.13565) - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/static/07_warcraft.md b/docs/src/benchmarks/static/07_warcraft.md deleted file mode 100644 index c246cc9..0000000 --- a/docs/src/benchmarks/static/07_warcraft.md +++ /dev/null @@ -1,119 +0,0 @@ -```@meta -EditURL = "07_warcraft.jl" -``` - -# Warcraft -Find the cheapest path on a 12×12 terrain map: cell travel costs are unknown and must -be inferred from the RGB terrain image using a neural network. - -````@example 07_warcraft -using DecisionFocusedLearningBenchmarks -using Plots - -b = WarcraftBenchmark() -```` - -## Observable input - -At inference time the decision-maker observes only the terrain image `x` (not the costs `θ`): - -````@example 07_warcraft -sample = generate_dataset(b, 1)[1] -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: terrain image (12×12×3 RGB array; observable at train and test time) -- `θ`: true cell travel costs (training supervision only, hidden at test time) -- `y`: optimal path indicator (`y[i,j] = 1` if cell `(i,j)` is on the path) - -Left: terrain image. Middle: true costs θ. Right: optimal path y: - -````@example 07_warcraft -plot_sample(b, sample) -```` - -## Untrained policy - -A DFL policy chains two components: a CNN predicting cell travel costs from the terrain image: - -````@example 07_warcraft -model = generate_statistical_model(b) # ResNet18 CNN: terrain image → 12×12 cost map -```` - -and a maximizer finding the shortest path given those costs: - -````@example 07_warcraft -maximizer = generate_maximizer(b) # Dijkstra shortest path on the 12×12 grid -```` - -An untrained CNN produces a near-uniform cost map, yielding a near-straight path: - -````@example 07_warcraft -θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) -```` - -Optimality gap on this sample (0 = optimal, higher is worse): - -````@example 07_warcraft -compute_gap(b, [sample], model, maximizer) -```` - ---- -## Problem Description - -In the **Warcraft benchmark**, each instance is a 12×12 grid representing a Warcraft -terrain map. Each cell has an unknown travel cost depending on its terrain type (forest, -mountain, water, etc.). The task is to find the path from the top-left to the -bottom-right corner that minimizes total travel cost. - -Formally, let ``\theta_{ij}`` be the (unknown) cost of cell ``(i,j)`` and -``y_{ij} \in \{0,1\}`` indicate whether cell ``(i,j)`` is on the path. 
The objective is: -```math -y^* = \mathrm{argmin}_{y \in \mathcal{P}} \sum_{(i,j)} \theta_{ij} \, y_{ij} -``` -where ``\mathcal{P}`` is the set of valid grid paths (4-connected, source to sink). - -The dataset contains 10 000 labeled terrain images from the Warcraft II tileset. -It is downloaded automatically on first use via -[DataDeps.jl](https://github.com/oxinabox/DataDeps.jl). - -## Key Components - -**[`WarcraftBenchmark`](@ref)** has no parameters. - -| Method | Description | -|--------|-------------| -| `generate_dataset(b, n)` | Downloads and loads `n` terrain images with true costs and paths | -| `generate_statistical_model(b)` | ResNet18 CNN (first 5 layers + adaptive maxpool + neg) | -| `generate_maximizer(b; dijkstra=true)` | Dijkstra or Bellman-Ford shortest path | - -## DFL Policy - -```math -\xrightarrow[\text{Terrain image}]{x \in \mathbb{R}^{12 \times 12 \times 3}} -\fbox{ResNet18 CNN} -\xrightarrow[\text{Cell costs}]{\theta \in \mathbb{R}^{12 \times 12}} -\fbox{Dijkstra} -\xrightarrow[\text{Path}]{y \in \{0,1\}^{12 \times 12}} -``` - -The CNN maps terrain pixel values to predicted cell costs, which are then passed to a -shortest-path solver. Training end-to-end with -[InferOpt.jl](https://github.com/JuliaDecisionFocusedLearning/InferOpt.jl) teaches -the network to produce costs that lead to good paths, not just accurate cost estimates. - -!!! tip - See the [Warcraft tutorial](../../warcraft_tutorial.md) for a complete end-to-end training - example using `PerturbedMultiplicative` and `FenchelYoungLoss`. - -!!! note "Reference" - Vlastelica et al. (2020), Differentiation of Blackbox Combinatorial Solvers, ICLR 2020. - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md deleted file mode 100644 index 7caf43d..0000000 --- a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.md +++ /dev/null @@ -1,138 +0,0 @@ -```@meta -EditURL = "01_contextual_stochastic_argmax.jl" -``` - -# Contextual Stochastic Argmax -Select the best item from a set of `n` items with stochastic utilities: each scenario draws -a different utility vector, but utilities depend on observable context features. This is a -toy benchmark designed so that a linear model can exactly recover the optimal -context-to-utility mapping. - -````@example 01_contextual_stochastic_argmax -using DecisionFocusedLearningBenchmarks -using Plots - -b = ContextualStochasticArgmaxBenchmark() -```` - -`generate_dataset` returns unlabeled samples (`y = nothing`) for this benchmark. -A `target_policy` must be provided to attach labels. Here we use the anticipative -oracle: it returns the item with the highest realized utility for each scenario, -giving one labeled sample per scenario per instance. - -````@example 01_contextual_stochastic_argmax -anticipative = generate_anticipative_solver(b) -policy = - (ctx, scenarios) -> [ - DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) - for ξ in scenarios - ] -dataset = generate_dataset(b, 20; target_policy=policy, seed=0) -sample = first(dataset) -```` - -## Observable input - -At inference time the model observes `x = [c_base; x_raw]`. 
`plot_instance` shows both -components: base utilities `c_base` (left) and context features `x_raw` (right): - -````@example 01_contextual_stochastic_argmax -plot_instance(b, sample) -```` - -## A training sample - -Stochastic benchmarks have no single ground-truth label: the optimal item depends on -which utility scenario is realized. We label each sample with the anticipative oracle, -which returns the best item given the realized scenario ξ. - -Each labeled sample contains: -- `x`: feature vector `[c_base; x_raw]` (observable at train and test time) -- `y`: optimal item for the realized scenario ξ (one-hot; anticipative oracle label) -- `extra.scenario`: realized utility vector ξ (available only during training) - -Top: feature vector x. Bottom: realized scenario ξ acting as the cost vector, -with the anticipative-optimal item in red: - -````@example 01_contextual_stochastic_argmax -plot_sample(b, DataSample(sample; θ=sample.scenario)) -```` - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting expected item utilities: - -````@example 01_contextual_stochastic_argmax -model = generate_statistical_model(b) # linear map: features → predicted expected utilities -```` - -and a maximizer selecting the item with the highest predicted utility: - -````@example 01_contextual_stochastic_argmax -maximizer = generate_maximizer(b) # one-hot argmax -```` - -A randomly initialized policy selects items with no relation to their expected utilities. -Top: feature vector x. Bottom: predicted utilities θ̂ with the selected item in red: - -````@example 01_contextual_stochastic_argmax -θ_pred = model(sample.x) -plot_sample(b, DataSample(sample; θ=θ_pred, y=maximizer(θ_pred))) -```` - ---- -## Problem Description - -### Overview - -In the **Contextual Stochastic Argmax benchmark**, ``n`` items have random utilities -that depend on observable context. Per instance: -- ``c_\text{base} \sim U[0,1]^n``: base utilities (stored in `context`) -- ``x_\text{raw} \sim \mathcal{N}(0, I_d)``: observable context features -- Full features: ``x = [c_\text{base}; x_\text{raw}] \in \mathbb{R}^{n+d}`` - -The realized utility (scenario) is drawn as: -```math -\xi = c_\text{base} + W \, x_\text{raw} + \varepsilon, \quad \varepsilon \sim \mathcal{N}(0, \sigma^2 I) -``` -where ``W \in \mathbb{R}^{n \times d}`` is a fixed unknown perturbation matrix. - -The task is to select the item with the highest realized utility: -```math -y^* = \mathrm{argmax}(\xi) -``` - -A linear model ``\theta = [I \mid W] \cdot x`` can exactly recover the optimal -solution in expectation. - -## Key Parameters - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `n` | Number of items | 10 | -| `d` | Context feature dimension | 5 | -| `noise_std` | Noise standard deviation σ | 0.1 | - -## Baseline Policies - -- **SAA**: selects the item with highest mean utility over available scenarios. - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x = [c_\text{base}; x_\text{raw}]} -\fbox{Linear model} -\xrightarrow{\theta \in \mathbb{R}^n} -\fbox{argmax} -\xrightarrow{y} -``` - -**Model:** `Dense(n+d → n; bias=false)` — can in principle recover the exact mapping -``[I \mid W]`` from training data. - -**Maximizer:** `one_hot_argmax`. 
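-
-As a rough illustration of the generative model above (a sketch with made-up parameters,
-not the package's own source), one scenario and its anticipative label can be drawn as:
-
-````julia
-using Random
-
-n, d, σ = 10, 5, 0.1
-rng = Random.MersenneTwister(0)
-W = randn(rng, n, d)                       # fixed hidden perturbation matrix
-c_base = rand(rng, n)                      # base utilities (observable)
-x_raw = randn(rng, d)                      # context features (observable)
-ξ = c_base + W * x_raw + σ * randn(rng, n) # realized utility scenario
-y_star = argmax(ξ)                         # anticipative oracle: best item index
-````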
- ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - diff --git a/docs/src/benchmarks/stochastic/02_vsp.md b/docs/src/benchmarks/stochastic/02_vsp.md deleted file mode 100644 index 1cf2512..0000000 --- a/docs/src/benchmarks/stochastic/02_vsp.md +++ /dev/null @@ -1,151 +0,0 @@ -```@meta -EditURL = "02_vsp.jl" -``` - -# Stochastic Vehicle Scheduling -Assign vehicles to cover a set of tasks while minimizing costs under stochastic delays: -the DFL agent learns to predict adjusted costs that implicitly hedge against uncertainty. - -````@example 02_vsp -using DecisionFocusedLearningBenchmarks -using Plots - -b = StochasticVehicleSchedulingBenchmark() -```` - -## Observable input - -Each instance is a city with task locations and scheduled times. Task spatial positions -and scheduled times are observable at inference time. -`store_city=true` is required to visualize the map (not needed for training): - -````@example 02_vsp -sample = generate_dataset(b, 1; store_city=true)[1] -plot_instance(b, sample) -```` - -## A training sample - -Each sample is a labeled triple `(x, θ, y)`: -- `x`: 20-dimensional feature vector per edge, encoding schedule slack and travel times -- `θ`: adjusted edge costs (training supervision only, hidden at test time) -- `y`: binary assignment (`y[(u,v)] = 1` if a vehicle travels edge `(u, v)` in the schedule) - -Unlike static benchmarks, `y` labels are not available by default and must be attached -via a `target_policy` (e.g., the deterministic VSP solver). Routes are visualized -in the untrained policy section below. - -## Untrained policy - -A DFL policy chains two components: a statistical model predicting adjusted edge costs: - -````@example 02_vsp -model = generate_statistical_model(b) # linear map: task features -> adjusted edge costs -```` - -and a maximizer solving the deterministic VSP given those costs: - -````@example 02_vsp -maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) -```` - -The untrained model predicts random edge costs; the resulting schedule is arbitrary: - -````@example 02_vsp -θ_pred = model(sample.x) -y_pred = maximizer(θ_pred; sample.context...) -plot_sample( - b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra) -) -```` - ---- -## Problem Description - -### Overview - -In the **Vehicle Scheduling Problem (VSP)**, we consider a set of tasks ``V``. Each -task ``v \in V`` has a scheduled beginning time ``t_v^b`` and end time ``t_v^e``, with -``t_v^e > t_v^b``. We denote ``t^{tr}_{(u,v)}`` the travel time from task ``u`` to task -``v``. A task ``v`` can follow ``u`` only if: -```math -t_v^b \geq t_u^e + t^{tr}_{(u,v)} -``` - -An instance of VSP can be modeled as an acyclic directed graph where nodes are tasks -and edges represent feasible successions. A solution is a set of disjoint paths such -that all tasks are fulfilled exactly once to minimize total costs. - -In the **Stochastic VSP (StoVSP)**, after the scheduling decision is set, random delays -propagate along vehicle tours. The objective becomes minimizing base costs plus expected -total delay costs over scenarios. - -### Mathematical Formulation - -**Variables:** Let ``y_{u,v} \in \{0,1\}`` indicate if a vehicle performs task ``v`` -immediately after task ``u``. 
- -**Delay Propagation:** For each task ``v`` in scenario ``s``: -- ``\gamma_v^s``: intrinsic delay of task ``v`` -- ``d_v^s``: total accumulated delay -- ``\delta_{u,v}^s = t_v^b - (t_u^e + t^{tr}_{(u,v)})``: slack time - -```math -d_v^s = \gamma_v^s + \max(d_u^s - \delta_{u,v}^s,\; 0) -``` - -**Objective:** -```math -\min_{y} \; \sum_{(u,v)} c_{u,v} \, y_{u,v} + \mathbb{E}_{s \in S}\!\left[\sum_v C_d \, d_v^s\right] -``` - -## Key Components - -### [`StochasticVehicleSchedulingBenchmark`](@ref) - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `nb_tasks` | Number of tasks per instance | 25 | -| `nb_scenarios` | Number of scenarios for objective evaluation | 10 | - -### Instance Generation - -Each instance simulates a geographic city with depots and task locations. Tasks have -realistic scheduled start/end times. Scenarios are random intrinsic delays ``\gamma`` -drawn from a Log-Normal distribution. Feature vectors are 20-dimensional. - -## Baseline Policies - -| Policy | Description | -|--------|-------------| -| `svs_deterministic_policy` | Solves the deterministic VSP, ignoring delays | -| `svs_saa_policy` | SAA via column generation over ``K`` scenarios | -| `svs_saa_mip_policy` | Exact SAA via compact MIP formulation | -| `svs_local_search_policy` | Heuristic local search over sampled scenarios | - -## DFL Policy - -```math -\xrightarrow[\text{Features}]{x \in \mathbb{R}^{20}} -\fbox{Linear model $\varphi_w$} -\xrightarrow[\text{Predicted cost}]{c} -\fbox{Deterministic VSP solver} -\xrightarrow[\text{Routes}]{y} -``` - -By training end-to-end with the deterministic solver, the linear model learns adjusted -costs ``c`` that implicitly account for expected stochastic delays, while keeping -the fast deterministic solver at inference time. - -**Model:** `Chain(Dense(20 -> 1; bias=false), vec)`: predicts one adjusted cost per edge. - -**Maximizer:** `StochasticVehicleSchedulingMaximizer`: HiGHS MIP solver on the -deterministic VSP instance. - -!!! 
note "Reference" - [Learning to Approximate Industrial Problems by Operations Research Classic Problems](https://hal.science/hal-02396091/document) - ---- - -*This page was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).* - From 37bd38809f3750b5d691387d20ebe5a62e589d99 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 09:41:33 +0200 Subject: [PATCH 15/21] docs: rename API headers --- docs/src/api.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index d615913..873c3ab 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -1,90 +1,92 @@ # API Reference -## Interface +## Public + +### Interface ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Utils] Private = false ``` -## Argmax2D +### Argmax2D ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Argmax2D] Private = false ``` -## Argmax +### Argmax ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Argmax] Private = false ``` -## Contextual Stochastic Argmax +### Contextual Stochastic Argmax ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.ContextualStochasticArgmax] Private = false ``` -## Dynamic Vehicle Scheduling +### Dynamic Vehicle Scheduling ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.DynamicVehicleScheduling] Private = false ``` -## Dynamic Assortment +### Dynamic Assortment ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.DynamicAssortment] Private = false ``` -## Fixed-size shortest path +### Fixed-size shortest path ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.FixedSizeShortestPath] Private = false ``` -## Maintenance +### Maintenance ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Maintenance] Private = false ``` -## Portfolio Optimization +### Portfolio Optimization ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.PortfolioOptimization] Private = false ``` -## Ranking +### Ranking ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Ranking] Private = false ``` -## Subset selection +### Subset selection ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.SubsetSelection] Private = false ``` -## Stochastic Vehicle Scheduling +### Stochastic Vehicle Scheduling ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling] Private = false ``` -## Warcraft +### Warcraft ```@autodocs Modules = [DecisionFocusedLearningBenchmarks.Warcraft] From fcdc40f4f04f093ba414dde39308edd4c905633d Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 10:31:55 +0200 Subject: [PATCH 16/21] BREAKING CHANGE: rename plot_instance to plot_context --- docs/src/benchmarks/dynamic/01_dynamic_assortment.jl | 2 +- docs/src/benchmarks/dynamic/02_maintenance.jl | 2 +- docs/src/benchmarks/dynamic/03_dvsp.jl | 2 +- docs/src/benchmarks/static/01_argmax.jl | 2 +- docs/src/benchmarks/static/02_argmax2d.jl | 2 +- docs/src/benchmarks/static/03_ranking.jl | 2 +- docs/src/benchmarks/static/04_portfolio_optimization.jl | 2 +- docs/src/benchmarks/static/05_subset_selection.jl | 2 +- docs/src/benchmarks/static/06_fixed_size_shortest_path.jl | 2 +- docs/src/benchmarks/static/07_warcraft.jl | 2 +- .../stochastic/01_contextual_stochastic_argmax.jl | 4 ++-- docs/src/benchmarks/stochastic/02_vsp.jl | 2 +- docs/src/custom_benchmarks.md | 2 +- docs/src/using_benchmarks.md | 4 ++-- ext/DFLBenchmarksPlotsExt.jl | 2 +- ext/plots/argmax2d_plots.jl | 2 +- ext/plots/argmax_plots.jl | 2 +- ext/plots/contextual_stochastic_argmax_plots.jl | 2 
+- ext/plots/dvs_plots.jl | 2 +- ext/plots/dynamic_assortment_plots.jl | 2 +- ext/plots/maintenance_plots.jl | 2 +- ext/plots/portfolio_plots.jl | 2 +- ext/plots/ranking_plots.jl | 2 +- ext/plots/shortest_path_plots.jl | 2 +- ext/plots/subset_selection_plots.jl | 2 +- ext/plots/svs_plots.jl | 4 +--- ext/plots/warcraft_plots.jl | 2 +- src/DecisionFocusedLearningBenchmarks.jl | 2 +- src/DynamicVehicleScheduling/plot.jl | 2 +- src/Utils/Utils.jl | 2 +- src/Utils/interface/abstract_benchmark.jl | 8 ++++---- src/Utils/interface/static_benchmark.jl | 2 +- test/argmax.jl | 2 +- test/argmax_2d.jl | 2 +- test/contextual_stochastic_argmax.jl | 2 +- test/dynamic_assortment.jl | 2 +- test/dynamic_vsp_plots.jl | 4 ++-- test/fixed_size_shortest_path.jl | 2 +- test/maintenance.jl | 2 +- test/portfolio_optimization.jl | 2 +- test/ranking.jl | 2 +- test/subset_selection.jl | 2 +- test/vsp.jl | 2 +- test/warcraft.jl | 2 +- 44 files changed, 50 insertions(+), 52 deletions(-) diff --git a/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl b/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl index d230c44..f0bf611 100644 --- a/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl +++ b/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl @@ -17,7 +17,7 @@ env = generate_environments(b, 1)[1] _, trajectory = evaluate_policy!(policies.greedy, env) # The observable state at step 1: item prices (fixed across steps): -plot_instance(b, trajectory[1]) +plot_context(b, trajectory[1]) # ## A training sample # diff --git a/docs/src/benchmarks/dynamic/02_maintenance.jl b/docs/src/benchmarks/dynamic/02_maintenance.jl index 7b542f0..be710aa 100644 --- a/docs/src/benchmarks/dynamic/02_maintenance.jl +++ b/docs/src/benchmarks/dynamic/02_maintenance.jl @@ -16,7 +16,7 @@ env = generate_environments(b, 1)[1] _, trajectory = evaluate_policy!(policies.greedy, env) # The observable state at step 1: degradation levels per component (1 = new, n = failed): -plot_instance(b, trajectory[1]) +plot_context(b, trajectory[1]) # ## A training sample # diff --git a/docs/src/benchmarks/dynamic/03_dvsp.jl b/docs/src/benchmarks/dynamic/03_dvsp.jl index bbc87fe..c40025f 100644 --- a/docs/src/benchmarks/dynamic/03_dvsp.jl +++ b/docs/src/benchmarks/dynamic/03_dvsp.jl @@ -18,7 +18,7 @@ _, trajectory = evaluate_policy!(policies.greedy, env) # The observable state at step 1: depot (green square), must-dispatch customers # (red stars; deadline reached), postponable customers (blue triangles): -plot_instance(b, trajectory[1]) +plot_context(b, trajectory[1]) # ## A training sample # diff --git a/docs/src/benchmarks/static/01_argmax.jl b/docs/src/benchmarks/static/01_argmax.jl index ae00f8e..1071c24 100644 --- a/docs/src/benchmarks/static/01_argmax.jl +++ b/docs/src/benchmarks/static/01_argmax.jl @@ -16,7 +16,7 @@ b = ArgmaxBenchmark(; seed=0) # (rows = features, columns = items): dataset = generate_dataset(b, 100; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/02_argmax2d.jl b/docs/src/benchmarks/static/02_argmax2d.jl index 7bbabd3..3e09428 100644 --- a/docs/src/benchmarks/static/02_argmax2d.jl +++ b/docs/src/benchmarks/static/02_argmax2d.jl @@ -15,7 +15,7 @@ b = Argmax2DBenchmark(; seed=0) # but not the cost direction hidden `θ`: dataset = generate_dataset(b, 50; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/03_ranking.jl 
b/docs/src/benchmarks/static/03_ranking.jl index c5c7092..9dda421 100644 --- a/docs/src/benchmarks/static/03_ranking.jl +++ b/docs/src/benchmarks/static/03_ranking.jl @@ -14,7 +14,7 @@ b = RankingBenchmark() # (rows = features, columns = items): dataset = generate_dataset(b, 50; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/04_portfolio_optimization.jl b/docs/src/benchmarks/static/04_portfolio_optimization.jl index d85bcf4..d953ca4 100644 --- a/docs/src/benchmarks/static/04_portfolio_optimization.jl +++ b/docs/src/benchmarks/static/04_portfolio_optimization.jl @@ -12,7 +12,7 @@ b = PortfolioOptimizationBenchmark() # At inference time the decision-maker observes only the contextual feature vector `x`: dataset = generate_dataset(b, 20; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/05_subset_selection.jl b/docs/src/benchmarks/static/05_subset_selection.jl index 1bd7c2b..afad90b 100644 --- a/docs/src/benchmarks/static/05_subset_selection.jl +++ b/docs/src/benchmarks/static/05_subset_selection.jl @@ -12,7 +12,7 @@ b = SubsetSelectionBenchmark(; identity_mapping=false) # At inference time the decision-maker observes only the feature vector `x`: dataset = generate_dataset(b, 50; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl b/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl index b7d170b..673aa3c 100644 --- a/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl +++ b/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl @@ -13,7 +13,7 @@ b = FixedSizeShortestPathBenchmark() # structure (source top-left, sink bottom-right): dataset = generate_dataset(b, 50; seed=0) sample = first(dataset) -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/static/07_warcraft.jl b/docs/src/benchmarks/static/07_warcraft.jl index 0bbb5eb..dd003b0 100644 --- a/docs/src/benchmarks/static/07_warcraft.jl +++ b/docs/src/benchmarks/static/07_warcraft.jl @@ -11,7 +11,7 @@ b = WarcraftBenchmark() # # At inference time the decision-maker observes only the terrain image `x` (not the costs `θ`): sample = generate_dataset(b, 1)[1] -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl index 32971be..3848164 100644 --- a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl +++ b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl @@ -24,9 +24,9 @@ sample = first(dataset) # ## Observable input # -# At inference time the model observes `x = [c_base; x_raw]`. `plot_instance` shows both +# At inference time the model observes `x = [c_base; x_raw]`. 
`plot_context` shows both # components: base utilities `c_base` (left) and context features `x_raw` (right): -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/benchmarks/stochastic/02_vsp.jl b/docs/src/benchmarks/stochastic/02_vsp.jl index 1fc4155..3cf3d55 100644 --- a/docs/src/benchmarks/stochastic/02_vsp.jl +++ b/docs/src/benchmarks/stochastic/02_vsp.jl @@ -13,7 +13,7 @@ b = StochasticVehicleSchedulingBenchmark() # and scheduled times are observable at inference time. # `store_city=true` is required to visualize the map (not needed for training): sample = generate_dataset(b, 1; store_city=true)[1] -plot_instance(b, sample) +plot_context(b, sample) # ## A training sample # diff --git a/docs/src/custom_benchmarks.md b/docs/src/custom_benchmarks.md index 831895f..90a12c0 100644 --- a/docs/src/custom_benchmarks.md +++ b/docs/src/custom_benchmarks.md @@ -75,7 +75,7 @@ is_minimization_problem(bench::MyBenchmark) -> Bool # default: true (minimizat objective_value(bench::MyBenchmark, sample::DataSample, y) -> Real compute_gap(bench::MyBenchmark, dataset, model, maximizer) -> Float64 has_visualization(bench::MyBenchmark) -> Bool # default: false; return true when plot methods are implemented/available -plot_instance(bench::MyBenchmark, sample::DataSample; kwargs...) +plot_context(bench::MyBenchmark, sample::DataSample; kwargs...) plot_sample(bench::MyBenchmark, sample::DataSample; kwargs...) ``` diff --git a/docs/src/using_benchmarks.md b/docs/src/using_benchmarks.md index bc394bd..e64f6bb 100644 --- a/docs/src/using_benchmarks.md +++ b/docs/src/using_benchmarks.md @@ -190,7 +190,7 @@ dataset = generate_dataset(bench, 10) sample = dataset[1] has_visualization(bench) # true -plot_instance(bench, sample) # problem geometry only +plot_context(bench, sample) # problem geometry only plot_sample(bench, sample) # sample.y overlaid on the instance plot_sample(bench, sample, y) # convenience 3-arg form: override y before plotting @@ -202,7 +202,7 @@ gif(anim, "episode.gif") ``` - `has_visualization(bench)`: returns `true` for benchmarks that implement plot support (if Plots is loaded). -- `plot_instance(bench, sample; kwargs...)`: renders the problem geometry without any solution. +- `plot_context(bench, sample; kwargs...)`: renders the problem geometry without any solution. - `plot_sample(bench, sample; kwargs...)`: renders `sample.y` overlaid on the instance. - `plot_sample(bench, sample, y; kwargs...)`: 3-arg convenience form that overrides `y` before plotting. - `plot_trajectory(bench, traj; kwargs...)`: dynamic benchmarks only; produces a grid of per-epoch subplots. diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index a727d85..bed6b77 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -5,7 +5,7 @@ using DocStringExtensions: TYPEDSIGNATURES using LaTeXStrings: @L_str using Plots import DecisionFocusedLearningBenchmarks: - has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory + has_visualization, plot_context, plot_sample, plot_trajectory, animate_trajectory include("plots/argmax_plots.jl") include("plots/argmax2d_plots.jl") diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl index 8454fb8..6f4d2d8 100644 --- a/ext/plots/argmax2d_plots.jl +++ b/ext/plots/argmax2d_plots.jl @@ -45,7 +45,7 @@ end has_visualization(::Argmax2DBenchmark) = true -function plot_instance(::Argmax2DBenchmark, sample::DataSample; kwargs...) 
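+# Renders only the observable polytope; the hidden cost direction θ is not drawn.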
+function plot_context(::Argmax2DBenchmark, sample::DataSample; kwargs...) pl = _init_plot(; kwargs...) _plot_polytope!(pl, sample.instance) return pl diff --git a/ext/plots/argmax_plots.jl b/ext/plots/argmax_plots.jl index 94a104f..5b4ba13 100644 --- a/ext/plots/argmax_plots.jl +++ b/ext/plots/argmax_plots.jl @@ -5,7 +5,7 @@ $TYPEDSIGNATURES Plot the input features as a heatmap. Columns correspond to items, rows correspond to features. """ -function plot_instance(::ArgmaxBenchmark, sample::DataSample; kwargs...) +function plot_context(::ArgmaxBenchmark, sample::DataSample; kwargs...) x = sample.x # nb_features × n n = size(x, 2) return Plots.heatmap( diff --git a/ext/plots/contextual_stochastic_argmax_plots.jl b/ext/plots/contextual_stochastic_argmax_plots.jl index ac0b85c..c62234e 100644 --- a/ext/plots/contextual_stochastic_argmax_plots.jl +++ b/ext/plots/contextual_stochastic_argmax_plots.jl @@ -1,6 +1,6 @@ has_visualization(::ContextualStochasticArgmaxBenchmark) = true -function plot_instance(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) +function plot_context(::ContextualStochasticArgmaxBenchmark, sample::DataSample; kwargs...) c_base = sample.c_base # base utilities (first n components of x) x_raw = sample.x_raw # context features (last d components of x) n = length(c_base) diff --git a/ext/plots/dvs_plots.jl b/ext/plots/dvs_plots.jl index 4bed8a8..ec90e2e 100644 --- a/ext/plots/dvs_plots.jl +++ b/ext/plots/dvs_plots.jl @@ -194,7 +194,7 @@ end # ── interface methods ──────────────────────────────────────────────────────── -function plot_instance( +function plot_context( bench::DynamicVehicleSchedulingBenchmark, sample::DataSample; kwargs... ) return plot_state(sample.instance; kwargs...) diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl index b165774..8a0d9e9 100644 --- a/ext/plots/dynamic_assortment_plots.jl +++ b/ext/plots/dynamic_assortment_plots.jl @@ -1,6 +1,6 @@ has_visualization(::DynamicAssortmentBenchmark) = true -function plot_instance(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) +function plot_context(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) # sample.instance = (env.features, purchase_history); row 1 of features = prices (×10 to undo normalization) prices = sample.instance[1][1, :] .* 10 N = length(prices) diff --git a/ext/plots/maintenance_plots.jl b/ext/plots/maintenance_plots.jl index 1ae8514..688d7db 100644 --- a/ext/plots/maintenance_plots.jl +++ b/ext/plots/maintenance_plots.jl @@ -1,6 +1,6 @@ has_visualization(::MaintenanceBenchmark) = true -function plot_instance(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) +function plot_context(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) # sample.instance = degradation_state (Vector{Int}, values 1..n) state = sample.instance N = length(state) diff --git a/ext/plots/portfolio_plots.jl b/ext/plots/portfolio_plots.jl index fe5d463..84df40a 100644 --- a/ext/plots/portfolio_plots.jl +++ b/ext/plots/portfolio_plots.jl @@ -1,6 +1,6 @@ has_visualization(::PortfolioOptimizationBenchmark) = true -function plot_instance(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) +function plot_context(::PortfolioOptimizationBenchmark, sample::DataSample; kwargs...) 
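+    # Bar-plot of the contextual feature vector x, the only observable input.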
x = sample.x p = length(x) return Plots.bar( diff --git a/ext/plots/ranking_plots.jl b/ext/plots/ranking_plots.jl index bf5886c..2eda042 100644 --- a/ext/plots/ranking_plots.jl +++ b/ext/plots/ranking_plots.jl @@ -1,6 +1,6 @@ has_visualization(::RankingBenchmark) = true -function plot_instance(::RankingBenchmark, sample::DataSample; kwargs...) +function plot_context(::RankingBenchmark, sample::DataSample; kwargs...) x = sample.x # nb_features × n n = size(x, 2) return Plots.heatmap( diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 4d4d34e..838d64f 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -37,7 +37,7 @@ function _grid_matrices(bench::FixedSizeShortestPathBenchmark, θ, y) return weight_grid, path_grid end -function plot_instance(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) +function plot_context(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) rows, cols = bench.grid_size # Show only the known graph structure (no edge costs) interior_xs = [ diff --git a/ext/plots/subset_selection_plots.jl b/ext/plots/subset_selection_plots.jl index b4529ea..0b4eaba 100644 --- a/ext/plots/subset_selection_plots.jl +++ b/ext/plots/subset_selection_plots.jl @@ -1,6 +1,6 @@ has_visualization(::SubsetSelectionBenchmark) = true -function plot_instance(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) +function plot_context(::SubsetSelectionBenchmark, sample::DataSample; kwargs...) x = sample.x # length n feature vector n = length(x) return Plots.bar( diff --git a/ext/plots/svs_plots.jl b/ext/plots/svs_plots.jl index fd73b77..f048b9a 100644 --- a/ext/plots/svs_plots.jl +++ b/ext/plots/svs_plots.jl @@ -3,9 +3,7 @@ import DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling: has_visualization(::StochasticVehicleSchedulingBenchmark) = true -function plot_instance( - ::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs... -) +function plot_context(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) @assert hasproperty(sample.instance, :city) "Sample does not contain city information." (; tasks, district_width, width) = sample.instance.city ticks = 0:district_width:width diff --git a/ext/plots/warcraft_plots.jl b/ext/plots/warcraft_plots.jl index 04d5d7e..a88725e 100644 --- a/ext/plots/warcraft_plots.jl +++ b/ext/plots/warcraft_plots.jl @@ -3,7 +3,7 @@ using Images: Gray has_visualization(::WarcraftBenchmark) = true -function plot_instance(::WarcraftBenchmark, sample::DataSample; kwargs...) +function plot_context(::WarcraftBenchmark, sample::DataSample; kwargs...) 
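+    # Reshape the terrain tensor (trailing singleton dimension dropped) into an image.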
im = dropdims(sample.x; dims=4) img = W.convert_image_for_plot(im) return Plots.plot( diff --git a/src/DecisionFocusedLearningBenchmarks.jl b/src/DecisionFocusedLearningBenchmarks.jl index fe2ee12..7380544 100644 --- a/src/DecisionFocusedLearningBenchmarks.jl +++ b/src/DecisionFocusedLearningBenchmarks.jl @@ -82,7 +82,7 @@ export is_exogenous, is_endogenous export is_minimization_problem export objective_value -export has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory +export has_visualization, plot_context, plot_sample, plot_trajectory, animate_trajectory export compute_gap # Export all benchmarks diff --git a/src/DynamicVehicleScheduling/plot.jl b/src/DynamicVehicleScheduling/plot.jl index f6e39ad..06e62d8 100644 --- a/src/DynamicVehicleScheduling/plot.jl +++ b/src/DynamicVehicleScheduling/plot.jl @@ -1,4 +1,4 @@ -function plot_instance(env::DVSPEnv; kwargs...) +function plot_context(env::DVSPEnv; kwargs...) return plot_instance(env.instance.static_instance; kwargs...) end diff --git a/src/Utils/Utils.jl b/src/Utils/Utils.jl index fd5232f..50f5129 100644 --- a/src/Utils/Utils.jl +++ b/src/Utils/Utils.jl @@ -42,7 +42,7 @@ export generate_baseline_policies export generate_anticipative_solver, generate_parametric_anticipative_solver export is_minimization_problem -export has_visualization, plot_instance, plot_sample, plot_trajectory, animate_trajectory +export has_visualization, plot_context, plot_sample, plot_trajectory, animate_trajectory export compute_gap export grid_graph, get_path, path_to_matrix export neg_tensor, squeeze_last_dims, average_tensor diff --git a/src/Utils/interface/abstract_benchmark.jl b/src/Utils/interface/abstract_benchmark.jl index 2e420ff..b89ef91 100644 --- a/src/Utils/interface/abstract_benchmark.jl +++ b/src/Utils/interface/abstract_benchmark.jl @@ -59,17 +59,17 @@ function generate_baseline_policies end """ has_visualization(::AbstractBenchmark) -> Bool -Return `true` if `plot_instance` and `plot_sample` are implemented for this benchmark +Return `true` if `plot_context` and `plot_sample` are implemented for this benchmark (requires `Plots` to be loaded). Default is `false`. """ has_visualization(::AbstractBenchmark) = false """ - plot_instance(bench::AbstractBenchmark, sample::DataSample; kwargs...) + plot_context(bench::AbstractBenchmark, sample::DataSample; kwargs...) -Plot the problem instance (no solution). Only available when `Plots` is loaded. +Plot the observable context before making a decision (no solution). Only available when `Plots` is loaded. """ -function plot_instance end +function plot_context end """ plot_sample(bench::AbstractBenchmark, sample::DataSample; kwargs...) 
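After this rename, the visualization entry points are used as follows (a hedged usage
sketch consistent with the docstrings above; the benchmark choice is arbitrary):

```julia
using DecisionFocusedLearningBenchmarks, Plots

bench = ArgmaxBenchmark()
sample = generate_dataset(bench, 1)[1]
if has_visualization(bench)
    plot_context(bench, sample)  # observable context only, no solution drawn
    plot_sample(bench, sample)   # context with the decision y overlaid
end
```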
diff --git a/src/Utils/interface/static_benchmark.jl b/src/Utils/interface/static_benchmark.jl index 8943fac..b7baf79 100644 --- a/src/Utils/interface/static_benchmark.jl +++ b/src/Utils/interface/static_benchmark.jl @@ -28,7 +28,7 @@ Also implement: - [`objective_value`](@ref)`(bench, sample, y)`: must be implemented by every static benchmark # Optional methods (no default, require `Plots` to be loaded) -- [`plot_instance`](@ref), [`plot_sample`](@ref) +- [`plot_context`](@ref), [`plot_sample`](@ref) - [`generate_baseline_policies`](@ref) """ abstract type AbstractStaticBenchmark <: AbstractBenchmark end diff --git a/test/argmax.jl b/test/argmax.jl index 59dfd46..14598b9 100644 --- a/test/argmax.jl +++ b/test/argmax.jl @@ -37,7 +37,7 @@ @testset "Plots" begin using Plots @test has_visualization(b) - fig1 = plot_instance(b, dataset[1]) + fig1 = plot_context(b, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot diff --git a/test/argmax_2d.jl b/test/argmax_2d.jl index 71e5d6b..683c582 100644 --- a/test/argmax_2d.jl +++ b/test/argmax_2d.jl @@ -19,7 +19,7 @@ @test has_visualization(b) figure = plot_sample(b, dataset[1]) @test figure isa Plots.Plot - figure2 = plot_instance(b, dataset[1]) + figure2 = plot_context(b, dataset[1]) @test figure2 isa Plots.Plot figure3 = plot_sample(b, dataset[1], dataset[2].y) @test figure3 isa Plots.Plot diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl index d2004da..ed99e43 100644 --- a/test/contextual_stochastic_argmax.jl +++ b/test/contextual_stochastic_argmax.jl @@ -125,7 +125,7 @@ end sample = DataSample(dataset[1]; θ=model(dataset[1].x)) @test has_visualization(b) - fig1 = plot_instance(b, dataset[1]) + fig1 = plot_context(b, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, sample) @test fig2 isa Plots.Plot diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 3c20e05..93dce32 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -367,7 +367,7 @@ end _, traj = evaluate_policy!(policies[1], envs) @test has_visualization(b) - fig1 = plot_instance(b, traj[1]) + fig1 = plot_context(b, traj[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot diff --git a/test/dynamic_vsp_plots.jl b/test/dynamic_vsp_plots.jl index bd70546..46b8584 100644 --- a/test/dynamic_vsp_plots.jl +++ b/test/dynamic_vsp_plots.jl @@ -9,8 +9,8 @@ # Get a trajectory via the anticipative solver y = generate_anticipative_solver(b)(env; nb_epochs=3) - # Test plot_instance (shows first epoch state) - fig1 = plot_instance(b, y[1]) + # Test plot_context (shows first epoch state) + fig1 = plot_context(b, y[1]) @test fig1 isa Plots.Plot # Test plot_trajectory (grid of epoch subplots) diff --git a/test/fixed_size_shortest_path.jl b/test/fixed_size_shortest_path.jl index 8d9c935..79d35da 100644 --- a/test/fixed_size_shortest_path.jl +++ b/test/fixed_size_shortest_path.jl @@ -36,7 +36,7 @@ @testset "Plots" begin using Plots @test has_visualization(b) - fig1 = plot_instance(b, dataset[1]) + fig1 = plot_context(b, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot diff --git a/test/maintenance.jl b/test/maintenance.jl index 990d210..070c69b 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -236,7 +236,7 @@ end _, traj = evaluate_policy!(policies[1], envs) @test has_visualization(b) - fig1 = plot_instance(b, traj[1]) + fig1 = plot_context(b, traj[1]) @test fig1 isa Plots.Plot 
fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot diff --git a/test/portfolio_optimization.jl b/test/portfolio_optimization.jl index 88c9b6d..7f63ba2 100644 --- a/test/portfolio_optimization.jl +++ b/test/portfolio_optimization.jl @@ -33,7 +33,7 @@ @testset "Plots" begin using Plots @test has_visualization(b) - fig1 = plot_instance(b, dataset[1]) + fig1 = plot_context(b, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot diff --git a/test/ranking.jl b/test/ranking.jl index d0c7ed4..2991ffd 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -38,7 +38,7 @@ @testset "Plots" begin using Plots @test has_visualization(b) - fig1 = plot_instance(b, dataset[1]) + fig1 = plot_context(b, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 76fe95f..609eb0c 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -44,7 +44,7 @@ @testset "Plots" begin using Plots @test has_visualization(b_identity) - fig1 = plot_instance(b_identity, dataset[1]) + fig1 = plot_context(b_identity, dataset[1]) @test fig1 isa Plots.Plot fig2 = plot_sample(b_identity, dataset[1]) @test fig2 isa Plots.Plot diff --git a/test/vsp.jl b/test/vsp.jl index 56931fd..0d778d2 100644 --- a/test/vsp.jl +++ b/test/vsp.jl @@ -49,7 +49,7 @@ @test length(ls_dataset[1].extra.scenarios) == K # Plots work unchanged - figure_1 = plot_instance(b, saa_dataset[1]) + figure_1 = plot_context(b, saa_dataset[1]) @test figure_1 isa Plots.Plot figure_2 = plot_sample(b, saa_dataset[1]) @test figure_2 isa Plots.Plot diff --git a/test/warcraft.jl b/test/warcraft.jl index e23a6e5..a801e49 100644 --- a/test/warcraft.jl +++ b/test/warcraft.jl @@ -16,7 +16,7 @@ @test has_visualization(b) figure = plot_sample(b, dataset[1]) @test figure isa Plots.Plot - figure2 = plot_instance(b, dataset[1]) + figure2 = plot_context(b, dataset[1]) @test figure2 isa Plots.Plot figure3 = plot_sample(b, dataset[1], dataset[2].y) @test figure3 isa Plots.Plot From 5fc4765a29fd9a66039b5c23f39d9ce16c490ed9 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 17:43:28 +0200 Subject: [PATCH 17/21] docs: Static benchmarks done --- docs/src/benchmarks/static/01_argmax.jl | 2 +- docs/src/benchmarks/static/02_argmax2d.jl | 4 +- docs/src/benchmarks/static/03_ranking.jl | 13 +- .../static/04_portfolio_optimization.jl | 11 +- .../benchmarks/static/05_subset_selection.jl | 14 +- .../static/06_fixed_size_shortest_path.jl | 9 +- docs/src/benchmarks/static/07_warcraft.jl | 7 +- ext/plots/shortest_path_plots.jl | 128 +++++++++--------- .../FixedSizeShortestPath.jl | 2 +- 9 files changed, 96 insertions(+), 94 deletions(-) diff --git a/docs/src/benchmarks/static/01_argmax.jl b/docs/src/benchmarks/static/01_argmax.jl index 1071c24..817ac70 100644 --- a/docs/src/benchmarks/static/01_argmax.jl +++ b/docs/src/benchmarks/static/01_argmax.jl @@ -53,7 +53,7 @@ mean(maximizer(model(s.x)) == s.y for s in dataset) # ``\theta = \text{encoder}(x) \in \mathbb{R}^n``. The task is to select the item with # the highest score: # ```math -# y = \mathrm{argmax}(\theta) +# y = \mathrm{argmax}(\theta) = \mathop{\mathrm{argmax}}\limits_{y\in\Delta^n} \theta^\top y # ``` # The solution ``y`` is encoded as a one-hot vector. # The score vector ``\theta`` is never observed (only features ``x`` are available). 
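The rewritten formula above makes explicit that the argmax over the simplex ``\Delta^n`` is attained at a one-hot vertex. The maximizer named `one_hot_argmax` later in this series can be sketched in a few lines; this is an illustrative implementation, not the package source:

```julia
# Map a score vector θ to the one-hot indicator of its largest entry,
# i.e. the vertex of the simplex that maximizes θ'y.
function one_hot_argmax(θ::AbstractVector)
    y = zeros(Bool, length(θ))
    y[argmax(θ)] = true
    return y
end

one_hot_argmax([0.2, 1.5, -0.3])  # Bool[0, 1, 0]
```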
diff --git a/docs/src/benchmarks/static/02_argmax2d.jl b/docs/src/benchmarks/static/02_argmax2d.jl index 3e09428..566c3ec 100644 --- a/docs/src/benchmarks/static/02_argmax2d.jl +++ b/docs/src/benchmarks/static/02_argmax2d.jl @@ -49,7 +49,7 @@ plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # ``\theta \in \mathbb{R}^2``. The task is to find the polytope vertex maximizing # the dot product: # ```math -# y^* = \mathrm{argmax}_{v \in \mathcal{Y}(x)} \; \theta^\top v +# y^* = \mathop{\mathrm{argmax}}\limits_{v \in \mathcal{Y}(x)} \; \theta^\top v # ``` # # This is a toy 2D combinatorial optimization problem useful for visualizing @@ -72,6 +72,6 @@ plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # \xrightarrow{y} # ``` # -# **Model:** `Dense(nb_features → 2; bias=false)` — predicts a 2D cost direction. +# **Model:** `Dense(nb_features → 2; bias=false)`: predicts a 2D cost direction. # # **Maximizer:** finds the vertex of the instance polytope with maximum dot product with θ. diff --git a/docs/src/benchmarks/static/03_ranking.jl b/docs/src/benchmarks/static/03_ranking.jl index 9dda421..880344b 100644 --- a/docs/src/benchmarks/static/03_ranking.jl +++ b/docs/src/benchmarks/static/03_ranking.jl @@ -35,9 +35,10 @@ maximizer = generate_maximizer(b) # ordinal ranking via sortperm # A randomly initialized policy produces an arbitrary ranking: θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -# Optimality gap on the dataset (0 = optimal, higher is worse): +# Optimality gap on the dataset (lower is better): compute_gap(b, dataset, model, maximizer) # --- @@ -48,9 +49,9 @@ compute_gap(b, dataset, model, maximizer) # ``\theta \in \mathbb{R}^n``. The task is to compute the ordinal ranking of the items # by cost: # ```math -# y_i = \mathrm{rank}(\theta_i \mid \theta_1, \ldots, \theta_n) +# y_i = \mathrm{rank}(\theta_i \mid \theta_1, \ldots, \theta_n) = \mathop{\mathrm{argmax}}\limits_{y\in\sigma(n)} \theta^\top y # ``` -# where ``y_i = 1`` means item ``i`` has the highest cost. +# where ``y_i = 1`` means item ``i`` has the lowest cost. # # ## Key Parameters # @@ -69,6 +70,6 @@ compute_gap(b, dataset, model, maximizer) # \xrightarrow{y} # ``` # -# **Model:** `Chain(Dense(nb_features → 1; bias=false), vec)` — predicts one score per item. +# **Model:** `Chain(Dense(nb_features → 1; bias=false), vec)`: predicts one score per item. # -# **Maximizer:** `ranking(θ)` — returns a vector of ordinal ranks via `invperm(sortperm(θ))`. +# **Maximizer:** `ranking(θ)`: returns a vector of ordinal ranks via `invperm(sortperm(θ))`. 
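Since the ranking maximizer is documented only as `invperm(sortperm(θ))`, a short worked example may help; this sketches the documented behavior rather than quoting the package source:

```julia
# Ordinal rank of each entry: rank 1 goes to the smallest (lowest-cost) item.
ranking(θ::AbstractVector) = invperm(sortperm(θ))

θ = [0.7, 0.1, 0.4]
ranking(θ)  # returns [3, 1, 2]: item 2 has the lowest cost, so it gets rank 1
```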
diff --git a/docs/src/benchmarks/static/04_portfolio_optimization.jl b/docs/src/benchmarks/static/04_portfolio_optimization.jl index d953ca4..b1e9078 100644 --- a/docs/src/benchmarks/static/04_portfolio_optimization.jl +++ b/docs/src/benchmarks/static/04_portfolio_optimization.jl @@ -33,11 +33,12 @@ maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) # A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample ; θ=θ_pred, y=y_pred)) -# Optimality gap on the dataset (0 = optimal, higher is worse): +# Optimality gap on the dataset (lower is better): compute_gap(b, dataset, model, maximizer) - + # --- # ## Problem Description # @@ -74,12 +75,12 @@ compute_gap(b, dataset, model, maximizer) # ```math # \xrightarrow[\text{Features}]{x \in \mathbb{R}^p} # \fbox{Linear model} -# \xrightarrow[\text{Predicted returns}]{\hat{\theta} \in \mathbb{R}^d} +# \xrightarrow[\text{Predicted returns}]{\theta \in \mathbb{R}^d} # \fbox{QP solver (Ipopt)} # \xrightarrow[\text{Portfolio}]{y \in \mathbb{R}^d} # ``` # -# **Model:** `Dense(p → d)` — predicts one expected return per asset. +# **Model:** `Dense(p → d)`, predicts one expected return per asset. # # **Maximizer:** Ipopt QP solver enforcing the variance and budget constraints. # diff --git a/docs/src/benchmarks/static/05_subset_selection.jl b/docs/src/benchmarks/static/05_subset_selection.jl index afad90b..2d1199e 100644 --- a/docs/src/benchmarks/static/05_subset_selection.jl +++ b/docs/src/benchmarks/static/05_subset_selection.jl @@ -33,9 +33,10 @@ maximizer = generate_maximizer(b) # top-k selection # A randomly initialized policy selects items with no relation to their true values: θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -# Optimality gap on the dataset (0 = optimal, higher is worse): +# Optimality gap on the dataset (lower is better): compute_gap(b, dataset, model, maximizer) # --- @@ -45,7 +46,10 @@ compute_gap(b, dataset, model, maximizer) # A feature vector ``x \in \mathbb{R}^n`` is observed (identity mapping by default). # The task is to select the ``k`` items with the highest values: # ```math -# y = \mathrm{top}_k(\theta) +# \begin{aligned} +# y = \mathrm{top}_k(\theta) = & \mathop{\mathrm{argmax}}\limits_{y \in \{0,1\}^n} \; \theta^\top y \\ +# & \quad\text{s.t.} \quad \sum_{i=1}^n y_i = k +# \end{aligned} # ``` # where ``y \in \{0,1\}^n`` with exactly ``k`` ones. # @@ -70,9 +74,9 @@ compute_gap(b, dataset, model, maximizer) # \xrightarrow{y} # ``` # -# **Model:** `Dense(n → n; bias=false)` — predicts a score per item. +# **Model:** `Dense(n → n; bias=false)`: predicts a score per item. # -# **Maximizer:** `top_k(θ, k)` — returns a boolean vector with `true` at the `k` +# **Maximizer:** `top_k(θ, k)`: returns a boolean vector with `true` at the `k` # highest-scoring positions. # # !!! 
note "Reference" diff --git a/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl b/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl index 673aa3c..593d356 100644 --- a/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl +++ b/docs/src/benchmarks/static/06_fixed_size_shortest_path.jl @@ -34,9 +34,10 @@ maximizer = generate_maximizer(b) # Dijkstra shortest path on the grid g # A randomly initialized policy predicts arbitrary costs, yielding a near-straight path: θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -# Optimality gap on the dataset (0 = optimal, higher is worse): +# Optimality gap on the dataset (lower is better): compute_gap(b, dataset, model, maximizer) # --- @@ -48,7 +49,7 @@ compute_gap(b, dataset, model, maximizer) # ``x \in \mathbb{R}^p`` is observed. The task is to find the minimum-cost path from # vertex 1 (top-left) to vertex ``V`` (bottom-right): # ```math -# y^* = \mathrm{argmin}_{y \in \mathcal{P}} \; \theta^\top y +# y^* = \mathop{\mathrm{argmax}}\limits_{y \in \mathcal{P}} \; -\theta^\top y # ``` # where ``y \in \{0,1\}^E`` indicates selected edges and ``\mathcal{P}`` is the set of # valid source-to-sink paths. @@ -75,7 +76,7 @@ compute_gap(b, dataset, model, maximizer) # \xrightarrow[\text{Path}]{y \in \{0,1\}^E} # ``` # -# **Model:** `Chain(Dense(p → E))` — predicts one cost per edge. +# **Model:** `Chain(Dense(p → E))`: predicts one cost per edge. # # **Maximizer:** Dijkstra (default) or Bellman-Ford on negated weights to find the # longest (maximum-weight) path. diff --git a/docs/src/benchmarks/static/07_warcraft.jl b/docs/src/benchmarks/static/07_warcraft.jl index dd003b0..c4f5b04 100644 --- a/docs/src/benchmarks/static/07_warcraft.jl +++ b/docs/src/benchmarks/static/07_warcraft.jl @@ -32,9 +32,10 @@ maximizer = generate_maximizer(b) # Dijkstra shortest path on the 12×12 # An untrained CNN produces a near-uniform cost map, yielding a near-straight path: θ_pred = model(sample.x) -plot_sample(b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) -# Optimality gap on this sample (0 = optimal, higher is worse): +# Optimality gap on this sample (lower is better): compute_gap(b, [sample], model, maximizer) # --- @@ -48,7 +49,7 @@ compute_gap(b, [sample], model, maximizer) # Formally, let ``\theta_{ij}`` be the (unknown) cost of cell ``(i,j)`` and # ``y_{ij} \in \{0,1\}`` indicate whether cell ``(i,j)`` is on the path. The objective is: # ```math -# y^* = \mathrm{argmin}_{y \in \mathcal{P}} \sum_{(i,j)} \theta_{ij} \, y_{ij} +# y^* = \mathop{\mathrm{argmin}}\limits_{y \in \mathcal{P}} \sum_{(i,j)} \theta_{ij} \, y_{ij} # ``` # where ``\mathcal{P}`` is the set of valid grid paths (4-connected, source to sink). # diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 838d64f..89e8126 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -37,65 +37,62 @@ function _grid_matrices(bench::FixedSizeShortestPathBenchmark, θ, y) return weight_grid, path_grid end -function plot_context(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) 
+function _plot_grid( + bench::FixedSizeShortestPathBenchmark; + grid=nothing, + title="", + colorbar=false, + color=:viridis, + path_grid=nothing, + kwargs..., +) rows, cols = bench.grid_size - # Show only the known graph structure (no edge costs) - interior_xs = [ - c for r in 1:rows for - c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols) - ] - interior_ys = [ - r for r in 1:rows for - c in 1:cols if !(r == 1 && c == 1) && !(r == rows && c == cols) - ] - pl = Plots.plot(; - xlim=(0.5, cols + 0.5), - ylim=(0.5, rows + 0.5), + if isnothing(grid) + grid = ones(rows, cols) + end + pl = Plots.heatmap( + grid; yflip=true, aspect_ratio=:equal, - legend=:topright, - title="Grid graph ($(rows)×$(cols))", - framestyle=:box, - grid=false, + title=title, + colorbar=colorbar, + framestyle=:none, + color=color, kwargs..., ) + Plots.vline!(pl, (0.5):1:(cols + 0.5); color=:gray, lw=0.5, label=false) + Plots.hline!(pl, (0.5):1:(rows + 0.5); color=:gray, lw=0.5, label=false) + if !isnothing(path_grid) + path_xs = Int[] + path_ys = Int[] + for r in 1:rows, c in 1:cols + if path_grid[r, c] + push!(path_xs, c) + push!(path_ys, r) + end + end + Plots.scatter!( + pl, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false + ) + end Plots.scatter!( - pl, - interior_xs, - interior_ys; - color=:lightgray, - markersize=8, - markerstrokecolor=:gray, - markerstrokewidth=1, - label=false, - ) - Plots.scatter!( - pl, - [1], - [1]; - color=:seagreen, - markersize=10, - markershape=:square, - label="source", - markerstrokewidth=0, + pl, [1], [1]; color=:seagreen, markersize=8, markershape=:square, label=false ) Plots.scatter!( pl, [cols], [rows]; color=:crimson, - markersize=10, + markersize=8, markershape=:square, - label="sink", - markerstrokewidth=0, + label=false, ) return pl end -function plot_sample(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) +function plot_context(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) x = sample.x p_feat = length(x) - weight_grid, path_grid = _grid_matrices(bench, sample.θ, sample.y) rows, cols = bench.grid_size p_x = Plots.bar( @@ -104,37 +101,34 @@ function plot_sample(bench::FixedSizeShortestPathBenchmark, sample::DataSample; legend=false, xlabel="Feature", ylabel="Value", - title="x (features, observable)", + title="x (features)", color=:steelblue, xticks=1:p_feat, ) - p1 = Plots.heatmap( - weight_grid; - yflip=true, - aspect_ratio=:equal, - title="Edge weights θ", - colorbar=true, - framestyle=:none, - ) - p2 = Plots.heatmap( - weight_grid; - yflip=true, - aspect_ratio=:equal, - title="Shortest path y", - colorbar=false, - framestyle=:none, - color=:Blues, + p_grid = _plot_grid(bench; title="Grid graph ($(rows)×$(cols))", color=:grays) + + l = Plots.@layout [a{0.35w} b] + return Plots.plot(p_x, p_grid; layout=l, size=(700, 300), kwargs...) +end + +function plot_sample(bench::FixedSizeShortestPathBenchmark, sample::DataSample; kwargs...) 
+ x = sample.x + p_feat = length(x) + weight_grid, path_grid = _grid_matrices(bench, sample.θ, sample.y) + + p_x = Plots.bar( + 1:p_feat, + Float64.(x); + legend=false, + xlabel="Feature", + ylabel="Value", + title="x (features)", + color=:steelblue, + xticks=1:p_feat, ) - path_xs = Int[] - path_ys = Int[] - for r in 1:rows, c in 1:cols - if path_grid[r, c] - push!(path_xs, c) - push!(path_ys, r) - end - end - Plots.scatter!( - p2, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false + p1 = _plot_grid(bench; grid=weight_grid, title="Edge weights θ", colorbar=true) + p2 = _plot_grid( + bench; grid=weight_grid, title="Shortest path y", color=:Blues, path_grid=path_grid ) l = Plots.@layout [a{0.25h}; [b c]] diff --git a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl index 86d5bdb..f72ecc9 100644 --- a/src/FixedSizeShortestPath/FixedSizeShortestPath.jl +++ b/src/FixedSizeShortestPath/FixedSizeShortestPath.jl @@ -138,7 +138,7 @@ function Utils.generate_statistical_model( ) Random.seed!(seed) (; p, graph) = bench - return Chain(Dense(p, ne(graph))) + return Dense(p, ne(graph)) end export FixedSizeShortestPathBenchmark From 585a3498a6dc577dbccd5c80e15f7d575ee8783b Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 18:37:52 +0200 Subject: [PATCH 18/21] docs: Stochastic and dynamic done --- .../dynamic/01_dynamic_assortment.jl | 2 +- docs/src/benchmarks/dynamic/03_dvsp.jl | 2 +- .../static/04_portfolio_optimization.jl | 4 +- .../01_contextual_stochastic_argmax.jl | 13 +- docs/src/benchmarks/stochastic/02_vsp.jl | 8 +- ext/plots/dvs_plots.jl | 155 ++++++------------ ext/plots/dynamic_assortment_plots.jl | 11 +- ext/plots/maintenance_plots.jl | 23 ++- ext/plots/shortest_path_plots.jl | 16 +- ext/plots/svs_plots.jl | 118 +++++++------ src/Utils/policy.jl | 4 +- 11 files changed, 162 insertions(+), 194 deletions(-) diff --git a/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl b/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl index f0bf611..fa38776 100644 --- a/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl +++ b/docs/src/benchmarks/dynamic/01_dynamic_assortment.jl @@ -14,7 +14,7 @@ b = DynamicAssortmentBenchmark() # purchase history: policies = generate_baseline_policies(b) env = generate_environments(b, 1)[1] -_, trajectory = evaluate_policy!(policies.greedy, env) +_, trajectory = evaluate_policy!(policies.expert, env) # The observable state at step 1: item prices (fixed across steps): plot_context(b, trajectory[1]) diff --git a/docs/src/benchmarks/dynamic/03_dvsp.jl b/docs/src/benchmarks/dynamic/03_dvsp.jl index c40025f..aa0f2c6 100644 --- a/docs/src/benchmarks/dynamic/03_dvsp.jl +++ b/docs/src/benchmarks/dynamic/03_dvsp.jl @@ -33,7 +33,7 @@ plot_context(b, trajectory[1]) plot_sample(b, trajectory[1]) # Multiple steps side by side: customers accumulate and routes change over time: -plot_trajectory(b, trajectory[1:min(3, length(trajectory))]) +plot_trajectory(b, trajectory) # ## DFL pipeline components diff --git a/docs/src/benchmarks/static/04_portfolio_optimization.jl b/docs/src/benchmarks/static/04_portfolio_optimization.jl index b1e9078..a6a734b 100644 --- a/docs/src/benchmarks/static/04_portfolio_optimization.jl +++ b/docs/src/benchmarks/static/04_portfolio_optimization.jl @@ -34,11 +34,11 @@ maximizer = generate_maximizer(b) # Markowitz QP solver (Ipopt via JuMP) # A randomly initialized policy predicts arbitrary returns, leading to a suboptimal allocation: θ_pred = model(sample.x) y_pred 
= maximizer(θ_pred) -plot_sample(b, DataSample(sample ; θ=θ_pred, y=y_pred)) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # Optimality gap on the dataset (lower is better): compute_gap(b, dataset, model, maximizer) - + # --- # ## Problem Description # diff --git a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl index 3848164..ba90e41 100644 --- a/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl +++ b/docs/src/benchmarks/stochastic/01_contextual_stochastic_argmax.jl @@ -9,16 +9,14 @@ using Plots b = ContextualStochasticArgmaxBenchmark() -# `generate_dataset` returns unlabeled samples (`y = nothing`) for this benchmark. +# By default, `generate_dataset` returns unlabeled samples (`y = nothing`) for this benchmark. # A `target_policy` must be provided to attach labels. Here we use the anticipative # oracle: it returns the item with the highest realized utility for each scenario, # giving one labeled sample per scenario per instance. anticipative = generate_anticipative_solver(b) policy = - (ctx, scenarios) -> [ - DataSample(; ctx.context..., x=ctx.x, y=anticipative(ξ), extra=(; scenario=ξ)) - for ξ in scenarios - ] + (ctx, scenarios) -> + [DataSample(ctx; y=anticipative(ξ), extra=(; scenario=ξ)) for ξ in scenarios] dataset = generate_dataset(b, 20; target_policy=policy, seed=0) sample = first(dataset) @@ -53,7 +51,8 @@ maximizer = generate_maximizer(b) # one-hot argmax # A randomly initialized policy selects items with no relation to their expected utilities. # Top: feature vector x. Bottom: predicted utilities θ̂ with the selected item in red: θ_pred = model(sample.x) -plot_sample(b, DataSample(sample; θ=θ_pred, y=maximizer(θ_pred))) +y_pred = maximizer(θ_pred) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # --- # ## Problem Description @@ -102,7 +101,7 @@ plot_sample(b, DataSample(sample; θ=θ_pred, y=maximizer(θ_pred))) # \xrightarrow{y} # ``` # -# **Model:** `Dense(n+d → n; bias=false)` — can in principle recover the exact mapping +# **Model:** `Dense(n+d → n; bias=false)`: can in principle recover the exact mapping # ``[I \mid W]`` from training data. # # **Maximizer:** `one_hot_argmax`. diff --git a/docs/src/benchmarks/stochastic/02_vsp.jl b/docs/src/benchmarks/stochastic/02_vsp.jl index 3cf3d55..c01a373 100644 --- a/docs/src/benchmarks/stochastic/02_vsp.jl +++ b/docs/src/benchmarks/stochastic/02_vsp.jl @@ -5,7 +5,7 @@ using DecisionFocusedLearningBenchmarks using Plots -b = StochasticVehicleSchedulingBenchmark() +b = StochasticVehicleSchedulingBenchmark(; nb_tasks=10) # ## Observable input # @@ -36,9 +36,7 @@ maximizer = generate_maximizer(b) # deterministic VSP solver (HiGHS MIP) # The untrained model predicts random edge costs; the resulting schedule is arbitrary: θ_pred = model(sample.x) y_pred = maximizer(θ_pred; sample.context...) -plot_sample( - b, DataSample(; sample.context..., x=sample.x, θ=θ_pred, y=y_pred, extra=sample.extra) -) +plot_sample(b, DataSample(sample; θ=θ_pred, y=y_pred)) # --- # ## Problem Description @@ -124,4 +122,4 @@ plot_sample( # deterministic VSP instance. # # !!! 
note "Reference" -# [Learning to Approximate Industrial Problems by Operations Research Classic Problems](https://hal.science/hal-02396091/document) +# Full details on this problem can be found in [Learning to Approximate Industrial Problems by Operations Research Classic Problems](https://hal.science/hal-02396091/document) diff --git a/ext/plots/dvs_plots.jl b/ext/plots/dvs_plots.jl index ec90e2e..3786fd3 100644 --- a/ext/plots/dvs_plots.jl +++ b/ext/plots/dvs_plots.jl @@ -3,43 +3,24 @@ using Printf: @sprintf has_visualization(::DynamicVehicleSchedulingBenchmark) = true -# ── helpers (moved from static_vsp/plot.jl) ───────────────────────────────── - -function _plot_static_instance( - x_depot, - y_depot, - x_customers, - y_customers; - customer_markersize=4, - depot_markersize=7, - alpha_depot=0.8, - customer_color=:lightblue, - depot_color=:lightgreen, - kwargs..., -) - fig = Plots.plot(; - legend=:topleft, xlabel="x coordinate", ylabel="y coordinate", kwargs... - ) - Plots.scatter!( - fig, - x_customers, - y_customers; - label="Customers", - markercolor=customer_color, - marker=:circle, - markersize=customer_markersize, - ) - Plots.scatter!( - fig, - [x_depot], - [y_depot]; - label="Depot", - markercolor=depot_color, - marker=:rect, - markersize=depot_markersize, - alpha=alpha_depot, - ) - return fig +# ── helpers ──────────────────────────────────────────────────────────────────── + +function _compute_bounds(pd; margin=0.05, legend_margin_factor=0.15) + x_min = minimum(min(data.x_depot, minimum(data.x_customers)) for data in pd) + x_max = maximum(max(data.x_depot, maximum(data.x_customers)) for data in pd) + y_min = minimum(min(data.y_depot, minimum(data.y_customers)) for data in pd) + y_max = maximum(max(data.y_depot, maximum(data.y_customers)) for data in pd) + + xlims = (x_min - margin, x_max + margin) + y_range = y_max - y_min + 2 * margin + legend_margin = y_range * legend_margin_factor + ylims = (y_min - margin, y_max + margin + legend_margin) + + min_start_time = minimum(minimum(data.start_times) for data in pd) + max_start_time = maximum(maximum(data.start_times) for data in pd) + clims = (min_start_time, max_start_time) + + return (; xlims, ylims, clims) end # ── plot_state ─────────────────────────────────────────────────────────────── @@ -65,25 +46,19 @@ function plot_state( show_colorbar=true, kwargs..., ) - (; x_depot, y_depot, x_customers, y_customers, is_must_dispatch, start_times) = DVS.build_state_data( - state - ) + (; x_depot, y_depot, x_customers, y_customers, is_must_dispatch, start_times) = + DVS.build_state_data(state) - plot_args = Dict( - :legend => :topleft, :title => "DVSP State - Epoch $(state.current_epoch)" + xlabel = show_axis_labels ? "x coordinate" : "" + ylabel = show_axis_labels ? "y coordinate" : "" + fig = Plots.plot(; + legend=:topleft, + title="DVSP State - Epoch $(state.current_epoch)", + xlabel=xlabel, + ylabel=ylabel, + kwargs..., ) - if show_axis_labels - plot_args[:xlabel] = "x coordinate" - plot_args[:ylabel] = "y coordinate" - end - - for (k, v) in kwargs - plot_args[k] = v - end - - fig = Plots.plot(; plot_args...) 
- Plots.scatter!( fig, [x_depot], @@ -96,45 +71,39 @@ function plot_state( markerstrokewidth=markerstrokewidth, ) - scatter_must_dispatch_args = Dict( - :label => "Must-dispatch customers", - :markercolor => must_dispatch_color, - :marker => must_dispatch_marker, - :markersize => customer_markersize, - :markerstrokewidth => markerstrokewidth, - ) - - scatter_postponable_args = Dict( - :label => "Postponable customers", - :markercolor => postponable_color, - :marker => postponable_marker, - :markersize => customer_markersize, - :markerstrokewidth => markerstrokewidth, - ) - if show_colorbar - scatter_must_dispatch_args[:marker_z] = start_times[is_must_dispatch] - scatter_postponable_args[:marker_z] = start_times[.!is_must_dispatch] - scatter_postponable_args[:colormap] = :plasma - scatter_must_dispatch_args[:colormap] = :plasma - scatter_postponable_args[:colorbar] = :right - scatter_must_dispatch_args[:colorbar] = :right + colorbar_args = if show_colorbar + (; colormap=:plasma, colorbar=:right) + else + (;) end - if length(x_customers[is_must_dispatch]) > 0 + if any(is_must_dispatch) Plots.scatter!( fig, x_customers[is_must_dispatch], y_customers[is_must_dispatch]; - scatter_must_dispatch_args..., + label="Must-dispatch", + markercolor=must_dispatch_color, + marker=must_dispatch_marker, + markersize=customer_markersize, + markerstrokewidth=markerstrokewidth, + marker_z=show_colorbar ? start_times[is_must_dispatch] : nothing, + colorbar_args..., ) end - if length(x_customers[.!is_must_dispatch]) > 0 + if any(.!is_must_dispatch) Plots.scatter!( fig, x_customers[.!is_must_dispatch], y_customers[.!is_must_dispatch]; - scatter_postponable_args..., + label="Postponable", + markercolor=postponable_color, + marker=postponable_marker, + markersize=customer_markersize, + markerstrokewidth=markerstrokewidth, + marker_z=show_colorbar ? 
start_times[.!is_must_dispatch] : nothing, + colorbar_args..., ) end @@ -234,19 +203,7 @@ function plot_trajectory( end rows = ceil(Int, n_epochs / cols) - x_min = minimum(min(data.x_depot, minimum(data.x_customers)) for data in pd) - x_max = maximum(max(data.x_depot, maximum(data.x_customers)) for data in pd) - y_min = minimum(min(data.y_depot, minimum(data.y_customers)) for data in pd) - y_max = maximum(max(data.y_depot, maximum(data.y_customers)) for data in pd) - - xlims = (x_min - margin, x_max + margin) - y_range = y_max - y_min + 2 * margin - legend_margin = y_range * legend_margin_factor - ylims = (y_min - margin, y_max + margin + legend_margin) - - min_start_time = minimum(minimum(data.start_times) for data in pd) - max_start_time = maximum(maximum(data.start_times) for data in pd) - clims = (min_start_time, max_start_time) + (; xlims, ylims, clims) = _compute_bounds(pd; margin, legend_margin_factor) plots = map(1:n_epochs) do i sample = traj[i] @@ -316,19 +273,7 @@ function animate_trajectory( pd = DVS.build_plot_data(traj) epoch_costs = [-sample.reward for sample in traj] - x_min = minimum(min(data.x_depot, minimum(data.x_customers)) for data in pd) - x_max = maximum(max(data.x_depot, maximum(data.x_customers)) for data in pd) - y_min = minimum(min(data.y_depot, minimum(data.y_customers)) for data in pd) - y_max = maximum(max(data.y_depot, maximum(data.y_customers)) for data in pd) - - xlims = (x_min - margin, x_max + margin) - y_range = y_max - y_min + 2 * margin - legend_margin = y_range * legend_margin_factor - ylims = (y_min - margin, y_max + margin + legend_margin) - - min_start_time = minimum(minimum(data.start_times) for data in pd) - max_start_time = maximum(maximum(data.start_times) for data in pd) - clims = (min_start_time, max_start_time) + (; xlims, ylims, clims) = _compute_bounds(pd; margin, legend_margin_factor) if show_cost_bar x_min, x_max = xlims diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl index 8a0d9e9..ef52d72 100644 --- a/ext/plots/dynamic_assortment_plots.jl +++ b/ext/plots/dynamic_assortment_plots.jl @@ -1,7 +1,10 @@ has_visualization(::DynamicAssortmentBenchmark) = true +function _step_str(sample::DataSample) + return hasproperty(sample, :step) ? " (step $(sample.step))" : "" +end + function plot_context(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) - # sample.instance = (env.features, purchase_history); row 1 of features = prices (×10 to undo normalization) prices = sample.instance[1][1, :] .* 10 N = length(prices) return Plots.bar( @@ -10,7 +13,7 @@ function plot_context(::DynamicAssortmentBenchmark, sample::DataSample; kwargs.. legend=false, xlabel="Item", ylabel="Price", - title="Instance (item prices): step $(length(sample.instance[2]) + 1)", + title="Item prices$(_step_str(sample))", color=:steelblue, kwargs..., ) @@ -18,7 +21,7 @@ end function plot_sample(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) prices = sample.instance[1][1, :] .* 10 - y = sample.y # BitVector, selected items + y = sample.y N = length(prices) colors = [y[i] ? :seagreen : :lightgray for i in 1:N] return Plots.bar( @@ -27,7 +30,7 @@ function plot_sample(::DynamicAssortmentBenchmark, sample::DataSample; kwargs... 
legend=false, xlabel="Item", ylabel="Price", - title="Assortment (green = offered): step $(length(sample.instance[2]) + 1)", + title="Assortment$(_step_str(sample))", color=colors, kwargs..., ) diff --git a/ext/plots/maintenance_plots.jl b/ext/plots/maintenance_plots.jl index 688d7db..2d8f328 100644 --- a/ext/plots/maintenance_plots.jl +++ b/ext/plots/maintenance_plots.jl @@ -1,7 +1,14 @@ has_visualization(::MaintenanceBenchmark) = true +function _degradation_colors(state, n) + return [s == n ? :firebrick : :steelblue for s in state] +end + +function _step_str(sample::DataSample) + return hasproperty(sample, :step) ? " (step $(sample.step))" : "" +end + function plot_context(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) - # sample.instance = degradation_state (Vector{Int}, values 1..n) state = sample.instance N = length(state) n = bench.n @@ -11,26 +18,26 @@ function plot_context(bench::MaintenanceBenchmark, sample::DataSample; kwargs... legend=false, xlabel="Component", ylabel="Degradation level", - title="Instance (degradation state)", + title="Degradation state$(_step_str(sample))", ylim=(0, n + 0.5), - color=:steelblue, + color=_degradation_colors(state, n), kwargs..., ) end function plot_sample(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) state = sample.instance - y = sample.y # BitVector, maintained components + y = sample.y N = length(state) n = bench.n - colors = [y[i] ? :seagreen : (state[i] == n ? :firebrick : :steelblue) for i in 1:N] - labels = ["comp $i$(y[i] ? " ✓" : "")" for i in 1:N] + colors = [y[i] ? :seagreen : c for (i, c) in enumerate(_degradation_colors(state, n))] return Plots.bar( - labels, + 1:N, state; legend=false, + xlabel="Component", ylabel="Degradation level", - title="Solution (green = maintained, red = failed)", + title="Maintenance$(_step_str(sample))", ylim=(0, n + 0.5), color=colors, kwargs..., diff --git a/ext/plots/shortest_path_plots.jl b/ext/plots/shortest_path_plots.jl index 89e8126..2649ce5 100644 --- a/ext/plots/shortest_path_plots.jl +++ b/ext/plots/shortest_path_plots.jl @@ -72,20 +72,20 @@ function _plot_grid( end end Plots.scatter!( - pl, path_xs, path_ys; color=:white, markersize=6, markerstrokewidth=0, label=false + pl, + path_xs, + path_ys; + color=:white, + markersize=6, + markerstrokewidth=0, + label=false, ) end Plots.scatter!( pl, [1], [1]; color=:seagreen, markersize=8, markershape=:square, label=false ) Plots.scatter!( - pl, - [cols], - [rows]; - color=:crimson, - markersize=8, - markershape=:square, - label=false, + pl, [cols], [rows]; color=:crimson, markersize=8, markershape=:square, label=false ) return pl end diff --git a/ext/plots/svs_plots.jl b/ext/plots/svs_plots.jl index f048b9a..f660ff2 100644 --- a/ext/plots/svs_plots.jl +++ b/ext/plots/svs_plots.jl @@ -3,15 +3,17 @@ import DecisionFocusedLearningBenchmarks.StochasticVehicleScheduling: has_visualization(::StochasticVehicleSchedulingBenchmark) = true -function plot_context(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) - @assert hasproperty(sample.instance, :city) "Sample does not contain city information." - (; tasks, district_width, width) = sample.instance.city +# ── helpers ──────────────────────────────────────────────────────────────────── + +function _plot_city(city; colormap=:turbo, task_markersize=7, depot_markersize=9, kwargs...) 
+ (; tasks, district_width, width) = city ticks = 0:district_width:width - max_time = maximum(t.end_time for t in sample.instance.city.tasks[1:(end - 1)]) + max_time = maximum(t.end_time for t in tasks[2:(end - 1)]) fig = Plots.plot(; xlabel="x", ylabel="y", - gridlinewidth=3, + gridlinewidth=1, + gridlinealpha=0.3, aspect_ratio=:equal, size=(500, 500), xticks=ticks, @@ -23,78 +25,90 @@ function plot_context(::StochasticVehicleSchedulingBenchmark, sample::DataSample colorbar_title="Time", kwargs..., ) - Plots.scatter!( - fig, - [tasks[1].start_point.x], - [tasks[1].start_point.y]; - label=nothing, - marker=:rect, - markersize=10, - ) - Plots.annotate!( - fig, (tasks[1].start_point.x, tasks[1].start_point.y, Plots.text("0", 10)) - ) - for (i_task, task) in enumerate(tasks[2:(end - 1)]) + for task in tasks[2:(end - 1)] (; start_point, end_point) = task - points = [(start_point.x, start_point.y), (end_point.x, end_point.y)] - Plots.plot!(fig, points; color=:black, label=nothing) + Plots.plot!( + fig, + [start_point.x, end_point.x], + [start_point.y, end_point.y]; + color=:gray70, + linewidth=1, + label=nothing, + ) Plots.scatter!( fig, - points[1]; - markersize=10, + [start_point.x], + [start_point.y]; + markersize=task_markersize, marker=:rect, marker_z=task.start_time, - colormap=:turbo, + colormap=colormap, label=nothing, ) Plots.scatter!( fig, - points[2]; - markersize=10, + [end_point.x], + [end_point.y]; + markersize=task_markersize, marker=:rect, marker_z=task.end_time, - colormap=:turbo, + colormap=colormap, label=nothing, ) - Plots.annotate!(fig, (points[1]..., Plots.text("$(i_task)", 10))) end + Plots.scatter!( + fig, + [tasks[1].start_point.x], + [tasks[1].start_point.y]; + label=nothing, + marker=:rect, + markersize=depot_markersize, + markercolor=:black, + ) return fig end -function plot_sample(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) - @assert hasproperty(sample.instance, :city) "Sample does not contain city information." - (; tasks, district_width, width) = sample.instance.city - ticks = 0:district_width:width - solution = Solution(sample.y, sample.instance) - path_list = compute_path_list(solution) - fig = Plots.plot(; - xlabel="x", - ylabel="y", - legend=false, - gridlinewidth=3, - aspect_ratio=:equal, - size=(500, 500), - xticks=ticks, - yticks=ticks, - xlims=(-1, width + 1), - ylims=(-1, width + 1), - kwargs..., - ) +function _plot_routes(fig, city, path_list; route_linewidth=2, route_alpha=0.7) + (; tasks) = city for path in path_list X = Float64[] Y = Float64[] - (; start_point, end_point) = tasks[path[1]] - (; x, y) = end_point - push!(X, x) - push!(Y, y) - for task in path[2:end] - (; start_point, end_point) = tasks[task] + (; end_point) = tasks[path[1]] + push!(X, end_point.x) + push!(Y, end_point.y) + for task_idx in path[2:end] + (; start_point, end_point) = tasks[task_idx] push!(X, start_point.x) push!(Y, start_point.y) push!(X, end_point.x) push!(Y, end_point.y) end - Plots.plot!(fig, X, Y; marker=:circle) + Plots.plot!( + fig, + X, + Y; + linewidth=route_linewidth, + alpha=route_alpha, + label=false, + z_order=:back, + ) end return fig end + +# ── interface methods ────────────────────────────────────────────────────────── + +function plot_context(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) + @assert hasproperty(sample.instance, :city) "Sample does not contain city information." + return _plot_city(sample.instance.city; kwargs...) 
+end + +function plot_sample(::StochasticVehicleSchedulingBenchmark, sample::DataSample; kwargs...) + @assert hasproperty(sample.instance, :city) "Sample does not contain city information." + city = sample.instance.city + fig = _plot_city(city; kwargs...) + solution = Solution(sample.y, sample.instance) + path_list = compute_path_list(solution) + _plot_routes(fig, city, path_list) + return fig +end diff --git a/src/Utils/policy.jl b/src/Utils/policy.jl index 5eb0c6d..f20a6fb 100644 --- a/src/Utils/policy.jl +++ b/src/Utils/policy.jl @@ -39,12 +39,14 @@ function evaluate_policy!( end total_reward = 0.0 labeled_dataset = DataSample[] + step = 0 while !is_terminated(env) + step += 1 y = policy(env; kwargs...) features, state = observe(env) state_copy = deepcopy(state) # To avoid mutation issues reward = step!(env, y) - sample = DataSample(; x=features, y=y, instance=state_copy, extra=(; reward)) + sample = DataSample(; x=features, y=y, instance=state_copy, extra=(; reward, step)) if isempty(labeled_dataset) labeled_dataset = typeof(sample)[sample] else From 23b3ee893d5c48974874a237fb78dbdfa4fc8abe Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Tue, 5 May 2026 18:38:42 +0200 Subject: [PATCH 19/21] fix: formatting --- ext/plots/dvs_plots.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/plots/dvs_plots.jl b/ext/plots/dvs_plots.jl index 3786fd3..7352593 100644 --- a/ext/plots/dvs_plots.jl +++ b/ext/plots/dvs_plots.jl @@ -46,8 +46,9 @@ function plot_state( show_colorbar=true, kwargs..., ) - (; x_depot, y_depot, x_customers, y_customers, is_must_dispatch, start_times) = - DVS.build_state_data(state) + (; x_depot, y_depot, x_customers, y_customers, is_must_dispatch, start_times) = DVS.build_state_data( + state + ) xlabel = show_axis_labels ? "x coordinate" : "" ylabel = show_axis_labels ? "y coordinate" : "" From 823650d12e8df1b82f29b74d8687df035e082b19 Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 6 May 2026 10:09:29 +0200 Subject: [PATCH 20/21] fix: remove useless method --- ext/DFLBenchmarksPlotsExt.jl | 18 ++++-------------- ext/plots/argmax2d_plots.jl | 6 ------ ext/plots/dynamic_assortment_plots.jl | 4 ---- ext/plots/maintenance_plots.jl | 4 ---- test/argmax.jl | 2 +- test/argmax_2d.jl | 2 +- test/contextual_stochastic_argmax.jl | 2 +- test/dynamic_assortment.jl | 2 +- test/fixed_size_shortest_path.jl | 2 +- test/maintenance.jl | 2 +- test/portfolio_optimization.jl | 2 +- test/ranking.jl | 2 +- test/subset_selection.jl | 2 +- test/warcraft.jl | 2 +- 14 files changed, 14 insertions(+), 38 deletions(-) diff --git a/ext/DFLBenchmarksPlotsExt.jl b/ext/DFLBenchmarksPlotsExt.jl index bed6b77..117d174 100644 --- a/ext/DFLBenchmarksPlotsExt.jl +++ b/ext/DFLBenchmarksPlotsExt.jl @@ -7,6 +7,10 @@ using Plots import DecisionFocusedLearningBenchmarks: has_visualization, plot_context, plot_sample, plot_trajectory, animate_trajectory +function _step_str(sample::DataSample) + return hasproperty(sample, :step) ? " (step $(sample.step))" : "" +end + include("plots/argmax_plots.jl") include("plots/argmax2d_plots.jl") include("plots/ranking_plots.jl") @@ -20,18 +24,4 @@ include("plots/dvs_plots.jl") include("plots/dynamic_assortment_plots.jl") include("plots/maintenance_plots.jl") -""" - plot_sample(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) - -Reconstruct a new sample with `y` overridden and delegate to the 2-arg -[`plot_sample`](@ref). Only available when `Plots` is loaded. -""" -function plot_sample(bench::AbstractBenchmark, sample::DataSample, y; kwargs...) 
- return plot_sample( - bench, - DataSample(; sample.context..., x=sample.x, θ=sample.θ, y=y, extra=sample.extra); - kwargs..., - ) -end - end diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl index 6f4d2d8..cc348b6 100644 --- a/ext/plots/argmax2d_plots.jl +++ b/ext/plots/argmax2d_plots.jl @@ -58,9 +58,3 @@ function plot_sample(::Argmax2DBenchmark, sample::DataSample; kwargs...) return _plot_y!(pl, sample.y) end -function plot_sample(::Argmax2DBenchmark, sample::DataSample, y; θ=sample.θ, kwargs...) - pl = _init_plot(; kwargs...) - _plot_polytope!(pl, sample.instance) - _plot_objective!(pl, θ) - return _plot_y!(pl, y) -end diff --git a/ext/plots/dynamic_assortment_plots.jl b/ext/plots/dynamic_assortment_plots.jl index ef52d72..3aa310d 100644 --- a/ext/plots/dynamic_assortment_plots.jl +++ b/ext/plots/dynamic_assortment_plots.jl @@ -1,9 +1,5 @@ has_visualization(::DynamicAssortmentBenchmark) = true -function _step_str(sample::DataSample) - return hasproperty(sample, :step) ? " (step $(sample.step))" : "" -end - function plot_context(::DynamicAssortmentBenchmark, sample::DataSample; kwargs...) prices = sample.instance[1][1, :] .* 10 N = length(prices) diff --git a/ext/plots/maintenance_plots.jl b/ext/plots/maintenance_plots.jl index 2d8f328..490a166 100644 --- a/ext/plots/maintenance_plots.jl +++ b/ext/plots/maintenance_plots.jl @@ -4,10 +4,6 @@ function _degradation_colors(state, n) return [s == n ? :firebrick : :steelblue for s in state] end -function _step_str(sample::DataSample) - return hasproperty(sample, :step) ? " (step $(sample.step))" : "" -end - function plot_context(bench::MaintenanceBenchmark, sample::DataSample; kwargs...) state = sample.instance N = length(state) diff --git a/test/argmax.jl b/test/argmax.jl index 14598b9..aca8f98 100644 --- a/test/argmax.jl +++ b/test/argmax.jl @@ -41,7 +41,7 @@ @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test fig3 isa Plots.Plot end end diff --git a/test/argmax_2d.jl b/test/argmax_2d.jl index 683c582..5447e7b 100644 --- a/test/argmax_2d.jl +++ b/test/argmax_2d.jl @@ -21,7 +21,7 @@ @test figure isa Plots.Plot figure2 = plot_context(b, dataset[1]) @test figure2 isa Plots.Plot - figure3 = plot_sample(b, dataset[1], dataset[2].y) + figure3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test figure3 isa Plots.Plot for (i, sample) in enumerate(dataset) diff --git a/test/contextual_stochastic_argmax.jl b/test/contextual_stochastic_argmax.jl index ed99e43..79533cc 100644 --- a/test/contextual_stochastic_argmax.jl +++ b/test/contextual_stochastic_argmax.jl @@ -129,6 +129,6 @@ end @test fig1 isa Plots.Plot fig2 = plot_sample(b, sample) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, sample, dataset[2].y) + fig3 = plot_sample(b, DataSample(sample; y=dataset[2].y)) @test fig3 isa Plots.Plot end diff --git a/test/dynamic_assortment.jl b/test/dynamic_assortment.jl index 93dce32..40a27ca 100644 --- a/test/dynamic_assortment.jl +++ b/test/dynamic_assortment.jl @@ -371,7 +371,7 @@ end @test fig1 isa Plots.Plot fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, traj[1], traj[2].y) + fig3 = plot_sample(b, DataSample(traj[1]; y=traj[2].y)) @test fig3 isa Plots.Plot fig4 = plot_trajectory(b, traj) @test fig4 isa Plots.Plot diff --git a/test/fixed_size_shortest_path.jl b/test/fixed_size_shortest_path.jl index 79d35da..33fe679 100644 --- 
a/test/fixed_size_shortest_path.jl +++ b/test/fixed_size_shortest_path.jl @@ -40,7 +40,7 @@ @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test fig3 isa Plots.Plot end end diff --git a/test/maintenance.jl b/test/maintenance.jl index 070c69b..8aa4b15 100644 --- a/test/maintenance.jl +++ b/test/maintenance.jl @@ -240,7 +240,7 @@ end @test fig1 isa Plots.Plot fig2 = plot_sample(b, traj[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, traj[1], traj[2].y) + fig3 = plot_sample(b, DataSample(traj[1]; y=traj[2].y)) @test fig3 isa Plots.Plot fig4 = plot_trajectory(b, traj) @test fig4 isa Plots.Plot diff --git a/test/portfolio_optimization.jl b/test/portfolio_optimization.jl index 7f63ba2..922e14f 100644 --- a/test/portfolio_optimization.jl +++ b/test/portfolio_optimization.jl @@ -37,7 +37,7 @@ @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test fig3 isa Plots.Plot end end diff --git a/test/ranking.jl b/test/ranking.jl index 2991ffd..68aa6cf 100644 --- a/test/ranking.jl +++ b/test/ranking.jl @@ -42,7 +42,7 @@ @test fig1 isa Plots.Plot fig2 = plot_sample(b, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b, dataset[1], dataset[2].y) + fig3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test fig3 isa Plots.Plot end end diff --git a/test/subset_selection.jl b/test/subset_selection.jl index 609eb0c..fa85e21 100644 --- a/test/subset_selection.jl +++ b/test/subset_selection.jl @@ -48,7 +48,7 @@ @test fig1 isa Plots.Plot fig2 = plot_sample(b_identity, dataset[1]) @test fig2 isa Plots.Plot - fig3 = plot_sample(b_identity, dataset[1], dataset[2].y) + fig3 = plot_sample(b_identity, DataSample(dataset[1]; y=dataset[2].y)) @test fig3 isa Plots.Plot end end diff --git a/test/warcraft.jl b/test/warcraft.jl index a801e49..49f5ba0 100644 --- a/test/warcraft.jl +++ b/test/warcraft.jl @@ -18,7 +18,7 @@ @test figure isa Plots.Plot figure2 = plot_context(b, dataset[1]) @test figure2 isa Plots.Plot - figure3 = plot_sample(b, dataset[1], dataset[2].y) + figure3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y)) @test figure3 isa Plots.Plot gap = compute_gap(b, dataset, model, dijkstra_maximizer) @test gap >= 0 From 4eb081efc8b1086c17ed17130a430b0121f543cb Mon Sep 17 00:00:00 2001 From: BatyLeo Date: Wed, 6 May 2026 10:09:59 +0200 Subject: [PATCH 21/21] fix: formatting --- ext/plots/argmax2d_plots.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/ext/plots/argmax2d_plots.jl b/ext/plots/argmax2d_plots.jl index cc348b6..d609213 100644 --- a/ext/plots/argmax2d_plots.jl +++ b/ext/plots/argmax2d_plots.jl @@ -57,4 +57,3 @@ function plot_sample(::Argmax2DBenchmark, sample::DataSample; kwargs...) _plot_objective!(pl, sample.θ) return _plot_y!(pl, sample.y) end -
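One closing note on the API simplification in patches 20 and 21: the convenience method `plot_sample(bench, sample, y)` is removed, and callers instead rebuild the sample through the `DataSample` keyword-override constructor, as the updated tests show. A minimal before/after sketch, assuming `b` and `dataset` as in those tests:

```julia
# Before (method removed in PATCH 20/21):
# fig3 = plot_sample(b, dataset[1], dataset[2].y)

# After: override the solution field when copying the sample.
fig3 = plot_sample(b, DataSample(dataset[1]; y=dataset[2].y))
```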