Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 90 additions & 26 deletions tests/test_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@

import os
import unittest.mock

import pandas as pd
import pytest

import openml
from openml.evaluations.evaluation import OpenMLEvaluation
from openml.setups.setup import OpenMLSetup
from openml.testing import _check_dataset


Expand Down Expand Up @@ -43,24 +48,47 @@ def min_number_evaluations_on_test_server() -> int:
return 8


def _create_mock_listing_call(total_items, item_factory, return_type="dataframe"):
def mock_listing_call(limit, offset, **kwargs):
if offset >= total_items:
return pd.DataFrame() if return_type == "dataframe" else []
size = min(limit, total_items - offset)
items = [item_factory(i) for i in range(offset, offset + size)]
return pd.DataFrame(items) if return_type == "dataframe" else items
return mock_listing_call
Comment on lines +52 to +58
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_create_mock_listing_call appends an empty batch when offset >= total_items, which can leave an extra empty element in the _list_all() result list in cases where limit is None and total_items is an exact multiple of batch_size. This differs from real listing calls which often signal end-of-results via OpenMLServerNoResult, and can subtly change downstream behavior (e.g., callers that expect no empty final batch). Consider raising openml.exceptions.OpenMLServerNoResult when offset >= total_items, or ensure _list_all-style termination without adding an empty batch to results.

Copilot uses AI. Check for mistakes.
def _mocked_perform_api_call(call, request_method):
    # Stand-in for openml._api_calls._perform_api_call: resolve the relative
    # API ``call`` against the configured server and return the raw response
    # text (no retries, no auth handling). ``request_method`` is ignored.
    # NOTE(review): this still performs a real network download; presumably it
    # should serve a canned response for fully offline tests — confirm intent.
    url = openml.config.server + call
    return openml._api_calls._download_text_file(url)


@pytest.mark.test_server()
def test_list_all():
if call == "data/list/limit/1000/offset/0/data_name/iris/data_version/1":
return """<oml:data xmlns:oml="http://openml.org/openml">
<oml:dataset>
<oml:did>61</oml:did>
<oml:name>iris</oml:name>
<oml:version>1</oml:version>
<oml:status>active</oml:status>
</oml:dataset>
</oml:data>"""
raise ValueError(f"Unexpected call: {call}")


Comment on lines +61 to +71
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_mocked_perform_api_call hard-codes the full expected call string (including parameter order). This makes the test brittle to harmless refactors in URL construction (e.g., reordering filters) while not changing behavior. Consider matching on structured components (e.g., prefix + presence of required segments) or parsing the endpoint into parts before deciding which response to return.

Copilot uses AI. Check for mistakes.
@unittest.mock.patch("openml.tasks.functions._list_tasks")
def test_list_all(mock_list_tasks):
    """Smoke test: ``_list_all`` pages through a mocked listing call without error."""
    fake_listing = _create_mock_listing_call(10, lambda index: {"tid": index})
    mock_list_tasks.side_effect = fake_listing
    openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)


@pytest.mark.test_server()
def test_list_all_for_tasks(min_number_tasks_on_test_server):
@unittest.mock.patch("openml.tasks.functions._list_tasks")
def test_list_all_for_tasks(mock_list_tasks, min_number_tasks_on_test_server):
    """``list_tasks`` with ``size`` returns exactly that many (mocked) tasks."""
    expected_count = min_number_tasks_on_test_server
    mock_list_tasks.side_effect = _create_mock_listing_call(
        expected_count, lambda index: {"tid": index}
    )
    tasks = openml.tasks.list_tasks(size=expected_count)
    assert len(tasks) == expected_count


Comment on lines +80 to 86
Copy link

Copilot AI Mar 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests now use local mocks rather than the test server, but the fixture/variable name min_number_tasks_on_test_server (and similar) still suggests a dependency on the remote test server state. Renaming the parameter/fixture (and updating its docstring) to reflect that it’s just a mocked item count would make the intent clearer and avoid confusion about network requirements.

Copilot uses AI. Check for mistakes.
@pytest.mark.test_server()
def test_list_all_with_multiple_batches(min_number_tasks_on_test_server):
@unittest.mock.patch("openml.tasks.functions._list_tasks")
def test_list_all_with_multiple_batches(mock_list_tasks, min_number_tasks_on_test_server):
mock_list_tasks.side_effect = _create_mock_listing_call(
min_number_tasks_on_test_server, lambda i: {"tid": i}
)
# By setting the batch size one lower than the minimum we guarantee at least two
# batches and at the same time do as few batches (roundtrips) as possible.
batch_size = min_number_tasks_on_test_server - 1
Expand All @@ -72,8 +100,11 @@ def test_list_all_with_multiple_batches(min_number_tasks_on_test_server):
assert min_number_tasks_on_test_server <= sum(len(batch) for batch in batches)


@pytest.mark.test_server()
def test_list_all_for_datasets(min_number_datasets_on_test_server):
@unittest.mock.patch("openml.datasets.functions._list_datasets")
def test_list_all_for_datasets(mock_list_datasets, min_number_datasets_on_test_server):
mock_list_datasets.side_effect = _create_mock_listing_call(
min_number_datasets_on_test_server, lambda i: {"did": i, "status": "active"}
)
datasets = openml.datasets.list_datasets(
size=min_number_datasets_on_test_server,
)
Expand All @@ -83,30 +114,57 @@ def test_list_all_for_datasets(min_number_datasets_on_test_server):
_check_dataset(dataset)


@pytest.mark.test_server()
def test_list_all_for_flows(min_number_flows_on_test_server):
@unittest.mock.patch("openml.flows.functions._list_flows")
def test_list_all_for_flows(mock_list_flows, min_number_flows_on_test_server):
    """``list_flows`` with ``size`` returns exactly that many (mocked) flows."""
    expected_count = min_number_flows_on_test_server
    mock_list_flows.side_effect = _create_mock_listing_call(
        expected_count, lambda index: {"id": index}
    )
    flows = openml.flows.list_flows(size=expected_count)
    assert len(flows) == expected_count


@pytest.mark.flaky()  # Other tests might need to upload runs first
@unittest.mock.patch("openml.setups.functions._list_setups")
def test_list_all_for_setups(mock_list_setups, min_number_setups_on_test_server):
    """``list_setups`` with ``size`` returns exactly that many (mocked) setups."""
    expected_count = min_number_setups_on_test_server
    mock_list_setups.side_effect = _create_mock_listing_call(
        expected_count,
        lambda index: OpenMLSetup(setup_id=index, flow_id=1, parameters={}),
        return_type="list",
    )
    # TODO apparently list_setups function does not support kwargs
    setups = openml.setups.list_setups(size=expected_count)
    assert len(setups) == expected_count


@pytest.mark.flaky()  # Other tests might need to upload runs first
@unittest.mock.patch("openml.runs.functions._list_runs")
def test_list_all_for_runs(mock_list_runs, min_number_runs_on_test_server):
    """``list_runs`` with ``size`` returns exactly that many (mocked) runs."""
    expected_count = min_number_runs_on_test_server
    mock_list_runs.side_effect = _create_mock_listing_call(
        expected_count, lambda index: {"run_id": index}
    )
    runs = openml.runs.list_runs(size=expected_count)
    assert len(runs) == expected_count


@pytest.mark.flaky() # Other tests might need to upload runs first
@pytest.mark.test_server()
def test_list_all_for_evaluations(min_number_evaluations_on_test_server):
@unittest.mock.patch("openml.evaluations.functions._list_evaluations")
def test_list_all_for_evaluations(mock_list_evaluations, min_number_evaluations_on_test_server):
mock_list_evaluations.side_effect = _create_mock_listing_call(
min_number_evaluations_on_test_server,
lambda i: OpenMLEvaluation(
run_id=i,
task_id=1,
setup_id=1,
flow_id=1,
flow_name="flow",
data_id=1,
data_name="data",
function="predictive_accuracy",
upload_time="2020-01-01",
uploader=1,
uploader_name="user",
value=0.5,
values=None,
),
return_type="list"
)
# TODO apparently list_evaluations function does not support kwargs
evaluations = openml.evaluations.list_evaluations(
function="predictive_accuracy",
Expand All @@ -116,7 +174,6 @@ def test_list_all_for_evaluations(min_number_evaluations_on_test_server):


@unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call)
def test_list_all_few_results_available(_perform_api_call):
    """Requesting far more results than exist returns only what is available."""
    iris_versions = openml.datasets.list_datasets(size=1000, data_name="iris", data_version=1)
    assert len(iris_versions) == 1, "only one iris dataset version 1 should be present"
Expand All @@ -141,14 +198,21 @@ def test__create_cache_directory(config_mock, tmp_path):
openml.utils._create_cache_directory("ghi")


@pytest.mark.test_server()
def test_correct_test_server_download_state():
@unittest.mock.patch("openml.tasks.get_task")
def test_correct_test_server_download_state(mock_get_task):
    """Check that a task's dataset feature count matches its data's column count.

    NOTE(review): with ``openml.tasks.get_task`` fully mocked, this test no
    longer verifies the real test server's data-source configuration (its
    original purpose); it only exercises the feature-count vs. column-count
    consistency contract on mocked objects. Consider mocking at a lower level
    (e.g. ``_perform_api_call``/the download layer) if the server-configuration
    check should still be exercised offline.
    """
    # Build a task -> dataset chain whose features agree with the dataframe.
    mock_task = unittest.mock.Mock()
    mock_dataset = unittest.mock.Mock()
    mock_dataset.features = {0: "feature1", 1: "feature2"}
    # get_data returns the usual (X, y, categorical_mask, attribute_names) tuple.
    mock_dataset.get_data.return_value = (
        pd.DataFrame({"feature1": [1], "feature2": [2]}),
        None,
        None,
        None,
    )
    mock_task.get_dataset.return_value = mock_dataset
    mock_get_task.return_value = mock_task

    task = openml.tasks.get_task(119)
    dataset = task.get_dataset()
    # Number of declared features must equal the number of data columns.
    assert len(dataset.features) == dataset.get_data()[0].shape[1]
Expand Down