-
-
Notifications
You must be signed in to change notification settings - Fork 272
MNT: Replace live server calls with mocks in tests/test_utils/test_utils.py #1679
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
65e8f7b
73d12dc
c145567
caec466
4afa10c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,8 +2,13 @@ | |
|
|
||
| import os | ||
| import unittest.mock | ||
|
|
||
| import pandas as pd | ||
| import pytest | ||
|
|
||
| import openml | ||
| from openml.evaluations.evaluation import OpenMLEvaluation | ||
| from openml.setups.setup import OpenMLSetup | ||
| from openml.testing import _check_dataset | ||
|
|
||
|
|
||
|
|
@@ -43,24 +48,47 @@ def min_number_evaluations_on_test_server() -> int: | |
| return 8 | ||
|
|
||
|
|
||
| def _create_mock_listing_call(total_items, item_factory, return_type="dataframe"): | ||
| def mock_listing_call(limit, offset, **kwargs): | ||
| if offset >= total_items: | ||
| return pd.DataFrame() if return_type == "dataframe" else [] | ||
| size = min(limit, total_items - offset) | ||
| items = [item_factory(i) for i in range(offset, offset + size)] | ||
| return pd.DataFrame(items) if return_type == "dataframe" else items | ||
| return mock_listing_call | ||
| def _mocked_perform_api_call(call, request_method): | ||
| url = openml.config.server + call | ||
| return openml._api_calls._download_text_file(url) | ||
|
|
||
|
|
||
| @pytest.mark.test_server() | ||
| def test_list_all(): | ||
| if call == "data/list/limit/1000/offset/0/data_name/iris/data_version/1": | ||
| return """<oml:data xmlns:oml="http://openml.org/openml"> | ||
| <oml:dataset> | ||
| <oml:did>61</oml:did> | ||
| <oml:name>iris</oml:name> | ||
| <oml:version>1</oml:version> | ||
| <oml:status>active</oml:status> | ||
| </oml:dataset> | ||
| </oml:data>""" | ||
| raise ValueError(f"Unexpected call: {call}") | ||
|
|
||
|
|
||
|
Comment on lines
+61
to
+71
|
||
| @unittest.mock.patch("openml.tasks.functions._list_tasks") | ||
| def test_list_all(mock_list_tasks): | ||
| mock_list_tasks.side_effect = _create_mock_listing_call(10, lambda i: {"tid": i}) | ||
| openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks) | ||
|
|
||
|
|
||
| @pytest.mark.test_server() | ||
| def test_list_all_for_tasks(min_number_tasks_on_test_server): | ||
| @unittest.mock.patch("openml.tasks.functions._list_tasks") | ||
| def test_list_all_for_tasks(mock_list_tasks, min_number_tasks_on_test_server): | ||
| mock_list_tasks.side_effect = _create_mock_listing_call( | ||
| min_number_tasks_on_test_server, lambda i: {"tid": i} | ||
| ) | ||
| tasks = openml.tasks.list_tasks(size=min_number_tasks_on_test_server) | ||
| assert min_number_tasks_on_test_server == len(tasks) | ||
|
|
||
|
|
||
|
Comment on lines
+80
to
86
|
||
| @pytest.mark.test_server() | ||
| def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): | ||
| @unittest.mock.patch("openml.tasks.functions._list_tasks") | ||
| def test_list_all_with_multiple_batches(mock_list_tasks, min_number_tasks_on_test_server): | ||
| mock_list_tasks.side_effect = _create_mock_listing_call( | ||
| min_number_tasks_on_test_server, lambda i: {"tid": i} | ||
| ) | ||
| # By setting the batch size one lower than the minimum we guarantee at least two | ||
| # batches and at the same time do as few batches (roundtrips) as possible. | ||
| batch_size = min_number_tasks_on_test_server - 1 | ||
|
|
@@ -72,8 +100,11 @@ def test_list_all_with_multiple_batches(min_number_tasks_on_test_server): | |
| assert min_number_tasks_on_test_server <= sum(len(batch) for batch in batches) | ||
|
|
||
|
|
||
| @pytest.mark.test_server() | ||
| def test_list_all_for_datasets(min_number_datasets_on_test_server): | ||
| @unittest.mock.patch("openml.datasets.functions._list_datasets") | ||
| def test_list_all_for_datasets(mock_list_datasets, min_number_datasets_on_test_server): | ||
| mock_list_datasets.side_effect = _create_mock_listing_call( | ||
| min_number_datasets_on_test_server, lambda i: {"did": i, "status": "active"} | ||
| ) | ||
| datasets = openml.datasets.list_datasets( | ||
| size=min_number_datasets_on_test_server, | ||
| ) | ||
|
|
@@ -83,30 +114,57 @@ def test_list_all_for_datasets(min_number_datasets_on_test_server): | |
| _check_dataset(dataset) | ||
|
|
||
|
|
||
| @pytest.mark.test_server() | ||
| def test_list_all_for_flows(min_number_flows_on_test_server): | ||
| @unittest.mock.patch("openml.flows.functions._list_flows") | ||
| def test_list_all_for_flows(mock_list_flows, min_number_flows_on_test_server): | ||
| mock_list_flows.side_effect = _create_mock_listing_call( | ||
| min_number_flows_on_test_server, lambda i: {"id": i} | ||
| ) | ||
| flows = openml.flows.list_flows(size=min_number_flows_on_test_server) | ||
| assert min_number_flows_on_test_server == len(flows) | ||
|
|
||
|
|
||
| @pytest.mark.flaky() # Other tests might need to upload runs first | ||
| @pytest.mark.test_server() | ||
| def test_list_all_for_setups(min_number_setups_on_test_server): | ||
| @unittest.mock.patch("openml.setups.functions._list_setups") | ||
| def test_list_all_for_setups(mock_list_setups, min_number_setups_on_test_server): | ||
| mock_list_setups.side_effect = _create_mock_listing_call( | ||
| min_number_setups_on_test_server, | ||
| lambda i: OpenMLSetup(setup_id=i, flow_id=1, parameters={}), | ||
| return_type="list" | ||
Rahuldrabit marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ) | ||
| # TODO apparently list_setups function does not support kwargs | ||
| setups = openml.setups.list_setups(size=min_number_setups_on_test_server) | ||
| assert min_number_setups_on_test_server == len(setups) | ||
|
|
||
|
|
||
| @pytest.mark.flaky() # Other tests might need to upload runs first | ||
| @pytest.mark.test_server() | ||
| def test_list_all_for_runs(min_number_runs_on_test_server): | ||
| @unittest.mock.patch("openml.runs.functions._list_runs") | ||
| def test_list_all_for_runs(mock_list_runs, min_number_runs_on_test_server): | ||
| mock_list_runs.side_effect = _create_mock_listing_call( | ||
| min_number_runs_on_test_server, lambda i: {"run_id": i} | ||
| ) | ||
| runs = openml.runs.list_runs(size=min_number_runs_on_test_server) | ||
| assert min_number_runs_on_test_server == len(runs) | ||
|
|
||
|
|
||
| @pytest.mark.flaky() # Other tests might need to upload runs first | ||
| @pytest.mark.test_server() | ||
| def test_list_all_for_evaluations(min_number_evaluations_on_test_server): | ||
| @unittest.mock.patch("openml.evaluations.functions._list_evaluations") | ||
| def test_list_all_for_evaluations(mock_list_evaluations, min_number_evaluations_on_test_server): | ||
| mock_list_evaluations.side_effect = _create_mock_listing_call( | ||
| min_number_evaluations_on_test_server, | ||
| lambda i: OpenMLEvaluation( | ||
| run_id=i, | ||
| task_id=1, | ||
| setup_id=1, | ||
| flow_id=1, | ||
| flow_name="flow", | ||
| data_id=1, | ||
| data_name="data", | ||
| function="predictive_accuracy", | ||
| upload_time="2020-01-01", | ||
| uploader=1, | ||
| uploader_name="user", | ||
| value=0.5, | ||
| values=None, | ||
| ), | ||
| return_type="list" | ||
| ) | ||
| # TODO apparently list_evaluations function does not support kwargs | ||
| evaluations = openml.evaluations.list_evaluations( | ||
| function="predictive_accuracy", | ||
|
|
@@ -116,7 +174,6 @@ def test_list_all_for_evaluations(min_number_evaluations_on_test_server): | |
|
|
||
|
|
||
| @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=_mocked_perform_api_call) | ||
| @pytest.mark.test_server() | ||
| def test_list_all_few_results_available(_perform_api_call): | ||
| datasets = openml.datasets.list_datasets(size=1000, data_name="iris", data_version=1) | ||
| assert len(datasets) == 1, "only one iris dataset version 1 should be present" | ||
|
|
@@ -141,14 +198,21 @@ def test__create_cache_directory(config_mock, tmp_path): | |
| openml.utils._create_cache_directory("ghi") | ||
|
|
||
|
|
||
| @pytest.mark.test_server() | ||
| def test_correct_test_server_download_state(): | ||
| @unittest.mock.patch("openml.tasks.get_task") | ||
| def test_correct_test_server_download_state(mock_get_task): | ||
| """This test verifies that the test server downloads the data from the correct source. | ||
|
|
||
| If this tests fails, it is highly likely that the test server is not configured correctly. | ||
| Usually, this means that the test server is serving data from the task with the same ID from the production server. | ||
| That is, it serves parquet files wrongly associated with the test server's task. | ||
| """ | ||
| mock_task = unittest.mock.Mock() | ||
| mock_dataset = unittest.mock.Mock() | ||
| mock_dataset.features = {0: "feature1", 1: "feature2"} | ||
| mock_dataset.get_data.return_value = (pd.DataFrame({"feature1": [1], "feature2": [2]}), None, None, None) | ||
| mock_task.get_dataset.return_value = mock_dataset | ||
| mock_get_task.return_value = mock_task | ||
|
|
||
| task = openml.tasks.get_task(119) | ||
|
Comment on lines
203
to
216
|
||
| dataset = task.get_dataset() | ||
| assert len(dataset.features) == dataset.get_data()[0].shape[1] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`_create_mock_listing_call` appends an empty batch when
`offset >= total_items`, which can leave an extra empty element in the `_list_all()` result list in cases where `limit` is None and `total_items` is an exact multiple of `batch_size`. This differs from real listing calls, which often signal end-of-results via `OpenMLServerNoResult`, and can subtly change downstream behavior (e.g., callers that expect no empty final batch). Consider raising `openml.exceptions.OpenMLServerNoResult` when `offset >= total_items`, or ensuring `_list_all`-style termination without adding an empty batch to results.