Skip to content

Commit 0909980

Browse files
authored
Default to not checking for duplicates (#1431)
1 parent a098193 commit 0909980

File tree

6 files changed

+18
-27
lines changed

6 files changed

+18
-27
lines changed

openml/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def _resolve_default_cache_dir() -> Path:
150150
"apikey": "",
151151
"server": "https://www.openml.org/api/v1/xml",
152152
"cachedir": _resolve_default_cache_dir(),
153-
"avoid_duplicate_runs": True,
153+
"avoid_duplicate_runs": False,
154154
"retry_policy": "human",
155155
"connection_n_retries": 5,
156156
"show_progress": False,

openml/runs/functions.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
def run_model_on_task( # noqa: PLR0913
6060
model: Any,
6161
task: int | str | OpenMLTask,
62-
avoid_duplicate_runs: bool = True, # noqa: FBT001, FBT002
62+
avoid_duplicate_runs: bool | None = None,
6363
flow_tags: list[str] | None = None,
6464
seed: int | None = None,
6565
add_local_measures: bool = True, # noqa: FBT001, FBT002
@@ -77,9 +77,10 @@ def run_model_on_task( # noqa: PLR0913
7777
task : OpenMLTask or int or str
7878
Task to perform or Task id.
7979
This may be a model instead if the first argument is an OpenMLTask.
80-
avoid_duplicate_runs : bool, optional (default=True)
80+
avoid_duplicate_runs : bool, optional (default=None)
8181
If True, the run will throw an error if the setup/task combination is already present on
8282
the server. This feature requires an internet connection.
83+
If None, the value of `avoid_duplicate_runs` from your openml configuration is used (which defaults to False).
8384
flow_tags : List[str], optional (default=None)
8485
A list of tags that the flow should have at creation.
8586
seed: int, optional (default=None)
@@ -104,6 +105,8 @@ def run_model_on_task( # noqa: PLR0913
104105
flow : OpenMLFlow (optional, only if `return_flow` is True).
105106
Flow generated from the model.
106107
"""
108+
if avoid_duplicate_runs is None:
109+
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
107110
if avoid_duplicate_runs and not config.apikey:
108111
warnings.warn(
109112
"avoid_duplicate_runs is set to True, but no API key is set. "
@@ -175,7 +178,7 @@ def get_task_and_type_conversion(_task: int | str | OpenMLTask) -> OpenMLTask:
175178
def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
176179
flow: OpenMLFlow,
177180
task: OpenMLTask,
178-
avoid_duplicate_runs: bool = True, # noqa: FBT002, FBT001
181+
avoid_duplicate_runs: bool | None = None,
179182
flow_tags: list[str] | None = None,
180183
seed: int | None = None,
181184
add_local_measures: bool = True, # noqa: FBT001, FBT002
@@ -195,9 +198,10 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
195198
all supervised estimators of scikit learn follow this definition of a model.
196199
task : OpenMLTask
197200
Task to perform. This may be an OpenMLFlow instead if the first argument is an OpenMLTask.
198-
avoid_duplicate_runs : bool, optional (default=True)
201+
avoid_duplicate_runs : bool, optional (default=None)
199202
If True, the run will throw an error if the setup/task combination is already present on
200203
the server. This feature requires an internet connection.
204+
If None, the value of `avoid_duplicate_runs` from your openml configuration is used (which defaults to False).
201205
flow_tags : List[str], optional (default=None)
202206
A list of tags that the flow should have at creation.
203207
seed: int, optional (default=None)
@@ -221,6 +225,9 @@ def run_flow_on_task( # noqa: C901, PLR0912, PLR0915, PLR0913
221225
if flow_tags is not None and not isinstance(flow_tags, list):
222226
raise ValueError("flow_tags should be a list")
223227

228+
if avoid_duplicate_runs is None:
229+
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
230+
224231
# TODO: At some point in the future do not allow for arguments in old order (changed 6-2018).
225232
# Flexibility currently still allowed due to code-snippet in OpenML100 paper (3-2019).
226233
if isinstance(flow, OpenMLTask) and isinstance(task, OpenMLFlow):

openml/testing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ def setUp(self, n_levels: int = 1, tmpdir_suffix: str = "") -> None:
101101
self.cached = True
102102
openml.config.apikey = TestBase.apikey
103103
self.production_server = "https://www.openml.org/api/v1/xml"
104-
openml.config.avoid_duplicate_runs = False
105104
openml.config.set_root_cache_directory(str(self.workdir))
106105

107106
# Increase the number of retries to avoid spurious server failures

tests/test_openml/test_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,14 @@ def test_configuration_file_not_overwritten_on_load():
175175

176176
def test_configuration_loads_booleans(tmp_path):
177177
config_file_content = "avoid_duplicate_runs=true\nshow_progress=false"
178-
with (tmp_path / "config").open("w") as config_file:
178+
tmp_file = tmp_path / "config"
179+
with tmp_file.open("w") as config_file:
179180
config_file.write(config_file_content)
180-
read_config = openml.config._parse_config(tmp_path)
181+
read_config = openml.config._parse_config(tmp_file)
181182

182183
# Explicit test to avoid truthy/falsy modes of other types
183-
assert True == read_config["avoid_duplicate_runs"]
184-
assert False == read_config["show_progress"]
184+
assert read_config["avoid_duplicate_runs"] is True
185+
assert read_config["show_progress"] is False
185186

186187

187188
def test_openml_cache_dir_env_var(tmp_path: Path) -> None:

tests/test_runs/test_run.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,6 @@ def test_to_from_filesystem_vanilla(self):
130130
model=model,
131131
task=task,
132132
add_local_measures=False,
133-
avoid_duplicate_runs=False,
134133
upload_flow=True,
135134
)
136135

@@ -174,7 +173,6 @@ def test_to_from_filesystem_search(self):
174173
model=model,
175174
task=task,
176175
add_local_measures=False,
177-
avoid_duplicate_runs=False,
178176
)
179177

180178
cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)))
@@ -311,7 +309,6 @@ def test_publish_with_local_loaded_flow(self):
311309
flow=flow,
312310
task=task,
313311
add_local_measures=False,
314-
avoid_duplicate_runs=False,
315312
upload_flow=False,
316313
)
317314

@@ -351,7 +348,6 @@ def test_offline_and_online_run_identical(self):
351348
flow=flow,
352349
task=task,
353350
add_local_measures=False,
354-
avoid_duplicate_runs=False,
355351
upload_flow=False,
356352
)
357353

tests/test_runs/test_run_functions.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,12 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create
181181
run_prime = openml.runs.run_model_on_task(
182182
model=model_prime,
183183
task=task,
184-
avoid_duplicate_runs=False,
185184
seed=seed,
186185
)
187186
else:
188187
run_prime = openml.runs.run_model_on_task(
189188
model=model_prime,
190189
task=run.task_id,
191-
avoid_duplicate_runs=False,
192190
seed=seed,
193191
)
194192

@@ -278,7 +276,6 @@ def _remove_random_state(flow):
278276
flow=flow,
279277
task=task,
280278
seed=seed,
281-
avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
282279
)
283280
run_ = run.publish()
284281
TestBase._mark_entity_for_removal("run", run.run_id)
@@ -414,7 +411,6 @@ def test_run_regression_on_classif_task(self):
414411
openml.runs.run_model_on_task(
415412
model=clf,
416413
task=task,
417-
avoid_duplicate_runs=False,
418414
)
419415

420416
@pytest.mark.sklearn()
@@ -969,7 +965,6 @@ def test_initialize_cv_from_run(self):
969965
run = openml.runs.run_model_on_task(
970966
model=randomsearch,
971967
task=task,
972-
avoid_duplicate_runs=False,
973968
seed=1,
974969
)
975970
run_ = run.publish()
@@ -1026,7 +1021,6 @@ def test_local_run_swapped_parameter_order_model(self):
10261021
run = openml.runs.run_model_on_task(
10271022
task,
10281023
clf,
1029-
avoid_duplicate_runs=False,
10301024
upload_flow=False,
10311025
)
10321026

@@ -1055,7 +1049,6 @@ def test_local_run_swapped_parameter_order_flow(self):
10551049
run = openml.runs.run_flow_on_task(
10561050
task,
10571051
flow,
1058-
avoid_duplicate_runs=False,
10591052
upload_flow=False,
10601053
)
10611054

@@ -1083,7 +1076,6 @@ def test_local_run_metric_score(self):
10831076
run = openml.runs.run_model_on_task(
10841077
model=clf,
10851078
task=task,
1086-
avoid_duplicate_runs=False,
10871079
upload_flow=False,
10881080
)
10891081

@@ -1142,7 +1134,6 @@ def test_initialize_model_from_run(self):
11421134
run = openml.runs.run_model_on_task(
11431135
model=clf,
11441136
task=task,
1145-
avoid_duplicate_runs=False,
11461137
)
11471138
run_ = run.publish()
11481139
TestBase._mark_entity_for_removal("run", run_.run_id)
@@ -1251,7 +1242,6 @@ def test_run_with_illegal_flow_id_after_load(self):
12511242
run = openml.runs.run_flow_on_task(
12521243
task=task,
12531244
flow=flow,
1254-
avoid_duplicate_runs=False,
12551245
upload_flow=False,
12561246
)
12571247

@@ -1316,7 +1306,6 @@ def test_run_with_illegal_flow_id_1_after_load(self):
13161306
run = openml.runs.run_flow_on_task(
13171307
task=task,
13181308
flow=flow_new,
1319-
avoid_duplicate_runs=False,
13201309
upload_flow=False,
13211310
)
13221311

@@ -1664,7 +1653,6 @@ def test_run_flow_on_task_downloaded_flow(self):
16641653
run = openml.runs.run_flow_on_task(
16651654
flow=downloaded_flow,
16661655
task=task,
1667-
avoid_duplicate_runs=False,
16681656
upload_flow=False,
16691657
)
16701658

@@ -1913,7 +1901,7 @@ def test_delete_run(self):
19131901
task = openml.tasks.get_task(32) # diabetes; crossvalidation
19141902

19151903
run = openml.runs.run_model_on_task(
1916-
model=clf, task=task, seed=rs, avoid_duplicate_runs=False
1904+
model=clf, task=task, seed=rs,
19171905
)
19181906
run.publish()
19191907

0 commit comments

Comments
 (0)