diff --git a/tests/files/mock_responses/evaluations/evaluation_list_run12.xml b/tests/files/mock_responses/evaluations/evaluation_list_run12.xml
new file mode 100644
index 000000000..96f3248c0
--- /dev/null
+++ b/tests/files/mock_responses/evaluations/evaluation_list_run12.xml
@@ -0,0 +1,15 @@
+<oml:evaluations xmlns:oml="http://openml.org/openml">
+  <oml:evaluation>
+    <oml:run_id>12</oml:run_id>
+    <oml:task_id>1</oml:task_id>
+    <oml:setup_id>1</oml:setup_id>
+    <oml:flow_id>100</oml:flow_id>
+    <oml:flow_name>sklearn.ensemble.RandomForestClassifier</oml:flow_name>
+    <oml:data_id>1</oml:data_id>
+    <oml:data_name>anneal</oml:data_name>
+    <oml:function>predictive_accuracy</oml:function>
+    <oml:upload_time>2014-10-06T23:38:13</oml:upload_time>
+    <oml:uploader>1</oml:uploader>
+    <oml:value>0.9876</oml:value>
+  </oml:evaluation>
+</oml:evaluations>
diff --git a/tests/files/mock_responses/evaluations/user_list_1.xml b/tests/files/mock_responses/evaluations/user_list_1.xml
new file mode 100644
index 000000000..94960439f
--- /dev/null
+++ b/tests/files/mock_responses/evaluations/user_list_1.xml
@@ -0,0 +1,6 @@
+<oml:users xmlns:oml="http://openml.org/openml">
+  <oml:user>
+    <oml:id>1</oml:id>
+    <oml:username>MockUser</oml:username>
+  </oml:user>
+</oml:users>
diff --git a/tests/test_evaluations/test_evaluation_functions.py b/tests/test_evaluations/test_evaluation_functions.py
index e15556d7b..2c5e3ccaa 100644
--- a/tests/test_evaluations/test_evaluation_functions.py
+++ b/tests/test_evaluations/test_evaluation_functions.py
@@ -2,10 +2,12 @@
from __future__ import annotations
import pytest
+import requests
+from unittest import mock
import openml
import openml.evaluations
-from openml.testing import TestBase
+from openml.testing import TestBase, create_request_response
@pytest.mark.usefixtures("long_version")
@@ -125,24 +127,5 @@ def test_evaluation_list_filter_flow(self):
         assert evaluations[run_id].values is None
 
-    @pytest.mark.production_server()
-    def test_evaluation_list_filter_run(self):
-        self.use_production_server()
-        run_id = 12
-
-        evaluations = openml.evaluations.list_evaluations(
-            "predictive_accuracy",
-            size=2,
-            runs=[run_id],
-        )
-
-        assert len(evaluations) == 1
-        for run_id in evaluations:
-            assert evaluations[run_id].run_id == run_id
-        # default behaviour of this method: return aggregated results (not
-        # per fold)
-        assert evaluations[run_id].value is not None
-        assert evaluations[run_id].values is None
-
     @pytest.mark.production_server()
     def test_evaluation_list_limit(self):
         self.use_production_server()
@@ -264,3 +252,41 @@ def test_list_evaluations_setups_filter_task(self):
         task_id = [6]
         size = 121
         self._check_list_evaluation_setups(tasks=task_id, size=size)
+
+
+@mock.patch.object(requests.Session, "get")
+def test_evaluation_list_filter_run(mock_get, test_files_directory, test_api_key):
+    """List evaluations filtered by run id using mocked server responses."""
+    mock_dir = test_files_directory / "mock_responses" / "evaluations"
+
+    # GET #1: evaluation list response
+    eval_response = create_request_response(
+        status_code=200,
+        content_filepath=mock_dir / "evaluation_list_run12.xml",
+    )
+    # GET #2: user list response (called internally to resolve uploader names)
+    user_response = create_request_response(
+        status_code=200,
+        content_filepath=mock_dir / "user_list_1.xml",
+    )
+    mock_get.side_effect = [eval_response, user_response]
+
+    run_id = 12
+    evaluations = openml.evaluations.list_evaluations(
+        "predictive_accuracy",
+        size=2,
+        runs=[run_id],
+    )
+
+    assert len(evaluations) == 1
+    for rid in evaluations:
+        assert evaluations[rid].run_id == run_id
+        assert evaluations[rid].value is not None
+        assert evaluations[rid].values is None
+
+    # Verify GET #1 URL contains the evaluation list + run filter. The URL may
+    # be passed positionally or as a keyword, so check both before indexing.
+    first_call = mock_get.call_args_list[0]
+    first_call_url = first_call.args[0] if first_call.args else first_call.kwargs["url"]
+    assert "evaluation/list" in first_call_url
+    assert "run/12" in first_call_url