diff --git a/google/genai/tests/tunings/test_validate_reinforcement_tuning_reward.py b/google/genai/tests/tunings/test_validate_reinforcement_tuning_reward.py new file mode 100644 index 000000000..b9ed14c8e --- /dev/null +++ b/google/genai/tests/tunings/test_validate_reinforcement_tuning_reward.py @@ -0,0 +1,115 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +"""Tests for tunings.validate_reinforcement_tuning_reward().""" + +from ... import types as genai_types +from .. import pytest_helper + +_SAMPLE_RESPONSE = genai_types.Content( + role="model", + parts=[genai_types.Part(text="Paris")], +) + +_EXAMPLE = genai_types.ReinforcementTuningExample( + contents=[ + genai_types.Content( + role="user", + parts=[genai_types.Part(text="What is the capital of France?")], + ) + ], + references={"answer": "Paris"}, +) + + +test_table: list[pytest_helper.TestTableItem] = [ + pytest_helper.TestTableItem( + name="test_validate_single_reward_autorater", + parameters=genai_types.ValidateReinforcementTuningRewardParameters( + parent="projects/801452371447/locations/us-central1", + sample_response=_SAMPLE_RESPONSE, + example=_EXAMPLE, + single_reward_config=genai_types.SingleReinforcementTuningRewardConfig( + reward_name="autoraterReward", + autorater_scorer=genai_types.ReinforcementTuningAutoraterScorer( + autorater_config=genai_types.AutoraterConfig( + autorater_model="test-model" + ) + ), + ), + ), + exception_if_mldev=( + "only supported in Gemini Enterprise Agent Platform mode" + ), + ), + pytest_helper.TestTableItem( + name="test_validate_single_reward_string_match", + parameters=genai_types.ValidateReinforcementTuningRewardParameters( + parent="projects/801452371447/locations/us-central1", + sample_response=_SAMPLE_RESPONSE, + example=_EXAMPLE, + single_reward_config=genai_types.SingleReinforcementTuningRewardConfig( + reward_name="stringMatchReward", + string_match_reward_scorer=genai_types.ReinforcementTuningStringMatchRewardScorer( + correct_answer_reward=1.0, + wrong_answer_reward=-1.0, + string_match_expression=genai_types.ReinforcementTuningStringMatchRewardScorerStringMatchExpression( + match_operation="EXACT_MATCH", + expression="{{references.answer}}", + ), + ), + ), + ), + exception_if_mldev=( + "only supported in Gemini Enterprise Agent Platform mode" + ), + ), + pytest_helper.TestTableItem( + name="test_validate_composite_reward", + parameters=genai_types.ValidateReinforcementTuningRewardParameters( + parent="projects/801452371447/locations/us-central1", + sample_response=_SAMPLE_RESPONSE, + example=_EXAMPLE, + composite_reward_config=genai_types.CompositeReinforcementTuningRewardConfig( + weighted_reward_configs=[ + genai_types.CompositeReinforcementTuningRewardConfigWeightedRewardConfig( + weight=1.0, + reward_config=genai_types.SingleReinforcementTuningRewardConfig( + reward_name="autoraterReward", + autorater_scorer=genai_types.ReinforcementTuningAutoraterScorer( + autorater_config=genai_types.AutoraterConfig( + autorater_model="test-model" + ) + ), + ), + ), + ], + ), + ), + exception_if_mldev=( + "only supported in Gemini Enterprise Agent Platform mode" + ), + ), +] + +pytestmark = pytest_helper.setup( + file=__file__, + globals_for_file=globals(), + test_method="tunings.validate_reinforcement_tuning_reward", + test_table=test_table, +) + +pytest_plugins = ("pytest_asyncio",) diff --git a/google/genai/tunings.py b/google/genai/tunings.py index fd4efe4da..0fd11de25 100644 --- a/google/genai/tunings.py +++ b/google/genai/tunings.py @@ -137,6 +137,27 @@ def _CancelTuningJobResponse_from_vertex( return to_object +def _CodeExecutionResult_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['outcome']) is not None: + setv(to_object, ['outcome'], getv(from_object, ['outcome'])) + + if getv(from_object, ['output']) is not None: + setv(to_object, ['output'], getv(from_object, ['output'])) + + if getv(from_object, ['id']) is not None: + raise ValueError( + 'id parameter is only supported in Gemini Developer API mode, not in' + ' Gemini Enterprise Agent Platform mode.' + ) + + return to_object + + def _CompositeReinforcementTuningRewardConfigWeightedRewardConfig_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -179,6 +200,28 @@ def _CompositeReinforcementTuningRewardConfig_to_vertex( return to_object +def _Content_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['parts']) is not None: + setv( + to_object, + ['parts'], + [ + _Part_to_vertex(item, to_object, root_object) + for item in getv(from_object, ['parts']) + ], + ) + + if getv(from_object, ['role']) is not None: + setv(to_object, ['role'], getv(from_object, ['role'])) + + return to_object + + def _CreateTuningJobConfig_to_mldev( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -996,6 +1039,27 @@ def _EvaluationConfig_to_vertex( return to_object +def _ExecutableCode_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['code']) is not None: + setv(to_object, ['code'], getv(from_object, ['code'])) + + if getv(from_object, ['language']) is not None: + setv(to_object, ['language'], getv(from_object, ['language'])) + + if getv(from_object, ['id']) is not None: + raise ValueError( + 'id parameter is only supported in Gemini Developer API mode, not in' + ' Gemini Enterprise Agent Platform mode.' + ) + + return to_object + + def _GenerationConfig_from_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1335,6 +1399,88 @@ def _MultiSpeakerVoiceConfig_to_vertex( return to_object +def _Part_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['media_resolution']) is not None: + setv( + to_object, ['mediaResolution'], getv(from_object, ['media_resolution']) + ) + + if getv(from_object, ['code_execution_result']) is not None: + setv( + to_object, + ['codeExecutionResult'], + _CodeExecutionResult_to_vertex( + getv(from_object, ['code_execution_result']), to_object, root_object + ), + ) + + if getv(from_object, ['executable_code']) is not None: + setv( + to_object, + ['executableCode'], + _ExecutableCode_to_vertex( + getv(from_object, ['executable_code']), to_object, root_object + ), + ) + + if getv(from_object, ['file_data']) is not None: + setv(to_object, ['fileData'], getv(from_object, ['file_data'])) + + if getv(from_object, ['function_call']) is not None: + setv(to_object, ['functionCall'], getv(from_object, ['function_call'])) + + if getv(from_object, ['function_response']) is not None: + setv( + to_object, + ['functionResponse'], + getv(from_object, ['function_response']), + ) + + if getv(from_object, ['inline_data']) is not None: + setv(to_object, ['inlineData'], getv(from_object, ['inline_data'])) + + if getv(from_object, ['text']) is not None: + setv(to_object, ['text'], getv(from_object, ['text'])) + + if getv(from_object, ['thought']) is not None: + setv(to_object, ['thought'], getv(from_object, ['thought'])) + + if getv(from_object, ['thought_signature']) is not None: + setv( + to_object, + ['thoughtSignature'], + getv(from_object, ['thought_signature']), + ) + + if getv(from_object, ['video_metadata']) is not None: + setv(to_object, ['videoMetadata'], getv(from_object, ['video_metadata'])) + + if getv(from_object, ['tool_call']) is not None: + raise ValueError( + 'tool_call parameter is only supported in Gemini Developer API mode,' + ' not in Gemini Enterprise Agent Platform mode.' + ) + + if getv(from_object, ['tool_response']) is not None: + raise ValueError( + 'tool_response parameter is only supported in Gemini Developer API' + ' mode, not in Gemini Enterprise Agent Platform mode.' + ) + + if getv(from_object, ['part_metadata']) is not None: + raise ValueError( + 'part_metadata parameter is only supported in Gemini Developer API' + ' mode, not in Gemini Enterprise Agent Platform mode.' + ) + + return to_object + + def _ReinforcementTuningAutoraterScorer_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -1350,6 +1496,63 @@ def _ReinforcementTuningAutoraterScorer_to_vertex( ), ) + if getv(from_object, ['autorater_prompt']) is not None: + setv( + to_object, ['autoraterPrompt'], getv(from_object, ['autorater_prompt']) + ) + + if getv(from_object, ['autorater_response_parse_config']) is not None: + setv( + to_object, + ['autoraterResponseParseConfig'], + getv(from_object, ['autorater_response_parse_config']), + ) + + if getv(from_object, ['parsed_response_conversion_scorer']) is not None: + setv( + to_object, + ['parsedResponseConversionScorer'], + getv(from_object, ['parsed_response_conversion_scorer']), + ) + + if getv(from_object, ['exact_match_scorer']) is not None: + setv( + to_object, + ['exactMatchScorer'], + getv(from_object, ['exact_match_scorer']), + ) + + return to_object + + +def _ReinforcementTuningExample_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['contents']) is not None: + setv( + to_object, + ['contents'], + [ + _Content_to_vertex(item, to_object, root_object) + for item in getv(from_object, ['contents']) + ], + ) + + if getv(from_object, ['references']) is not None: + setv(to_object, ['references'], getv(from_object, ['references'])) + + if getv(from_object, ['system_instruction']) is not None: + setv( + to_object, + ['systemInstruction'], + _Content_to_vertex( + getv(from_object, ['system_instruction']), to_object, root_object + ), + ) + return to_object @@ -1399,6 +1602,37 @@ def _SingleReinforcementTuningRewardConfig_to_vertex( ), ) + if getv(from_object, ['reward_name']) is not None: + setv(to_object, ['rewardName'], getv(from_object, ['reward_name'])) + + if getv(from_object, ['parse_response_config']) is not None: + setv( + to_object, + ['parseResponseConfig'], + getv(from_object, ['parse_response_config']), + ) + + if getv(from_object, ['code_execution_reward_scorer']) is not None: + setv( + to_object, + ['codeExecutionRewardScorer'], + getv(from_object, ['code_execution_reward_scorer']), + ) + + if getv(from_object, ['string_match_reward_scorer']) is not None: + setv( + to_object, + ['stringMatchRewardScorer'], + getv(from_object, ['string_match_reward_scorer']), + ) + + if getv(from_object, ['cloud_run_reward_scorer']) is not None: + setv( + to_object, + ['cloudRunRewardScorer'], + getv(from_object, ['cloud_run_reward_scorer']), + ) + return to_object @@ -1852,6 +2086,83 @@ def _TuningValidationDataset_to_vertex( return to_object +def _ValidateReinforcementTuningRewardParametersPrivate_to_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['parent']) is not None: + setv(to_object, ['_url', 'parent'], getv(from_object, ['parent'])) + + if getv(from_object, ['sample_response']) is not None: + setv( + to_object, + ['sampleResponse'], + _Content_to_vertex( + getv(from_object, ['sample_response']), to_object, root_object + ), + ) + + if getv(from_object, ['example']) is not None: + setv( + to_object, + ['example'], + _ReinforcementTuningExample_to_vertex( + getv(from_object, ['example']), to_object, root_object + ), + ) + + if getv(from_object, ['single_reward_config']) is not None: + setv( + to_object, + ['singleRewardConfig'], + _SingleReinforcementTuningRewardConfig_to_vertex( + getv(from_object, ['single_reward_config']), to_object, root_object + ), + ) + + if getv(from_object, ['composite_reward_config']) is not None: + setv( + to_object, + ['compositeRewardConfig'], + _CompositeReinforcementTuningRewardConfig_to_vertex( + getv(from_object, ['composite_reward_config']), + to_object, + root_object, + ), + ) + + return to_object + + +def _ValidateReinforcementTuningRewardResponse_from_vertex( + from_object: Union[dict[str, Any], object], + parent_object: Optional[dict[str, Any]] = None, + root_object: Optional[Union[dict[str, Any], object]] = None, +) -> dict[str, Any]: + to_object: dict[str, Any] = {} + if getv(from_object, ['sdkHttpResponse']) is not None: + setv( + to_object, ['sdk_http_response'], getv(from_object, ['sdkHttpResponse']) + ) + + if getv(from_object, ['overallReward']) is not None: + setv(to_object, ['overall_reward'], getv(from_object, ['overallReward'])) + + if getv(from_object, ['error']) is not None: + setv(to_object, ['error'], getv(from_object, ['error'])) + + if getv(from_object, ['rewardInfoDetails']) is not None: + setv( + to_object, + ['reward_info_details'], + {k: v for k, v in getv(from_object, ['rewardInfoDetails']).items()}, + ) + + return to_object + + def _VoiceConfig_to_vertex( from_object: Union[dict[str, Any], object], parent_object: Optional[dict[str, Any]] = None, @@ -2326,6 +2637,129 @@ def _tune_mldev( self._api_client._verify_response(return_value) return return_value + def validate_reinforcement_tuning_reward( + self, + *, + parent: str, + sample_response: types.ContentOrDict, + example: types.ReinforcementTuningExampleOrDict, + single_reward_config: Optional[ + types.SingleReinforcementTuningRewardConfigOrDict + ] = None, + composite_reward_config: Optional[ + types.CompositeReinforcementTuningRewardConfigOrDict + ] = None, + config: Optional[ + types.ValidateReinforcementTuningRewardConfigOrDict + ] = None, + ) -> types.ValidateReinforcementTuningRewardResponse: + """Validates a reinforcement tuning reward configuration. + + Allows users to validate a reward configuration against a sample response + and example before creating a reinforcement tuning job, so that they can + iterate on the reward configuration without having to create a tuning job + each time. + + Args: + parent: The resource name of the Location to validate the reward in, e.g. + `projects/{project}/locations/{location}`. + sample_response: The sample response for validating the reward + configuration. + example: The example to validate the reward configuration. + single_reward_config: Single reward function configuration for + reinforcement tuning. Mutually exclusive with composite_reward_config. + composite_reward_config: Composite reward function configuration for + reinforcement tuning. Mutually exclusive with single_reward_config. + config: Optional parameters for the request. + + Returns: + A ValidateReinforcementTuningRewardResponse with the computed reward(s). + """ + + parameter_model = types._ValidateReinforcementTuningRewardParametersPrivate( + parent=parent, + sample_response=sample_response, + example=example, + single_reward_config=single_reward_config, + composite_reward_config=composite_reward_config, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError( + 'This method is only supported in Gemini Enterprise Agent Platform' + ' mode, not in Gemini Developer API mode.' + ) + else: + request_dict = ( + _ValidateReinforcementTuningRewardParametersPrivate_to_vertex( + parameter_model, None, parameter_model + ) + ) + request_url_dict = request_dict.get('_url') + if request_url_dict: + path = ( + '{parent}/tuningJobs:validateReinforcementTuningReward'.format_map( + request_url_dict + ) + ) + else: + path = '{parent}/tuningJobs:validateReinforcementTuningReward' + + query_params = request_dict.get('_query') + if query_params: + path = f'{path}?{urlencode(query_params)}' + # TODO: remove the hack that pops config. + request_dict.pop('config', None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = self._api_client.request( + 'post', path, request_dict, http_options + ) + + response_dict = {} if not response.body else json.loads(response.body) + + if self._api_client.vertexai: + response_dict = _ValidateReinforcementTuningRewardResponse_from_vertex( + response_dict, None, parameter_model + ) + + return_value = ( + types.ValidateReinforcementTuningRewardResponse._from_response( + response=response_dict, + kwargs={ + 'config': { + 'response_schema': getattr( + parameter_model.config, 'response_schema', None + ), + 'response_json_schema': getattr( + parameter_model.config, 'response_json_schema', None + ), + 'include_all_fields': getattr( + parameter_model.config, 'include_all_fields', None + ), + } + } + if getattr(parameter_model, 'config', None) + else {}, + ) + ) + return_value.sdk_http_response = types.HttpResponse( + headers=response.headers + ) + self._api_client._verify_response(return_value) + return return_value + def get( self, *, @@ -2906,6 +3340,129 @@ async def _tune_mldev( self._api_client._verify_response(return_value) return return_value + async def validate_reinforcement_tuning_reward( + self, + *, + parent: str, + sample_response: types.ContentOrDict, + example: types.ReinforcementTuningExampleOrDict, + single_reward_config: Optional[ + types.SingleReinforcementTuningRewardConfigOrDict + ] = None, + composite_reward_config: Optional[ + types.CompositeReinforcementTuningRewardConfigOrDict + ] = None, + config: Optional[ + types.ValidateReinforcementTuningRewardConfigOrDict + ] = None, + ) -> types.ValidateReinforcementTuningRewardResponse: + """Validates a reinforcement tuning reward configuration. + + Allows users to validate a reward configuration against a sample response + and example before creating a reinforcement tuning job, so that they can + iterate on the reward configuration without having to create a tuning job + each time. + + Args: + parent: The resource name of the Location to validate the reward in, e.g. + `projects/{project}/locations/{location}`. + sample_response: The sample response for validating the reward + configuration. + example: The example to validate the reward configuration. + single_reward_config: Single reward function configuration for + reinforcement tuning. Mutually exclusive with composite_reward_config. + composite_reward_config: Composite reward function configuration for + reinforcement tuning. Mutually exclusive with single_reward_config. + config: Optional parameters for the request. + + Returns: + A ValidateReinforcementTuningRewardResponse with the computed reward(s). + """ + + parameter_model = types._ValidateReinforcementTuningRewardParametersPrivate( + parent=parent, + sample_response=sample_response, + example=example, + single_reward_config=single_reward_config, + composite_reward_config=composite_reward_config, + config=config, + ) + + request_url_dict: Optional[dict[str, str]] + if not self._api_client.vertexai: + raise ValueError( + 'This method is only supported in Gemini Enterprise Agent Platform' + ' mode, not in Gemini Developer API mode.' + ) + else: + request_dict = ( + _ValidateReinforcementTuningRewardParametersPrivate_to_vertex( + parameter_model, None, parameter_model + ) + ) + request_url_dict = request_dict.get('_url') + if request_url_dict: + path = ( + '{parent}/tuningJobs:validateReinforcementTuningReward'.format_map( + request_url_dict + ) + ) + else: + path = '{parent}/tuningJobs:validateReinforcementTuningReward' + + query_params = request_dict.get('_query') + if query_params: + path = f'{path}?{urlencode(query_params)}' + # TODO: remove the hack that pops config. + request_dict.pop('config', None) + + http_options: Optional[types.HttpOptions] = None + if ( + parameter_model.config is not None + and parameter_model.config.http_options is not None + ): + http_options = parameter_model.config.http_options + + request_dict = _common.convert_to_dict(request_dict) + request_dict = _common.encode_unserializable_types(request_dict) + + response = await self._api_client.async_request( + 'post', path, request_dict, http_options + ) + + response_dict = {} if not response.body else json.loads(response.body) + + if self._api_client.vertexai: + response_dict = _ValidateReinforcementTuningRewardResponse_from_vertex( + response_dict, None, parameter_model + ) + + return_value = ( + types.ValidateReinforcementTuningRewardResponse._from_response( + response=response_dict, + kwargs={ + 'config': { + 'response_schema': getattr( + parameter_model.config, 'response_schema', None + ), + 'response_json_schema': getattr( + parameter_model.config, 'response_json_schema', None + ), + 'include_all_fields': getattr( + parameter_model.config, 'include_all_fields', None + ), + } + } + if getattr(parameter_model, 'config', None) + else {}, + ) + ) + return_value.sdk_http_response = types.HttpResponse( + headers=response.headers + ) + self._api_client._verify_response(return_value) + return return_value + async def get( self, *, diff --git a/google/genai/types.py b/google/genai/types.py index 708519471..bc0c129c4 100644 --- a/google/genai/types.py +++ b/google/genai/types.py @@ -1063,6 +1063,30 @@ class TuningMethod(_common.CaseInSensitiveEnum): """Reinforcement tuning.""" +class ResponseParseType(_common.CaseInSensitiveEnum): + """Defines how to parse sample response.""" + + RESPONSE_PARSE_TYPE_UNSPECIFIED = 'RESPONSE_PARSE_TYPE_UNSPECIFIED' + """Default value. This value is unused.""" + IDENTITY = 'IDENTITY' + """Use the sample response as is.""" + REGEX_EXTRACT = 'REGEX_EXTRACT' + """Use regex to extract the important part of sample response.""" + + +class MatchOperation(_common.CaseInSensitiveEnum): + """Match operation to use for evaluation.""" + + MATCH_OPERATION_UNSPECIFIED = 'MATCH_OPERATION_UNSPECIFIED' + """Default value. This value is unused.""" + REGEX_CONTAINS = 'REGEX_CONTAINS' + """Equivalent to GoogleSQL `REGEX_CONTAINS(target, expression)`.""" + PARTIAL_MATCH = 'PARTIAL_MATCH' + """`expression` is a substring of target.""" + EXACT_MATCH = 'EXACT_MATCH' + """`expression` is an exact match of target.""" + + class FileState(_common.CaseInSensitiveEnum): """State for the lifecycle of a File.""" @@ -14262,12 +14286,125 @@ class TuningValidationDatasetDict(TypedDict, total=False): ] +class ReinforcementTuningParseResponseConfig(_common.BaseModel): + """Defines how to parse sample response for reinforcement tuning.""" + + parse_type: Optional[ResponseParseType] = Field( + default=None, description="""Defines how to parse sample response.""" + ) + regex_extract_expression: Optional[str] = Field( + default=None, + description="""Defines the regex to extract the important part of sample response. This field is only used when `parse_type` is `REGEX_EXTRACT`.""", + ) + + +class ReinforcementTuningParseResponseConfigDict(TypedDict, total=False): + """Defines how to parse sample response for reinforcement tuning.""" + + parse_type: Optional[ResponseParseType] + """Defines how to parse sample response.""" + + regex_extract_expression: Optional[str] + """Defines the regex to extract the important part of sample response. This field is only used when `parse_type` is `REGEX_EXTRACT`.""" + + +ReinforcementTuningParseResponseConfigOrDict = Union[ + ReinforcementTuningParseResponseConfig, + ReinforcementTuningParseResponseConfigDict, +] + + +class ReinforcementTuningAutoraterScorerParsedResponseConversionScorer( + _common.BaseModel +): + """Scores responses by directly converting parsed autorater response to float reward. + + Important: reward is clipped to be within [-1, 1]. + """ + + pass + + +class ReinforcementTuningAutoraterScorerParsedResponseConversionScorerDict( + TypedDict, total=False +): + """Scores responses by directly converting parsed autorater response to float reward. + + Important: reward is clipped to be within [-1, 1]. + """ + + pass + + +ReinforcementTuningAutoraterScorerParsedResponseConversionScorerOrDict = Union[ + ReinforcementTuningAutoraterScorerParsedResponseConversionScorer, + ReinforcementTuningAutoraterScorerParsedResponseConversionScorerDict, +] + + +class ReinforcementTuningAutoraterScorerExactMatchScorer(_common.BaseModel): + """Scores autorater responses by using exact string match reward scorer.""" + + correct_answer_reward: Optional[float] = Field( + default=None, + description="""Assigns this reward score if parsed response string equals the expression.""", + ) + wrong_answer_reward: Optional[float] = Field( + default=None, + description="""Assigns this reward score if parsed reward value does not equal the expression.""", + ) + expression: Optional[str] = Field( + default=None, + description="""The string expression to match against. Supports substitution in the format of {{references.reference}} before matching. No regex support.""", + ) + + +class ReinforcementTuningAutoraterScorerExactMatchScorerDict( + TypedDict, total=False +): + """Scores autorater responses by using exact string match reward scorer.""" + + correct_answer_reward: Optional[float] + """Assigns this reward score if parsed response string equals the expression.""" + + wrong_answer_reward: Optional[float] + """Assigns this reward score if parsed reward value does not equal the expression.""" + + expression: Optional[str] + """The string expression to match against. Supports substitution in the format of {{references.reference}} before matching. No regex support.""" + + +ReinforcementTuningAutoraterScorerExactMatchScorerOrDict = Union[ + ReinforcementTuningAutoraterScorerExactMatchScorer, + ReinforcementTuningAutoraterScorerExactMatchScorerDict, +] + + class ReinforcementTuningAutoraterScorer(_common.BaseModel): """Reinforcement tuning autorater scorer.""" autorater_config: Optional[AutoraterConfig] = Field( default=None, description="""Autorater config for evaluation.""" ) + autorater_prompt: Optional[str] = Field( + default=None, + description="""Allows substituting {{prompt}}, {{response}}, {{system_instruction}} and {{references.reference}} into autorater prompt.""", + ) + autorater_response_parse_config: Optional[ + ReinforcementTuningParseResponseConfig + ] = Field(default=None, description="""Parses autorater returned response.""") + parsed_response_conversion_scorer: Optional[ + ReinforcementTuningAutoraterScorerParsedResponseConversionScorer + ] = Field( + default=None, + description="""Scores autorater responses by directly converting parsed autorater response to float reward.""", + ) + exact_match_scorer: Optional[ + ReinforcementTuningAutoraterScorerExactMatchScorer + ] = Field( + default=None, + description="""Scores autorater responses by using exact string match reward scorer.""", + ) class ReinforcementTuningAutoraterScorerDict(TypedDict, total=False): @@ -14276,17 +14413,227 @@ class ReinforcementTuningAutoraterScorerDict(TypedDict, total=False): autorater_config: Optional[AutoraterConfigDict] """Autorater config for evaluation.""" + autorater_prompt: Optional[str] + """Allows substituting {{prompt}}, {{response}}, {{system_instruction}} and {{references.reference}} into autorater prompt.""" + + autorater_response_parse_config: Optional[ + ReinforcementTuningParseResponseConfigDict + ] + """Parses autorater returned response.""" + + parsed_response_conversion_scorer: Optional[ + ReinforcementTuningAutoraterScorerParsedResponseConversionScorerDict + ] + """Scores autorater responses by directly converting parsed autorater response to float reward.""" + + exact_match_scorer: Optional[ + ReinforcementTuningAutoraterScorerExactMatchScorerDict + ] + """Scores autorater responses by using exact string match reward scorer.""" + ReinforcementTuningAutoraterScorerOrDict = Union[ ReinforcementTuningAutoraterScorer, ReinforcementTuningAutoraterScorerDict ] +class ReinforcementTuningCodeExecutionRewardScorer(_common.BaseModel): + """Scores parsed responses for code execution use cases.""" + + python_code_snippet: Optional[str] = Field( + default=None, + description="""Example python code snippet which assigns reward of 1 to answer matching user provided reference answer in per prompt references map.""", + ) + + +class ReinforcementTuningCodeExecutionRewardScorerDict(TypedDict, total=False): + """Scores parsed responses for code execution use cases.""" + + python_code_snippet: Optional[str] + """Example python code snippet which assigns reward of 1 to answer matching user provided reference answer in per prompt references map.""" + + +ReinforcementTuningCodeExecutionRewardScorerOrDict = Union[ + ReinforcementTuningCodeExecutionRewardScorer, + ReinforcementTuningCodeExecutionRewardScorerDict, +] + + +class ReinforcementTuningStringMatchRewardScorerStringMatchExpression( + _common.BaseModel +): + """Evaluates parsed response using match type against expression.""" + + match_operation: Optional[MatchOperation] = Field( + default=None, description="""Match operation to use for evaluation.""" + ) + expression: Optional[str] = Field( + default=None, + description="""String or regular expression to match against. Customer can also provide a references map of {key_name: value} whose value will be substituted into expression {{references.key_name}}.""", + ) + + +class ReinforcementTuningStringMatchRewardScorerStringMatchExpressionDict( + TypedDict, total=False +): + """Evaluates parsed response using match type against expression.""" + + match_operation: Optional[MatchOperation] + """Match operation to use for evaluation.""" + + expression: Optional[str] + """String or regular expression to match against. Customer can also provide a references map of {key_name: value} whose value will be substituted into expression {{references.key_name}}.""" + + +ReinforcementTuningStringMatchRewardScorerStringMatchExpressionOrDict = Union[ + ReinforcementTuningStringMatchRewardScorerStringMatchExpression, + ReinforcementTuningStringMatchRewardScorerStringMatchExpressionDict, +] + + +class ReinforcementTuningStringMatchRewardScorerJsonMatchExpression( + _common.BaseModel +): + """Converts parsed responses to JSON format, finds the first-level matching key, then performs StringMatchExpression on the value.""" + + key_name: Optional[str] = Field( + default=None, + description="""Json key name to find the value to match against.""", + ) + value_string_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerStringMatchExpression + ] = Field( + default=None, + description="""String match expression to match against the value of json key.""", + ) + + +class ReinforcementTuningStringMatchRewardScorerJsonMatchExpressionDict( + TypedDict, total=False +): + """Converts parsed responses to JSON format, finds the first-level matching key, then performs StringMatchExpression on the value.""" + + key_name: Optional[str] + """Json key name to find the value to match against.""" + + value_string_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerStringMatchExpressionDict + ] + """String match expression to match against the value of json key.""" + + +ReinforcementTuningStringMatchRewardScorerJsonMatchExpressionOrDict = Union[ + ReinforcementTuningStringMatchRewardScorerJsonMatchExpression, + ReinforcementTuningStringMatchRewardScorerJsonMatchExpressionDict, +] + + +class ReinforcementTuningStringMatchRewardScorer(_common.BaseModel): + """Scores parsed responses for string matching use cases.""" + + wrong_answer_reward: Optional[float] = Field( + default=None, + description="""Wrong answer reward is returned if evaluator evaluates to `false`. All wrong answers get the same reward.""", + ) + correct_answer_reward: Optional[float] = Field( + default=None, + description="""Correct answer reward is returned if evaluator evaluates to `true`. All correct answers get the same reward.""", + ) + string_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerStringMatchExpression + ] = Field( + default=None, + description="""Uses string match expression to evaluate parsed response.""", + ) + json_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerJsonMatchExpression + ] = Field( + default=None, + description="""Uses json match expression to evaluate parsed response.""", + ) + + +class ReinforcementTuningStringMatchRewardScorerDict(TypedDict, total=False): + """Scores parsed responses for string matching use cases.""" + + wrong_answer_reward: Optional[float] + """Wrong answer reward is returned if evaluator evaluates to `false`. All wrong answers get the same reward.""" + + correct_answer_reward: Optional[float] + """Correct answer reward is returned if evaluator evaluates to `true`. All correct answers get the same reward.""" + + string_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerStringMatchExpressionDict + ] + """Uses string match expression to evaluate parsed response.""" + + json_match_expression: Optional[ + ReinforcementTuningStringMatchRewardScorerJsonMatchExpressionDict + ] + """Uses json match expression to evaluate parsed response.""" + + +ReinforcementTuningStringMatchRewardScorerOrDict = Union[ + ReinforcementTuningStringMatchRewardScorer, + ReinforcementTuningStringMatchRewardScorerDict, +] + + +class ReinforcementTuningCloudRunRewardScorer(_common.BaseModel): + """Scores parsed responses by calling a Cloud Run service.""" + + cloud_run_uri: Optional[str] = Field( + default=None, + description="""URI of the Cloud Run service that will be used to compute the reward. The Vertex AI Secure Fine Tuning Service Agent (`service-@gcp-sa-vertex-tune.iam.gserviceaccount.com`) must be granted the permission (e.g. by granting `roles/run.invoker` in IAM) to invoke the Cloud Run service.""", + ) + + +class ReinforcementTuningCloudRunRewardScorerDict(TypedDict, total=False): + """Scores parsed responses by calling a Cloud Run service.""" + + cloud_run_uri: Optional[str] + """URI of the Cloud Run service that will be used to compute the reward. The Vertex AI Secure Fine Tuning Service Agent (`service-@gcp-sa-vertex-tune.iam.gserviceaccount.com`) must be granted the permission (e.g. by granting `roles/run.invoker` in IAM) to invoke the Cloud Run service.""" + + +ReinforcementTuningCloudRunRewardScorerOrDict = Union[ + ReinforcementTuningCloudRunRewardScorer, + ReinforcementTuningCloudRunRewardScorerDict, +] + + class SingleReinforcementTuningRewardConfig(_common.BaseModel): """Single reinforcement tuning reward config.""" autorater_scorer: Optional[ReinforcementTuningAutoraterScorer] = Field( - default=None, description="""""" + default=None, + description="""Scores parsed responses for autorater use cases by using a model to compute the reward.""", + ) + reward_name: Optional[str] = Field( + default=None, + description="""A unique reward name used to identify each single reinforcement tuning reward.""", + ) + parse_response_config: Optional[ReinforcementTuningParseResponseConfig] = ( + Field( + default=None, description="""Defines how to parse sample response.""" + ) + ) + code_execution_reward_scorer: Optional[ + ReinforcementTuningCodeExecutionRewardScorer + ] = Field( + default=None, + description="""Scores parsed responses for code execution use cases.""", + ) + string_match_reward_scorer: Optional[ + ReinforcementTuningStringMatchRewardScorer + ] = Field( + default=None, + description="""Scores parsed responses for simple string matching use cases against reference answer without writing python code.""", + ) + cloud_run_reward_scorer: Optional[ReinforcementTuningCloudRunRewardScorer] = ( + Field( + default=None, + description="""Scores parsed responses by calling a Cloud Run service.""", + ) ) @@ -14294,7 +14641,26 @@ class SingleReinforcementTuningRewardConfigDict(TypedDict, total=False): """Single reinforcement tuning reward config.""" autorater_scorer: Optional[ReinforcementTuningAutoraterScorerDict] - """""" + """Scores parsed responses for autorater use cases by using a model to compute the reward.""" + + reward_name: Optional[str] + """A unique reward name used to identify each single reinforcement tuning reward.""" + + parse_response_config: Optional[ReinforcementTuningParseResponseConfigDict] + """Defines how to parse sample response.""" + + code_execution_reward_scorer: Optional[ + ReinforcementTuningCodeExecutionRewardScorerDict + ] + """Scores parsed responses for code execution use cases.""" + + string_match_reward_scorer: Optional[ + ReinforcementTuningStringMatchRewardScorerDict + ] + """Scores parsed responses for simple string matching use cases against reference answer without writing python code.""" + + cloud_run_reward_scorer: Optional[ReinforcementTuningCloudRunRewardScorerDict] + """Scores parsed responses by calling a Cloud Run service.""" SingleReinforcementTuningRewardConfigOrDict = Union[ @@ -14653,6 +15019,211 @@ class TuningOperationDict(TypedDict, total=False): TuningOperationOrDict = Union[TuningOperation, TuningOperationDict] +class ReinforcementTuningExample(_common.BaseModel): + """User-facing format for Gemini Reinforcement Tuning examples on Vertex.""" + + contents: Optional[list[Content]] = Field( + default=None, + description="""Multi-turn contents that represents the Prompt.""", + ) + references: Optional[dict[str, str]] = Field( + default=None, + description="""References for the given prompt. The key is the name of the reference, and the value is the reference itself.""", + ) + system_instruction: Optional[Content] = Field( + default=None, + description="""Corresponds to `system_instruction` in user-facing GenerateContentRequest.""", + ) + + +class ReinforcementTuningExampleDict(TypedDict, total=False): + """User-facing format for Gemini Reinforcement Tuning examples on Vertex.""" + + contents: Optional[list[ContentDict]] + """Multi-turn contents that represents the Prompt.""" + + references: Optional[dict[str, str]] + """References for the given prompt. The key is the name of the reference, and the value is the reference itself.""" + + system_instruction: Optional[ContentDict] + """Corresponds to `system_instruction` in user-facing GenerateContentRequest.""" + + +ReinforcementTuningExampleOrDict = Union[ + ReinforcementTuningExample, ReinforcementTuningExampleDict +] + + +class ValidateReinforcementTuningRewardConfig(_common.BaseModel): + """Optional parameters for tunings.validate_reinforcement_tuning_reward.""" + + http_options: Optional[HttpOptions] = Field( + default=None, description="""Used to override HTTP request options.""" + ) + + +class ValidateReinforcementTuningRewardConfigDict(TypedDict, total=False): + """Optional parameters for tunings.validate_reinforcement_tuning_reward.""" + + http_options: Optional[HttpOptionsDict] + """Used to override HTTP request options.""" + + +ValidateReinforcementTuningRewardConfigOrDict = Union[ + ValidateReinforcementTuningRewardConfig, + ValidateReinforcementTuningRewardConfigDict, +] + + +class _ValidateReinforcementTuningRewardParametersPrivate(_common.BaseModel): + """Parameters for the validate_reinforcement_tuning_reward method. + + This class is used as the internal parameter type for the method. + The generator emits this as + `_ValidateReinforcementTuningRewardParametersPrivate`. + The public-facing parameter class is + `ValidateReinforcementTuningRewardParameters` + below. + """ + + parent: Optional[str] = Field( + default=None, + description="""Required. The resource name of the Location to validate the reward in, e.g. `projects/{project}/locations/{location}`.""", + ) + sample_response: Optional[Content] = Field( + default=None, + description="""Required. The sample response for validating the reward configuration.""", + ) + example: Optional[ReinforcementTuningExample] = Field( + default=None, + description="""Required. The example to validate the reward configuration.""", + ) + single_reward_config: Optional[SingleReinforcementTuningRewardConfig] = Field( + default=None, + description="""Single reward function configuration for reinforcement tuning. Mutually exclusive with composite_reward_config.""", + ) + composite_reward_config: Optional[ + CompositeReinforcementTuningRewardConfig + ] = Field( + default=None, + description="""Composite reward function configuration for reinforcement tuning. Mutually exclusive with single_reward_config.""", + ) + config: Optional[ValidateReinforcementTuningRewardConfig] = Field( + default=None, description="""Optional parameters for the request.""" + ) + + +class _ValidateReinforcementTuningRewardParametersPrivateDict( + TypedDict, total=False +): + """Parameters for the validate_reinforcement_tuning_reward method. + + This class is used as the internal parameter type for the method. + The generator emits this as + `_ValidateReinforcementTuningRewardParametersPrivate`. + The public-facing parameter class is + `ValidateReinforcementTuningRewardParameters` + below. + """ + + parent: Optional[str] + """Required. The resource name of the Location to validate the reward in, e.g. `projects/{project}/locations/{location}`.""" + + sample_response: Optional[ContentDict] + """Required. The sample response for validating the reward configuration.""" + + example: Optional[ReinforcementTuningExampleDict] + """Required. The example to validate the reward configuration.""" + + single_reward_config: Optional[SingleReinforcementTuningRewardConfigDict] + """Single reward function configuration for reinforcement tuning. Mutually exclusive with composite_reward_config.""" + + composite_reward_config: Optional[ + CompositeReinforcementTuningRewardConfigDict + ] + """Composite reward function configuration for reinforcement tuning. Mutually exclusive with single_reward_config.""" + + config: Optional[ValidateReinforcementTuningRewardConfigDict] + """Optional parameters for the request.""" + + +_ValidateReinforcementTuningRewardParametersPrivateOrDict = Union[ + _ValidateReinforcementTuningRewardParametersPrivate, + _ValidateReinforcementTuningRewardParametersPrivateDict, +] + + +class ReinforcementTuningRewardInfo(_common.BaseModel): + """The reward info for a reward function.""" + + reward: Optional[float] = Field( + default=None, + description="""Output only. The calculated reward for the reward function.""", + ) + user_requested_aux_info: Optional[str] = Field( + default=None, + description="""Output only. The user-requested auxiliary info for the reward function.""", + ) + + +class ReinforcementTuningRewardInfoDict(TypedDict, total=False): + """The reward info for a reward function.""" + + reward: Optional[float] + """Output only. The calculated reward for the reward function.""" + + user_requested_aux_info: Optional[str] + """Output only. The user-requested auxiliary info for the reward function.""" + + +ReinforcementTuningRewardInfoOrDict = Union[ + ReinforcementTuningRewardInfo, ReinforcementTuningRewardInfoDict +] + + +class ValidateReinforcementTuningRewardResponse(_common.BaseModel): + """Response for the validate_reinforcement_tuning_reward method.""" + + sdk_http_response: Optional[HttpResponse] = Field( + default=None, description="""Used to retain the full HTTP response.""" + ) + overall_reward: Optional[float] = Field( + default=None, + description="""Output only. The overall weighted reward. For a `CompositeReinforcementTuningRewardConfig`, this is the weighted average of all rewards. For a `SingleReinforcementTuningRewardConfig`, this will be the value of the single reward.""", + ) + error: Optional[str] = Field( + default=None, + description="""Output only. In case of an error, this field will be populated with a detailed error message to help with debugging.""", + ) + reward_info_details: Optional[dict[str, ReinforcementTuningRewardInfo]] = ( + Field( + default=None, description="""A map from reward name to reward info.""" + ) + ) + + +class ValidateReinforcementTuningRewardResponseDict(TypedDict, total=False): + """Response for the validate_reinforcement_tuning_reward method.""" + + sdk_http_response: Optional[HttpResponseDict] + """Used to retain the full HTTP response.""" + + overall_reward: Optional[float] + """Output only. The overall weighted reward. For a `CompositeReinforcementTuningRewardConfig`, this is the weighted average of all rewards. For a `SingleReinforcementTuningRewardConfig`, this will be the value of the single reward.""" + + error: Optional[str] + """Output only. In case of an error, this field will be populated with a detailed error message to help with debugging.""" + + reward_info_details: Optional[dict[str, ReinforcementTuningRewardInfoDict]] + """A map from reward name to reward info.""" + + +ValidateReinforcementTuningRewardResponseOrDict = Union[ + ValidateReinforcementTuningRewardResponse, + ValidateReinforcementTuningRewardResponseDict, +] + + class CreateCachedContentConfig(_common.BaseModel): """Optional configuration for cached content creation.""" @@ -20888,6 +21459,66 @@ class CreateTuningJobParametersDict(TypedDict, total=False): ] +class ValidateReinforcementTuningRewardParameters(_common.BaseModel): + """Parameters for the validate_reinforcement_tuning_reward method.""" + + parent: Optional[str] = Field( + default=None, + description="""Required. The resource name of the Location to validate the reward in, e.g. `projects/{project}/locations/{location}`.""", + ) + sample_response: Optional[Content] = Field( + default=None, + description="""Required. The sample response for validating the reward configuration.""", + ) + example: Optional[ReinforcementTuningExample] = Field( + default=None, + description="""Required. The example to validate the reward configuration.""", + ) + single_reward_config: Optional[SingleReinforcementTuningRewardConfig] = Field( + default=None, + description="""Single reward function configuration for reinforcement tuning. Mutually exclusive with composite_reward_config.""", + ) + composite_reward_config: Optional[ + CompositeReinforcementTuningRewardConfig + ] = Field( + default=None, + description="""Composite reward function configuration for reinforcement tuning. Mutually exclusive with single_reward_config.""", + ) + config: Optional[ValidateReinforcementTuningRewardConfig] = Field( + default=None, description="""Optional parameters for the request.""" + ) + + +class ValidateReinforcementTuningRewardParametersDict(TypedDict, total=False): + """Parameters for the validate_reinforcement_tuning_reward method.""" + + parent: Optional[str] + """Required. The resource name of the Location to validate the reward in, e.g. `projects/{project}/locations/{location}`.""" + + sample_response: Optional[ContentDict] + """Required. The sample response for validating the reward configuration.""" + + example: Optional[ReinforcementTuningExampleDict] + """Required. The example to validate the reward configuration.""" + + single_reward_config: Optional[SingleReinforcementTuningRewardConfigDict] + """Single reward function configuration for reinforcement tuning. Mutually exclusive with composite_reward_config.""" + + composite_reward_config: Optional[ + CompositeReinforcementTuningRewardConfigDict + ] + """Composite reward function configuration for reinforcement tuning. Mutually exclusive with single_reward_config.""" + + config: Optional[ValidateReinforcementTuningRewardConfigDict] + """Optional parameters for the request.""" + + +ValidateReinforcementTuningRewardParametersOrDict = Union[ + ValidateReinforcementTuningRewardParameters, + ValidateReinforcementTuningRewardParametersDict, +] + + class EmbedContentParameters(_common.BaseModel): """Parameters for the embed_content method."""