# Source code for oumi.judges_v2.simple_judge

# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

from typing_extensions import override

from oumi.core.configs.inference_config import InferenceConfig
from oumi.core.configs.judge_config_v2 import JudgeConfig
from oumi.core.configs.params.judge_params import (
    JudgeOutputType,
    JudgeParams,
    JudgeResponseFormat,
)
from oumi.core.inference import BaseInferenceEngine
from oumi.judges_v2.base_judge import (
    BaseJudge,
    JudgeOutputField,
)

# Names of the fields (XML tags / JSON keys) the judge is asked to emit.
JUDGMENT_KEY = "judgment"
EXPLANATION_KEY = "explanation"

# Per-judgment-type sentences telling the judge which values are acceptable.
# They carry no trailing punctuation; the code that uses them appends ". ".
JUDGMENT_OPTIONS_BOOL = (
    "Your judgment should be a single word: 'Yes' or 'No'"
)
JUDGMENT_OPTIONS_INT = (
    "Your judgment should be an integer value"
)
JUDGMENT_OPTIONS_FLOAT = (
    "Your judgment should be a float value"
)
JUDGMENT_OPTIONS_TEXT = (
    "Your judgment should be provided in the form of free text"
)
# Prefix only: the quoted, comma-separated choices are appended by the caller.
JUDGMENT_OPTIONS_ENUM_PREFIX = (
    "Your judgment should be one of the following options: "
)

# Prompt suffix: describing to the judge how to format its response (XML, JSON, or RAW).
#
# XML_SUFFIX is the XML variant without an explanation field. It is a
# `str.format` template with two placeholders:
#   {judgment_key}     - tag name that must wrap the judgment.
#   {judgment_options} - sentence describing the allowed judgment values; it is
#                        expected to be either empty or to end with ". " so that
#                        it flows into the following "Do not include..." sentence.
XML_SUFFIX = (
    "\n\n"
    "Provide your response in XML format only. "
    "Include your judgment enclosed within "
    "<{judgment_key}> and </{judgment_key}> tags. "
    "{judgment_options}"
    # Single space after "Do not": a doubled space here leaked into the prompt.
    "Do not include any text outside the XML. "
    "Ensure that all tags are properly closed and that the XML is well-formed."
)
# The templates below are `str.format` strings. Placeholders used:
#   {explanation_key}  - tag/key under which the judge writes its explanation.
#   {judgment_key}     - tag/key under which the judge writes its judgment.
#   {judgment_options} - sentence describing the allowed judgment values.
# Each template is written one sentence per source line.

# XML response that carries an explanation followed by the judgment.
XML_SUFFIX_WITH_EXPLANATION = (
    "\n\n"
    "Provide your response in XML format only. "
    "Begin with an explanation justifying your judgment, enclosed within "
    "<{explanation_key}> and </{explanation_key}> tags. "
    "Follow this with your judgment, enclosed within "
    "<{judgment_key}> and </{judgment_key}> tags. "
    "{judgment_options}"
    "Do not include any text outside the XML. "
    "Ensure that all tags are properly closed and that the XML is well-formed."
)

# JSON response that carries only the judgment.
JSON_SUFFIX = (
    "\n\n"
    "Provide your response in JSON format only. "
    "Include your judgment as the value of a single key named '{judgment_key}'. "
    "{judgment_options}"
    "Do not include any text outside the JSON. "
    "Ensure the JSON is properly formatted and valid."
)

# JSON response that carries an explanation followed by the judgment.
JSON_SUFFIX_WITH_EXPLANATION = (
    "\n\n"
    "Provide your response in JSON format only. "
    "Begin with an explanation justifying your judgment, "
    "using the key '{explanation_key}'. "
    "Then include your judgment using the key '{judgment_key}'. "
    "{judgment_options}"
    "Do not include any text outside the JSON. "
    "Ensure the JSON is properly formatted and valid."
)

# RAW (free-form) response: the only instruction is to reason first.
# This template has no placeholders.
RAW_SUFFIX_WITH_EXPLANATION = (
    "\n\n"
    "Explain your reasoning before providing your judgment."
)


class SimpleJudge(BaseJudge):
    """Judge whose prompts, output fields and engine all come from a JudgeConfig."""

    def __init__(
        self,
        judge_config: Union[JudgeConfig, str],
    ):
        """Set up the judge from a configuration object or a configuration file.

        Args:
            judge_config: Either a JudgeConfig instance or a string that is
                handed to ``JudgeConfig.from_path``. It supplies both the judge
                parameters and the inference configuration.
        """
        config = (
            JudgeConfig.from_path(judge_config)
            if isinstance(judge_config, str)
            else judge_config
        )

        params = config.judge_params
        self._judge_params = params
        params.replace_template_variables()
        self._inference_config = config.inference_config

        # Output fields: the optional explanation comes first, then the judgment.
        output_fields = (
            [self._create_explanation_output_field()]
            if params.include_explanation
            else []
        )
        output_fields.append(self._create_judgment_output_field(params))

        # The engine is built only after the output fields have been created.
        engine = self._create_inference_engine(self._inference_config)

        # When a system instruction is present, the formatting guidance is
        # attached to it here; otherwise `_build_judgment_prompt` attaches it
        # to every prompt instead.
        system_instruction = params.system_instruction
        if system_instruction:
            system_instruction = system_instruction + self._get_format_suffix()

        super().__init__(
            prompt_template=params.prompt_template,
            system_instruction=system_instruction,
            example_field_values=params.examples,
            response_format=params.response_format,
            output_fields=output_fields,
            inference_engine=engine,
        )

    @override
    def _build_judgment_prompt(self, judge_input: dict[str, str]) -> str:
        """Build the judgment prompt, adding format guidance when needed.

        Args:
            judge_input: Values forwarded unchanged to the parent implementation.

        Returns:
            The parent's prompt. The format suffix is appended only when the
            judge has no system instruction, because `__init__` already put
            the suffix on the system instruction in the other case.
        """
        prompt = super()._build_judgment_prompt(judge_input)
        if self._judge_params.system_instruction:
            return prompt
        return prompt + self._get_format_suffix()

    def _get_format_suffix(self) -> str:
        """Compose the instruction telling the judge how to shape its response.

        The result depends on the configured response format, on whether an
        explanation is requested, and on the judgment type / judgment scores.

        Returns:
            Format-specific instruction suffix to append to prompts. This is an
            empty string for the RAW format without explanation and for any
            response format that is not XML, JSON or RAW.
        """
        params = self._judge_params
        response_format = params.response_format
        with_explanation = params.include_explanation

        # Step 1: sentence describing which judgment values are acceptable.
        scores = params.judgment_scores
        if scores and len(scores) > 1:
            # Two or more scored choices: list them explicitly, single-quoted.
            quoted = ", ".join(f"'{option}'" for option in scores.keys())
            judgment_options = f"{JUDGMENT_OPTIONS_ENUM_PREFIX}{quoted}. "
        else:
            judgment_type = params.judgment_type
            hints_by_type = (
                (JudgeOutputType.BOOL, JUDGMENT_OPTIONS_BOOL),
                (JudgeOutputType.FLOAT, JUDGMENT_OPTIONS_FLOAT),
                (JudgeOutputType.INT, JUDGMENT_OPTIONS_INT),
                (JudgeOutputType.TEXT, JUDGMENT_OPTIONS_TEXT),
            )
            # No hint is emitted for a type that is not listed above.
            judgment_options = ""
            for output_type, hint in hints_by_type:
                if judgment_type == output_type:
                    judgment_options = f"{hint}. "
                    break

        # Step 2: choose the template pair for the response format, then pick
        # the variant matching the explanation setting.
        if response_format == JudgeResponseFormat.XML:
            plain, explained = XML_SUFFIX, XML_SUFFIX_WITH_EXPLANATION
        elif response_format == JudgeResponseFormat.JSON:
            plain, explained = JSON_SUFFIX, JSON_SUFFIX_WITH_EXPLANATION
        elif response_format == JudgeResponseFormat.RAW:
            plain, explained = "", RAW_SUFFIX_WITH_EXPLANATION
        else:
            plain, explained = "", ""
        template = explained if with_explanation else plain

        return template.format(
            judgment_key=JUDGMENT_KEY,
            explanation_key=EXPLANATION_KEY,
            judgment_options=judgment_options,
        )

    def _create_judgment_output_field(self, params: JudgeParams) -> JudgeOutputField:
        """Describe the judgment field using the type and scores from `params`."""
        judgment_field = JudgeOutputField(
            field_key=JUDGMENT_KEY,
            field_type=params.judgment_type,
            field_scores=params.judgment_scores,
        )
        return judgment_field

    def _create_explanation_output_field(self) -> JudgeOutputField:
        """Describe the explanation field: free text with no scores."""
        explanation_field = JudgeOutputField(
            field_key=EXPLANATION_KEY,
            field_type=JudgeOutputType.TEXT,
            field_scores=None,
        )
        return explanation_field

    def _create_inference_engine(
        self, inference_config: InferenceConfig
    ) -> BaseInferenceEngine:
        """Validate the inference configuration and build the engine from it.

        Args:
            inference_config: Supplies the engine type together with the model,
                remote and generation parameters.

        Returns:
            The engine produced by `build_inference_engine`.

        Raises:
            ValueError: If no engine is specified, or if an input or output
                path is set on the configuration.
        """
        # Imported inside the method, as in the original implementation.
        from oumi.builders.inference_engines import build_inference_engine

        if inference_config.engine is None:
            raise ValueError("Inference engine not specified in the configuration.")

        has_io_paths = inference_config.input_path or inference_config.output_path
        if has_io_paths:
            raise ValueError(
                "Input and output paths are not supported in inference_config, when "
                "instantiating the SimpleJudge. Please set both to None."
            )

        return build_inference_engine(
            engine_type=inference_config.engine,
            model_params=inference_config.model,
            remote_params=inference_config.remote_params,
            generation_params=inference_config.generation,
        )