# Source code for oumi.judges_v2.simple_judge
# Copyright 2025 - Oumi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union
from typing_extensions import override
from oumi.core.configs.inference_config import InferenceConfig
from oumi.core.configs.judge_config_v2 import JudgeConfig
from oumi.core.configs.params.judge_params import (
JudgeOutputType,
JudgeParams,
JudgeResponseFormat,
)
from oumi.core.inference import BaseInferenceEngine
from oumi.judges_v2.base_judge import (
BaseJudge,
JudgeOutputField,
)
# Keys under which the judge is expected to report each part of its output.
EXPLANATION_KEY = "explanation"
JUDGMENT_KEY = "judgment"

# Judgment options: one sentence per judgment type telling the judge what form its
# judgment must take. The sentences carry no trailing period; the code that builds
# the prompt suffix appends ". " after the selected sentence.
JUDGMENT_OPTIONS_BOOL = "Your judgment should be a single word: 'Yes' or 'No'"
JUDGMENT_OPTIONS_INT = "Your judgment should be an integer value"
JUDGMENT_OPTIONS_FLOAT = "Your judgment should be a float value"
JUDGMENT_OPTIONS_ENUM_PREFIX = "Your judgment should be one of the following options: "
JUDGMENT_OPTIONS_TEXT = "Your judgment should be provided in the form of free text"

# Prompt suffixes: `str.format` templates telling the judge how to lay out its
# response (XML, JSON, or RAW). The XML/JSON templates use the placeholders
# {judgment_key}, {judgment_options} and, in the *_WITH_EXPLANATION variants,
# {explanation_key}. Each literal below is split at sentence boundaries.
XML_SUFFIX = (
    "\n\nProvide your response in XML format only. "
    "Include your judgment enclosed within "
    "<{judgment_key}> and </{judgment_key}> tags. "
    "{judgment_options}"
    "Do not include any text outside the XML. "
    "Ensure that all tags are properly closed and that the XML is well-formed."
)
XML_SUFFIX_WITH_EXPLANATION = (
    "\n\nProvide your response in XML format only. "
    "Begin with an explanation justifying your judgment, enclosed within "
    "<{explanation_key}> and </{explanation_key}> tags. "
    "Follow this with your judgment, enclosed within "
    "<{judgment_key}> and </{judgment_key}> tags. "
    "{judgment_options}"
    "Do not include any text outside the XML. "
    "Ensure that all tags are properly closed and that the XML is well-formed."
)
JSON_SUFFIX = (
    "\n\nProvide your response in JSON format only. "
    "Include your judgment as the value of a single key named '{judgment_key}'. "
    "{judgment_options}"
    "Do not include any text outside the JSON. "
    "Ensure the JSON is properly formatted and valid."
)
JSON_SUFFIX_WITH_EXPLANATION = (
    "\n\nProvide your response in JSON format only. "
    "Begin with an explanation justifying your judgment, "
    "using the key '{explanation_key}'. "
    "Then include your judgment using the key '{judgment_key}'. "
    "{judgment_options}"
    "Do not include any text outside the JSON. "
    "Ensure the JSON is properly formatted and valid."
)
# The RAW suffix has no placeholders; formatting it returns it unchanged.
RAW_SUFFIX_WITH_EXPLANATION = (
    "\n\nExplain your reasoning before providing your judgment."
)
# [docs]
class SimpleJudge(BaseJudge):
    """Judge whose prompts, output fields and engine all come from a JudgeConfig."""

    def __init__(
        self,
        judge_config: Union[JudgeConfig, str],
    ):
        """Set up the judge from a configuration object or configuration file.

        Args:
            judge_config: Either a ``JudgeConfig`` instance or a string path that
                is loaded with ``JudgeConfig.from_path``. Supplies both the judge
                parameters and the inference configuration.
        """
        config = (
            JudgeConfig.from_path(judge_config)
            if isinstance(judge_config, str)
            else judge_config
        )

        self._judge_params = config.judge_params
        self._judge_params.replace_template_variables()
        self._inference_config = config.inference_config
        params = self._judge_params

        # Output fields: the optional explanation comes first, then the judgment.
        fields = (
            [self._create_explanation_output_field()]
            if params.include_explanation
            else []
        )
        fields.append(self._create_judgment_output_field(params))

        # Build the inference engine described by the inference config.
        engine = self._create_inference_engine(self._inference_config)

        # When a system instruction is configured, the formatting guidance is
        # attached to it here; otherwise `_build_judgment_prompt` attaches it.
        system_instruction = params.system_instruction
        if system_instruction:
            format_suffix = self._get_format_suffix()
            system_instruction = f"{system_instruction}{format_suffix}"

        super().__init__(
            prompt_template=params.prompt_template,
            system_instruction=system_instruction,
            example_field_values=params.examples,
            response_format=params.response_format,
            output_fields=fields,
            inference_engine=engine,
        )

    @override
    def _build_judgment_prompt(self, judge_input: dict[str, str]) -> str:
        """Render the judgment prompt, adding format guidance when needed."""
        prompt = super()._build_judgment_prompt(judge_input)
        # A configured system instruction already carries the format suffix
        # (attached in `__init__`), so the prompt is returned untouched.
        if self._judge_params.system_instruction:
            return prompt
        return prompt + self._get_format_suffix()

    def _get_format_suffix(self) -> str:
        """Build the instruction suffix describing how the judge must respond.

        Returns:
            The suffix template selected by the response format and the
            ``include_explanation`` flag, with the key names and the judgment
            options filled in. Empty when no template applies.
        """
        params = self._judge_params

        # Step 1: sentence describing which judgment values are acceptable.
        scores = params.judgment_scores
        if scores and len(scores) > 1:
            # Several scored options: list them explicitly, each one quoted.
            quoted = ", ".join(f"'{option}'" for option in scores.keys())
            judgment_options = f"{JUDGMENT_OPTIONS_ENUM_PREFIX}{quoted}. "
        else:
            # Otherwise describe the judgment type; unknown types add nothing.
            type_descriptions = (
                (JudgeOutputType.BOOL, JUDGMENT_OPTIONS_BOOL),
                (JudgeOutputType.FLOAT, JUDGMENT_OPTIONS_FLOAT),
                (JudgeOutputType.INT, JUDGMENT_OPTIONS_INT),
                (JudgeOutputType.TEXT, JUDGMENT_OPTIONS_TEXT),
            )
            judgment_options = ""
            for output_type, description in type_descriptions:
                if params.judgment_type == output_type:
                    judgment_options = f"{description}. "
                    break

        # Step 2: template matching the response format (XML, JSON, or RAW).
        format_templates = (
            (JudgeResponseFormat.XML, XML_SUFFIX_WITH_EXPLANATION, XML_SUFFIX),
            (JudgeResponseFormat.JSON, JSON_SUFFIX_WITH_EXPLANATION, JSON_SUFFIX),
            (JudgeResponseFormat.RAW, RAW_SUFFIX_WITH_EXPLANATION, ""),
        )
        template = ""
        for known_format, explained, plain in format_templates:
            if params.response_format == known_format:
                template = explained if params.include_explanation else plain
                break

        return template.format(
            judgment_key=JUDGMENT_KEY,
            explanation_key=EXPLANATION_KEY,
            judgment_options=judgment_options,
        )

    def _create_judgment_output_field(self, params: JudgeParams) -> JudgeOutputField:
        """Build the judgment field from the configured type and scores."""
        return JudgeOutputField(
            field_key=JUDGMENT_KEY,
            field_type=params.judgment_type,
            field_scores=params.judgment_scores,
        )

    def _create_explanation_output_field(self) -> JudgeOutputField:
        """Build the free-text explanation field, which carries no scores."""
        return JudgeOutputField(
            field_key=EXPLANATION_KEY,
            field_type=JudgeOutputType.TEXT,
            field_scores=None,
        )

    def _create_inference_engine(
        self, inference_config: InferenceConfig
    ) -> BaseInferenceEngine:
        """Validate the inference configuration and build its engine.

        Args:
            inference_config: Inference configuration to build the engine from.

        Returns:
            The engine returned by ``build_inference_engine``.

        Raises:
            ValueError: If no engine is specified, or if an input or output
                path is set in the configuration.
        """
        from oumi.builders.inference_engines import build_inference_engine

        if inference_config.engine is None:
            raise ValueError("Inference engine not specified in the configuration.")

        if inference_config.input_path or inference_config.output_path:
            raise ValueError(
                "Input and output paths are not supported in inference_config, when "
                "instantiating the SimpleJudge. Please set both to None."
            )

        return build_inference_engine(
            engine_type=inference_config.engine,
            model_params=inference_config.model,
            remote_params=inference_config.remote_params,
            generation_params=inference_config.generation,
        )