Module: Raif::Evals::EvalSets::Expectations

Included in:
Raif::Evals::EvalSet
Defined in:
lib/raif/evals/eval_sets/expectations.rb

Instance Method Summary

Instance Method Details

#expect(description, result_metadata: nil, &block) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/raif/evals/eval_sets/expectations.rb', line 8

# Runs a single expectation and records its outcome on the current eval.
#
# The block's return value decides the outcome: a truthy value yields a
# :passed result, a falsy value a :failed result. Any StandardError raised
# while running the block is rescued and recorded as an :error result
# instead of propagating to the caller.
#
# @param description [#to_s] label printed to `output` and stored on the result
# @param result_metadata [Object, nil] optional extra data stored on the result;
#   for passed/failed outcomes its #inspect form is also printed when
#   Raif.config.evals_verbose_output is truthy
# @yield the check to evaluate
# @return [ExpectationResult] the result that was added to `current_eval`
def expect(description, result_metadata: nil, &block)
  result = begin
    if block.call
      output.puts Raif::Utils::Colors.green("#{description}")
      output.puts Raif::Utils::Colors.green("#{result_metadata.inspect}") if result_metadata && Raif.config.evals_verbose_output
      ExpectationResult.new(description: description, status: :passed, metadata: result_metadata)
    else
      output.puts Raif::Utils::Colors.red("#{description}")
      output.puts Raif::Utils::Colors.red("#{result_metadata.inspect}") if result_metadata && Raif.config.evals_verbose_output
      ExpectationResult.new(description: description, status: :failed, metadata: result_metadata)
    end
  rescue => e
    # The metadata is kept on the result even though it is not printed here.
    output.puts Raif::Utils::Colors.red("#{description} (Error: #{e.message})")
    ExpectationResult.new(description: description, status: :error, error: e, metadata: result_metadata)
  end

  current_eval.add_expectation_result(result)
  result
end

#expect_no_tool_invocation(tool_invoker, tool_name) ⇒ Object



44
45
46
47
48
# File 'lib/raif/evals/eval_sets/expectations.rb', line 44

# Records an expectation that passes when none of the invoker's tool
# invocations reports the given tool name.
#
# The invocations are read inside the block handed to #expect, so the lookup
# happens when #expect runs the block.
#
# @param tool_invoker [#raif_model_tool_invocations] object whose invocations are inspected
# @param tool_name [Object] compared with == against each invocation's `tool_name`
# @return [Object] whatever #expect returns
def expect_no_tool_invocation(tool_invoker, tool_name)
  label = "does not invoke #{tool_name}"

  expect(label) do
    invocations = tool_invoker.raif_model_tool_invocations
    invocations.none? { |invocation| invocation.tool_name == tool_name }
  end
end

#expect_tool_invocation(tool_invoker, tool_type, with: {}) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/raif/evals/eval_sets/expectations.rb', line 28

# Records an expectation that the invoker made at least one invocation of
# the given tool type, optionally with matching arguments.
#
# The invocations are collected and filtered before #expect is called, so
# the block given to #expect only checks whether any candidate remains.
#
# @param tool_invoker [#raif_model_tool_invocations] object whose invocations are inspected
# @param tool_type [Object] compared with == against each invocation's `tool_type`
# @param with [Hash] optional argument constraints; each key is converted with
#   #to_s and looked up in the invocation's `tool_arguments`, and every value
#   must be == to the stored one
# @return [Object] whatever #expect returns
def expect_tool_invocation(tool_invoker, tool_type, with: {})
  invocations = tool_invoker.raif_model_tool_invocations.select { |inv| inv.tool_type == tool_type }

  # Keyed by tool type: when a type was invoked more than once, only the
  # arguments of its last invocation are kept in this hash.
  invoked_tools = tool_invoker.raif_model_tool_invocations.map { |inv| [inv.tool_type, inv.tool_arguments] }.to_h

  if with.any?
    invocations = invocations.select do |invocation|
      with.all? { |key, value| invocation.tool_arguments[key.to_s] == value }
    end
  end

  # Attached to the expectation result to show what was actually invoked.
  result_metadata = { invoked_tools: invoked_tools }
  expect "invokes #{tool_type}#{with.any? ? " with #{with.to_json}" : ""}", result_metadata: result_metadata do
    invocations.any?
  end
end