Class: Raif::PromptStudioBatchRun

Inherits:
ApplicationRecord show all
Defined in:
app/models/raif/prompt_studio_batch_run.rb

Constant Summary collapse

# Frozen list of fully qualified LLM judge class names, stored as strings.
# NOTE(review): presumably the set of values accepted for `judge_type`;
# the validation that would enforce this is not visible here - confirm.
ALLOWED_JUDGE_TYPES =
[
  "Raif::Evals::LlmJudges::Binary",
  "Raif::Evals::LlmJudges::Scored",
  "Raif::Evals::LlmJudges::Comparative",
  "Raif::Evals::LlmJudges::Summarization"
].freeze

Instance Method Summary collapse

Methods inherited from ApplicationRecord

table_name_prefix, where_json_not_blank

Instance Method Details

#check_completion! ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'app/models/raif/prompt_studio_batch_run.rb', line 138

# Recomputes the batch's item counters and, once no item is still
# pending, running or judging, stamps the terminal timestamp.
#
# The record is reloaded first, then `completed_count` and `failed_count`
# are refreshed from the items' statuses. When nothing is left in flight,
# `failed_at` is set if there is at least one failure and no completion;
# in every other case `completed_at` is set. The record is persisted with
# `save!` regardless of whether a timestamp was stamped.
#
# @return [Object] whatever `save!` returns
def check_completion!
  reload
  unfinished = items.where(status: %w[pending running judging]).count
  self.completed_count = items.where(status: "completed").count
  self.failed_count = items.where(status: "failed").count

  if unfinished.zero?
    finished_at = Time.current
    all_failed = failed_count.positive? && completed_count.zero?

    if all_failed
      self.failed_at = finished_at
    else
      self.completed_at = finished_at
    end
  end

  save!
end

#has_judge? ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
# File 'app/models/raif/prompt_studio_batch_run.rb', line 66

# Reports whether a judge type has been set on this batch run.
#
# @return [Boolean] true unless `judge_type` is blank
def has_judge?
  !judge_type.blank?
end

#judge_average_score ⇒ Object



83
84
85
86
87
88
# File 'app/models/raif/prompt_studio_batch_run.rb', line 83

# Mean judgment score over the completed judge tasks, to one decimal place.
#
# Tasks whose `judgment_score` is nil or false are left out of the average.
#
# @return [Float, nil] the rounded mean, or nil when no task has a score
def judge_average_score
  scores = completed_judge_tasks.filter_map { |task| task.judgment_score }
  return nil if scores.empty?

  mean = scores.sum.to_f / scores.size
  mean.round(1)
end

#judge_class ⇒ Object



70
71
72
# File 'app/models/raif/prompt_studio_batch_run.rb', line 70

# Resolves `judge_type` to a constant via `safe_constantize`.
#
# @return [Object, nil] the result of `judge_type.safe_constantize`,
#   or nil when `judge_type` is nil
def judge_class
  return if judge_type.nil?

  judge_type.safe_constantize
end

#judge_comparative_summary ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'app/models/raif/prompt_studio_batch_run.rb', line 90

# Tallies comparative-judge outcomes across this batch's items.
#
# Only items that reference a judge task are considered, and an item is
# counted only when that task is completed and its `parsed_response` is a
# Hash. The response's "winner" value is classified as:
#   * a tie when it equals "tie",
#   * a win for the new response when it equals the item's
#     "new_response_letter",
#   * a win for the original response in every other case.
#
# Each percentage is rounded independently, so the three may not add up
# to exactly 100.
#
# @return [Hash, nil] `:new_wins`, `:original_wins`, `:ties`, `:total` and
#   the integer percentages `:new_win_pct`, `:original_win_pct`, `:tie_pct`;
#   nil when there are no judged items or none of them is countable
def judge_comparative_summary
  judged_items = items.where.not(judge_task_id: nil).includes(:judge_task)
  return if judged_items.empty?

  new_wins = 0
  original_wins = 0
  ties = 0

  judged_items.each do |item|
    next unless item.judge_task&.completed?

    parsed = item.judge_task.parsed_response
    next unless parsed.is_a?(Hash)

    # NOTE(review): the listing read `item.&.dig(...)`, which is not valid
    # Ruby; the receiver is restored as the item's `metadata` hash -
    # confirm the attribute name against the item model.
    new_letter = item.metadata&.dig("new_response_letter")

    winner = parsed["winner"]
    if winner == "tie"
      ties += 1
    elsif winner == new_letter
      new_wins += 1
    else
      original_wins += 1
    end
  end

  total = new_wins + original_wins + ties
  return if total.zero?

  {
    new_wins: new_wins,
    original_wins: original_wins,
    ties: ties,
    total: total,
    new_win_pct: ((new_wins.to_f / total) * 100).round,
    original_win_pct: ((original_wins.to_f / total) * 100).round,
    tie_pct: ((ties.to_f / total) * 100).round
  }
end

#judge_pass_rate ⇒ Object



74
75
76
77
78
79
80
81
# File 'app/models/raif/prompt_studio_batch_run.rb', line 74

# Formats the share of completed judge tasks that pass.
#
# @return [String, nil] a label such as "67% (2/3)" - rounded percentage,
#   then passing count over total - or nil when there are no completed
#   judge tasks
def judge_pass_rate
  tasks = completed_judge_tasks
  return nil if tasks.empty?

  passed = tasks.count { |task| task.passes? }
  total = tasks.size
  # Divide first, then scale: keeps the rounding identical at .5 boundaries.
  pct = ((passed.to_f / total) * 100).round

  "#{pct}% (#{passed}/#{total})"
end

#progress_percentage ⇒ Object



60
61
62
63
64
# File 'app/models/raif/prompt_studio_batch_run.rb', line 60

# Share of the batch that has finished, counting both completed and
# failed items, as a whole-number percentage.
#
# @return [Integer] 0 when `total_count` is zero, otherwise the rounded
#   percentage of `completed_count + failed_count` over `total_count`
def progress_percentage
  if total_count.zero?
    0
  else
    finished = completed_count + failed_count
    (finished.to_f / total_count * 100).round
  end
end

#status ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
# File 'app/models/raif/prompt_studio_batch_run.rb', line 48

# Derives the batch run's state from its timestamp predicates.
#
# The checks run in priority order: `completed_at?` wins over
# `failed_at?`, which wins over `started_at?`.
#
# @return [Symbol] :completed, :failed, :in_progress or :pending
def status
  return :completed if completed_at?
  return :failed if failed_at?
  return :in_progress if started_at?

  :pending
end