Class: Raif::PromptStudioBatchRun
Constant Summary
collapse
- ALLOWED_JUDGE_TYPES =
[
"Raif::Evals::LlmJudges::Binary",
"Raif::Evals::LlmJudges::Scored",
"Raif::Evals::LlmJudges::Comparative",
"Raif::Evals::LlmJudges::Summarization"
].freeze
Instance Method Summary
collapse
table_name_prefix, where_json_not_blank
Instance Method Details
#check_completion! ⇒ Object
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 138
# Refreshes the record, recounts its items by status, and stamps a terminal
# timestamp once no item is still pending, running, or judging.
#
# The run is marked failed only when at least one item failed and none
# completed; in every other finished case it is marked completed.
#
# @return [Object] whatever `save!` returns
def check_completion!
  reload

  unfinished = items.where(status: %w[pending running judging]).count
  self.completed_count = items.where(status: "completed").count
  self.failed_count = items.where(status: "failed").count

  # Work is still outstanding: persist the refreshed counters only.
  return save! unless unfinished.zero?

  everything_failed = failed_count.positive? && completed_count.zero?
  if everything_failed
    self.failed_at = Time.current
  else
    self.completed_at = Time.current
  end

  save!
end
|
#has_judge? ⇒ Boolean
66
67
68
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 66
# Whether a judge type has been set on this batch run.
#
# @return [Boolean] true when `judge_type` is not blank
def has_judge?
  !judge_type.blank?
end
|
#judge_average_score ⇒ Object
83
84
85
86
87
88
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 83
# Mean of the `judgment_score` values reported by the completed judge tasks,
# rounded to one decimal place. Tasks with a nil/false score are left out.
#
# @return [Float, nil] nil when no task supplies a score
def judge_average_score
  scores = completed_judge_tasks.filter_map { |task| task.judgment_score }
  return nil if scores.empty?

  mean = scores.sum.to_f / scores.size
  mean.round(1)
end
|
#judge_class ⇒ Object
70
71
72
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 70
# Resolves the stored `judge_type` string to a constant via
# `safe_constantize`.
#
# @return [Object, nil] nil when `judge_type` is nil; otherwise whatever
#   `safe_constantize` returns for that string
def judge_class
  type_name = judge_type
  return if type_name.nil?

  type_name.safe_constantize
end
|
#judge_comparative_summary ⇒ Object
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 90
# Tallies the outcomes of comparative judge tasks attached to this run's items.
#
# Only items whose judge task is completed and whose parsed response is a Hash
# carrying a "winner" value are counted. A winner of "tie" is a tie; a winner
# equal to the item's metadata "new_response_letter" is a win for the new
# response; any other winner is counted as a win for the original response.
#
# @return [Hash, nil] counts (:new_wins, :original_wins, :ties, :total) plus
#   rounded integer percentages (:new_win_pct, :original_win_pct, :tie_pct);
#   nil when no item yields a countable verdict
def judge_comparative_summary
  judged_items = items.where.not(judge_task_id: nil).includes(:judge_task)
  return if judged_items.empty?

  new_wins = 0
  original_wins = 0
  ties = 0

  judged_items.each do |item|
    task = item.judge_task
    next unless task&.completed?

    parsed = task.parsed_response
    next unless parsed.is_a?(Hash)

    winner = parsed["winner"]
    # A response without a verdict must not be tallied: previously a nil
    # winner matched a missing "new_response_letter" (nil == nil) and was
    # counted as a win for the new response, or otherwise as an original win.
    next if winner.nil?

    if winner == "tie"
      ties += 1
    elsif winner == item.metadata&.dig("new_response_letter")
      new_wins += 1
    else
      original_wins += 1
    end
  end

  total = new_wins + original_wins + ties
  return if total.zero?

  percent_of_total = ->(count) { ((count.to_f / total) * 100).round }

  {
    new_wins: new_wins,
    original_wins: original_wins,
    ties: ties,
    total: total,
    new_win_pct: percent_of_total.call(new_wins),
    original_win_pct: percent_of_total.call(original_wins),
    tie_pct: percent_of_total.call(ties)
  }
end
|
#judge_pass_rate ⇒ Object
74
75
76
77
78
79
80
81
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 74
# Human-readable pass rate across the completed judge tasks, formatted as
# "<percent>% (<passed>/<total>)" with the percentage rounded to an integer.
#
# @return [String, nil] nil when there are no completed judge tasks
def judge_pass_rate
  tasks = completed_judge_tasks
  return nil if tasks.empty?

  passed = tasks.count(&:passes?)
  total = tasks.size
  rate = ((passed.to_f / total) * 100).round

  format("%<rate>d%% (%<passed>d/%<total>d)", rate: rate, passed: passed, total: total)
end
|
#progress_percentage ⇒ Object
60
61
62
63
64
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 60
# Share of the run's items that have finished (completed or failed), as a
# whole-number percentage of `total_count`.
#
# @return [Integer] 0 when `total_count` is zero
def progress_percentage
  return 0 if total_count.zero?

  finished = completed_count + failed_count
  fraction = finished.to_f / total_count
  (fraction * 100).round
end
|
#status ⇒ Object
48
49
50
51
52
53
54
55
56
57
58
|
# File 'app/models/raif/prompt_studio_batch_run.rb', line 48
# Derives the run's lifecycle state from its timestamp predicates, checked in
# priority order: completed, then failed, then started.
#
# @return [Symbol] :completed, :failed, :in_progress, or :pending
def status
  return :completed if completed_at?
  return :failed if failed_at?
  return :in_progress if started_at?

  :pending
end
|