// Source: index.html script block (extracted). Loaded via <script type="text/babel"> in index.html.

// ---- helpers -----------------------------------------------------------------

/**
 * Current local wall-clock time formatted as zero-padded "HH:MM:SS".
 *
 * @returns {string} e.g. "09:05:03".
 */
function benchNowHMS() {
  const now = new Date();
  const pad2 = (value) => String(value).padStart(2, "0");
  return `${pad2(now.getHours())}:${pad2(now.getMinutes())}:${pad2(now.getSeconds())}`;
}

/**
 * Render a fraction as a percentage with one decimal place.
 *
 * @param {number|null|undefined} n - Fraction to format (0.5 means 50%).
 * @returns {string} e.g. "50.0%", or an em dash when `n` is null, undefined or NaN.
 */
function formatPct(n) {
  const isMissing = n === null || n === undefined || Number.isNaN(n);
  if (isMissing) {
    return "—";
  }
  const scaled = n * 100;
  return `${scaled.toFixed(1)}%`;
}

/**
 * Aggregate the stored runs of a benchmark into summary metrics.
 *
 * For every id in `b.taskIds` the runs under `b.runs[id]` are counted
 * (`n` total, `c` with a truthy `binary_pass`) and handed to `passAtK` /
 * `passHatK`. Those per-task scores are averaged over the number of tasks;
 * the mean reward is averaged over the number of runs.
 *
 * @param {object|null|undefined} b - Benchmark record; a missing record or a
 *   non-array `taskIds` is treated as "no tasks".
 * @returns {{pass_at_k: number, pass_hat_k: number, total_runs: number,
 *   passes: number, mean_reward: number, per_task: Array<object>}}
 */
function computeBenchmarkMetrics(b) {
  const taskIds = Array.isArray(b && b.taskIds) ? b.taskIds : [];
  const k = Math.max(1, (b && b.k) || 1);
  // Only numeric rewards count; anything else contributes 0 to the sum.
  const rewardOf = (run) => (run && typeof run.reward === "number" ? run.reward : 0);

  const totals = { passAt: 0, passHat: 0, runs: 0, passes: 0, reward: 0 };
  const perTask = [];

  for (const taskId of taskIds) {
    const taskRuns = (b.runs || {})[taskId] || [];
    const n = taskRuns.length;
    const c = taskRuns.filter((run) => run && run.binary_pass).length;
    const row = {
      taskId,
      n,
      c,
      pass_at_k: passAtK(n, c, k),
      pass_hat_k: passHatK(n, c, k),
    };

    totals.passAt += row.pass_at_k;
    totals.passHat += row.pass_hat_k;
    totals.runs += n;
    totals.passes += c;
    totals.reward += taskRuns.reduce((sum, run) => sum + rewardOf(run), 0);
    perTask.push(row);
  }

  // Never divide by zero: an empty benchmark averages over a denominator of 1.
  const taskDenominator = Math.max(1, taskIds.length);
  const meanReward = totals.runs > 0 ? totals.reward / totals.runs : 0;

  return {
    pass_at_k: totals.passAt / taskDenominator,
    pass_hat_k: totals.passHat / taskDenominator,
    total_runs: totals.runs,
    passes: totals.passes,
    mean_reward: meanReward,
    per_task: perTask,
  };
}

function BenchStatusTag({ status, tr }) {
  const color = {
    draft: "var(--ink-mute)",
    running: "var(--amber, #b45309)",
    done: "var(--green, #2e8a53)",
    cancelled: "var(--red, #b91c1c)",
  }[status] || "var(--ink-mute)";
  const label = tr("bench_status_" + status) || status;
  return (
    <span className="tag" style={{color, borderColor: color}}>{label}</span>
  );
}

// One benchmark item: run the full agent↔patient loop until record_diagnosis,
// max_turns, or max_steps. Mirrors simulator.jsx:169–233 but headless.
/**
 * Execute a single benchmark run for one task and score it.
 *
 * The conversation is seeded with `task.reasonForCall` (when present) as the
 * first user message. Then the loop alternates:
 *   - agent step (`runOneAgentStep`) when the transcript is empty or the last
 *     message has role "user" or "tool";
 *   - patient step (`runOnePatientStep`) otherwise, unless the agent has
 *     already produced `maxTurns` plain (non-tool-call) assistant messages.
 * The loop ends when the agent reports `completed`, the signal is aborted,
 * the transcript reaches `maxSteps` messages, or `maxSteps` iterations have
 * been spent. The result is scored with `computeFinalReward`.
 *
 * This function never rejects: any thrown error is converted into a failed
 * run record (`binary_pass: false`, `error` set, empty transcript).
 *
 * @param {object} args
 * @param {object} args.task - Task definition; `reasonForCall` and `maxTurns`
 *   (default 12) are read here, the rest is passed through to the helpers.
 * @param {number} args.r - Run index, echoed back as `runIdx`.
 * @param {object} args.cfg - LLM configuration passed through to the steps.
 * @param {AbortSignal} [args.signal] - Optional cancellation signal.
 * @returns {Promise<object>} Run record: `runIdx`, `binary_pass`, `reward`,
 *   `overall_score`, `checklist`, `info`, `comments`, `messages`, `envState`,
 *   `completed`, `startedAt`, `finishedAt` (plus `error` on failure).
 */
async function runOneBenchmarkItem({ task, r, cfg, signal }) {
  const startedAt = new Date().toISOString();
  try {
    let convo = task && task.reasonForCall
      ? [{ idx: 0, role: "user", text: task.reasonForCall, t: benchNowHMS() }]
      : [];
    let state = {};
    let completed = false;
    const maxTurns = (task && task.maxTurns) || 12;
    const maxSteps = Math.max(10, maxTurns * 3);
    // Bound the number of iterations as well as the transcript length. A step
    // that returns no new messages leaves `convo.length` unchanged, and without
    // this cap the loop would call the model forever. A run that makes progress
    // adds at least one message per iteration, so it hits the length bound
    // first and is unaffected by this cap.
    let iterations = 0;
    while (!completed && convo.length < maxSteps && iterations < maxSteps) {
      iterations += 1;
      if (signal && signal.aborted) break;
      const last = convo[convo.length - 1];
      const lastRole = last && last.role;
      if (!last || lastRole === "user" || lastRole === "tool") {
        // Agent's move: it may emit several messages (e.g. tool call + result).
        const res = await runOneAgentStep({
          messages: convo, task, state, cfg, temperature: 0.3, signal,
          tools: buildToolsForTask(task),
        });
        convo = [...convo, ...((res && res.newMessages) || [])];
        state = (res && res.state) || state;
        completed = !!(res && res.completed);
      } else {
        // Patient's move, unless the agent has used up its spoken turns.
        // Assistant messages that carry tool calls do not count as turns.
        const asstTurns = convo.filter(m => m.role === "assistant" && !(m.toolCalls && m.toolCalls.length)).length;
        if (asstTurns >= maxTurns) break;
        const turn = await runOnePatientStep({ messages: convo, task, cfg, signal });
        convo = [...convo, turn];
      }
    }
    const reward = computeFinalReward({ messages: convo, task });
    return {
      runIdx: r,
      binary_pass: !!(reward && reward.binary_pass),
      reward: reward ? reward.reward : 0,
      overall_score: reward ? reward.overall_score : 0,
      checklist: (reward && reward.checklist) || {},
      info: (reward && reward.info) || {},
      comments: (reward && reward.comments) || "",
      messages: convo,
      envState: state,
      completed,
      startedAt,
      finishedAt: new Date().toISOString(),
    };
  } catch (err) {
    // Treat both an explicit AbortError and any failure that happened after
    // the signal was aborted as a cancellation rather than a genuine error.
    const aborted = err && (err.name === "AbortError" || (signal && signal.aborted));
    return {
      runIdx: r,
      binary_pass: false,
      reward: 0,
      overall_score: 0,
      checklist: {},
      info: {},
      comments: aborted ? "aborted" : "error",
      error: aborted ? "aborted" : ((err && err.message) || String(err)),
      messages: [],
      envState: {},
      completed: false,
      startedAt,
      finishedAt: new Date().toISOString(),
    };
  }
}

// ---- root component ----------------------------------------------------------

function Benchmark({ user, tasks, onTasksLoaded }) {
  const { t: tr } = useLang();

  if (!user || user.kind !== "manager") {
    return (
      <div className="bench">
        <div className="bench-head">
          <div>
            <h1>{tr("bench_title")}</h1>
            <p>Managers only — this page is restricted to users with a manager account.</p>
          </div>
        </div>
      </div>
    );
  }

  const [benchmarks, setBenchmarks] = React.useState(() => benchmarksLoad());
  const [selectedId, setSelectedId] = React.useState(null);
  const [transcript, setTranscript] = React.useState(null);

  const reload = React.useCallback(() => setBenchmarks(benchmarksLoad()), []);

  const upsert = React.useCallback((b) => {
    benchmarkUpsert(b);
    setBenchmarks(benchmarksLoad());
  }, []);

  const remove = React.useCallback((id) => {
    benchmarkDelete(id);
    setBenchmarks(benchmarksLoad());
  }, []);

  const createDraft = React.useCallback(() => {
    const id = "bench_" + Date.now().toString(36);
    const draft = {
      id,
      name: "",
      cohortId: null,
      cohortName: null,
      taskIds: [],
      k: 1,
      createdAt: new Date().toISOString(),
      status: "draft",
      runs: {},
    };
    upsert(draft);
    setSelectedId(id);
  }, [upsert]);

  const selected = selectedId ? benchmarks.find(b => b.id === selectedId) : null;

  // If current selection disappears (e.g. deleted externally), reset.
  React.useEffect(() => {
    if (selectedId && !benchmarks.find(b => b.id === selectedId)) {
      setSelectedId(null);
    }
  }, [selectedId, benchmarks]);

  let body;
  if (!selected) {
    body = <BenchList benchmarks={benchmarks} onOpen={setSelectedId} onNew={createDraft} tr={tr} />;
  } else if (selected.status === "draft") {
    body = <BenchBuilder
      benchmark={selected}
      tasks={tasks}
      onTasksLoaded={onTasksLoaded}
      onBack={() => setSelectedId(null)}
      onUpdate={upsert}
      tr={tr}
    />;
  } else if (selected.status === "running") {
    body = <BenchRunner
      benchmark={selected}
      tasks={tasks}
      onUpdate={upsert}
      tr={tr}
    />;
  } else {
    body = <BenchResults
      benchmark={selected}
      tasks={tasks}
      onBack={() => setSelectedId(null)}
      onUpdate={upsert}
      onDelete={(id) => { remove(id); setSelectedId(null); }}
      onOpenTranscript={setTranscript}
      tr={tr}
    />;
  }

  return (
    <div className="bench">
      {body}
      {transcript && (
        <BenchTranscriptModal
          messages={transcript.messages}
          taskId={transcript.taskId}
          runIdx={transcript.runIdx}
          reward={transcript.reward}
          onClose={() => setTranscript(null)}
        />
      )}
    </div>
  );
}

// ---- view A: list ------------------------------------------------------------

function BenchList({ benchmarks, onOpen, onNew, tr }) {
  const rows = benchmarks.slice().sort((a, b) =>
    String(b && b.createdAt || "").localeCompare(String(a && a.createdAt || "")));

  const headCols = "1.6fr 1.1fr 0.4fr 0.8fr 0.7fr 0.7fr 0.7fr";

  return (
    <>
      <div className="bench-head">
        <div>
          <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.06em"}}>
            Admin · manager
          </div>
          <h1>{tr("bench_title")}</h1>
          <p>{tr("bench_subtitle")}</p>
        </div>
        <div style={{marginLeft:"auto", display:"flex", gap:8, alignItems:"center"}}>
          <button className="btn primary" onClick={onNew}>{Ico.plus()} {tr("bench_new")}</button>
        </div>
      </div>

      {rows.length === 0 ? (
        <div style={{padding:"60px 20px", textAlign:"center", color:"var(--ink-mute)", fontSize:13, border:"1px dashed var(--line)", borderRadius:6}}>
          {Ico.chart()} <div style={{marginTop: 10}}>{tr("bench_empty")}</div>
        </div>
      ) : (
        <div style={{border:"1px solid var(--line)", borderRadius:6, overflow: "hidden"}}>
          <div style={{display:"grid", gridTemplateColumns: headCols, gap: 12, padding: "8px 14px", background:"var(--bg-sunken)", borderBottom:"1px solid var(--line)", fontSize: 11, fontFamily:"var(--font-mono)", color:"var(--ink-mute)", textTransform:"uppercase", letterSpacing:"0.05em"}}>
            <span>Name</span>
            <span>Cohort</span>
            <span>k</span>
            <span>Status</span>
            <span>{tr("bench_pass_at_k")}</span>
            <span>{tr("bench_pass_hat_k")}</span>
            <span>Created</span>
          </div>
          {rows.map(b => {
            const metrics = computeBenchmarkMetrics(b);
            const taskCount = (b.taskIds || []).length;
            return (
              <div key={b.id}
                style={{display:"grid", gridTemplateColumns: headCols, gap: 12, padding: "10px 14px", borderBottom:"1px solid var(--line)", cursor:"pointer", alignItems: "center"}}
                onClick={() => onOpen(b.id)}>
                <div style={{minWidth: 0}}>
                  <div style={{fontWeight: 500, overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>{b.name || "(unnamed)"}</div>
                  <code style={{fontFamily:"var(--font-mono)", fontSize:10.5, color:"var(--ink-mute)"}}>{b.id}</code>
                </div>
                <div style={{minWidth: 0}}>
                  <div style={{fontSize: 12.5, overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>{b.cohortName || b.cohortId || "—"}</div>
                  <div style={{fontSize: 11, color:"var(--ink-mute)", fontFamily:"var(--font-mono)"}}>{taskCount} tasks</div>
                </div>
                <div style={{fontFamily:"var(--font-mono)"}}>{b.k}</div>
                <div><BenchStatusTag status={b.status} tr={tr}/></div>
                <div style={{fontFamily:"var(--font-mono)"}}>{formatPct(metrics.pass_at_k)}</div>
                <div style={{fontFamily:"var(--font-mono)"}}>{formatPct(metrics.pass_hat_k)}</div>
                <div style={{fontFamily:"var(--font-mono)", fontSize: 11.5, color:"var(--ink-dim)"}}>
                  {String(b.createdAt || "").slice(0,10)}
                </div>
              </div>
            );
          })}
        </div>
      )}
    </>
  );
}

// ---- view B: builder (draft) -------------------------------------------------

function BenchBuilder({ benchmark, tasks, onTasksLoaded, onBack, onUpdate, tr }) {
  const [name, setName] = React.useState(benchmark.name || "");
  const [k, setK] = React.useState(benchmark.k || 1);
  const [cohortId, setCohortId] = React.useState(benchmark.cohortId || null);
  const [cohortName, setCohortName] = React.useState(benchmark.cohortName || null);
  const [selectedIds, setSelectedIds] = React.useState(() => new Set(benchmark.taskIds || []));

  const [cohorts, setCohorts] = React.useState([]);
  const [cohortsLoading, setCohortsLoading] = React.useState(true);
  const [cohortsError, setCohortsError] = React.useState(null);

  const [cohortTasks, setCohortTasks] = React.useState([]);
  const [tasksLoading, setTasksLoading] = React.useState(false);
  const [tasksError, setTasksError] = React.useState(null);

  const cfg = React.useMemo(() => getLlmConfig(), []);
  const hasKey = !!cfg.apiKey;

  // Load cohorts once.
  React.useEffect(() => {
    let cancelled = false;
    (async () => {
      setCohortsLoading(true);
      setCohortsError(null);
      try {
        if (!window.tasksApi || window.tasksApi.OFFLINE) {
          throw new Error("Backend offline — cohorts unavailable.");
        }
        const res = await window.tasksApi.listCohorts();
        if (cancelled) return;
        const items = Array.isArray(res && res.items) ? res.items : [];
        setCohorts(items);
        if (!cohortId && items[0]) {
          setCohortId(items[0].id);
          setCohortName(items[0].name);
        }
      } catch (err) {
        if (cancelled) return;
        setCohortsError((err && err.message) || "Failed to load cohorts");
      } finally {
        if (!cancelled) setCohortsLoading(false);
      }
    })();
    return () => { cancelled = true; };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Load tasks whenever the cohort changes.
  React.useEffect(() => {
    if (!cohortId) { setCohortTasks([]); return; }
    // Prefer tasks already in the app state (cohort-filtered) for instant hydrate.
    const fromProp = (tasks || []).filter(t => t && (t.cohortId === cohortId || t.cohort_id === cohortId));
    if (fromProp.length > 0) {
      setCohortTasks(fromProp);
    }
    let cancelled = false;
    (async () => {
      setTasksLoading(true);
      setTasksError(null);
      try {
        if (!window.tasksApi || window.tasksApi.OFFLINE) {
          if (fromProp.length === 0) throw new Error("Backend offline — task list unavailable.");
          return;
        }
        const res = await window.tasksApi.list({ cohort: cohortId, limit: 1000 });
        if (cancelled) return;
        const items = Array.isArray(res && res.items) ? res.items : [];
        setCohortTasks(items);
        if (onTasksLoaded && items.length > 0) onTasksLoaded(items);
      } catch (err) {
        if (cancelled) return;
        setTasksError((err && err.message) || "Failed to load tasks");
      } finally {
        if (!cancelled) setTasksLoading(false);
      }
    })();
    return () => { cancelled = true; };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [cohortId]);

  const changeCohort = (id) => {
    setCohortId(id);
    const c = cohorts.find(x => x.id === id);
    setCohortName(c ? c.name : null);
    setSelectedIds(new Set());
  };

  const toggle = (id) => {
    setSelectedIds(prev => {
      const next = new Set(prev);
      if (next.has(id)) next.delete(id);
      else next.add(id);
      return next;
    });
  };
  const selectAll = () => setSelectedIds(new Set(cohortTasks.map(t => t.id)));
  const selectNone = () => setSelectedIds(new Set());
  const allSelected = cohortTasks.length > 0 && cohortTasks.every(t => selectedIds.has(t.id));

  const buildPatch = () => ({
    ...benchmark,
    name,
    k: Math.max(1, Math.min(8, Number(k) || 1)),
    cohortId,
    cohortName,
    taskIds: Array.from(selectedIds),
  });

  const saveDraft = () => onUpdate(buildPatch());

  const runNow = () => {
    if (selectedIds.size === 0 || !hasKey) return;
    onUpdate({ ...buildPatch(), status: "running", runs: {} });
  };

  const totalRuns = selectedIds.size * Math.max(1, Number(k) || 1);

  return (
    <>
      <div className="bench-head">
        <div>
          <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.06em"}}>
            {tr("bench_status_draft")}
          </div>
          <h1>{name || tr("bench_new")}</h1>
          <p>{tr("bench_subtitle")}</p>
        </div>
        <div style={{marginLeft:"auto", display:"flex", gap: 8, alignItems: "center"}}>
          <button className="btn ghost" onClick={() => { saveDraft(); onBack(); }}>← {tr("bench_title")}</button>
        </div>
      </div>

      <div className="bench-layout">
        <div>
          <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em", marginBottom: 8}}>
            {tr("bench_pick_cohort")}
          </div>
          <select value={cohortId || ""} onChange={e => changeCohort(e.target.value)}
            style={{width:"100%", padding:"6px 8px", border:"1px solid var(--line)", borderRadius: 4, fontSize: 13, marginBottom: 14, background: "var(--bg, #fff)"}}>
            <option value="" disabled>{cohortsLoading ? "Loading cohorts…" : "Select a cohort"}</option>
            {cohorts.map(c => (
              <option key={c.id} value={c.id}>
                {c.name} ({c.total || c.task_count || 0})
              </option>
            ))}
          </select>
          {cohortsError && (
            <div className="tag red" style={{marginBottom: 10}}>{cohortsError}</div>
          )}

          <div style={{display: "flex", alignItems: "center", gap: 8, marginBottom: 8}}>
            <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em", flex: 1}}>
              {tr("bench_pick_tasks")} · <b style={{color:"var(--ink)"}}>{selectedIds.size}</b> {tr("bench_selected")}
            </div>
            <button className="btn ghost" style={{fontSize:11, padding:"2px 8px"}} onClick={selectAll} disabled={cohortTasks.length === 0 || allSelected}>
              Select all
            </button>
            <button className="btn ghost" style={{fontSize:11, padding:"2px 8px"}} onClick={selectNone} disabled={selectedIds.size === 0}>
              None
            </button>
          </div>
          <div style={{border:"1px solid var(--line)", borderRadius: 6, maxHeight: 460, overflow: "auto"}}>
            {tasksLoading && (
              <div style={{padding: "14px", color:"var(--ink-mute)", fontSize: 12.5}}>{tr("bench_loading_tasks")}</div>
            )}
            {!tasksLoading && tasksError && (
              <div style={{padding: "14px", color:"var(--red, #b91c1c)", fontSize: 12.5}}>{tasksError}</div>
            )}
            {!tasksLoading && !tasksError && cohortTasks.length === 0 && (
              <div style={{padding: "14px", color:"var(--ink-mute)", fontSize: 12.5}}>{tr("bench_no_tasks_in_cohort")}</div>
            )}
            {cohortTasks.map(t => {
              const sel = selectedIds.has(t.id);
              const chief = t.chief
                || (t.description && t.description.chief_complaint)
                || t.reasonForCall
                || (t.user_scenario && t.user_scenario.instructions && t.user_scenario.instructions.reason_for_call)
                || "";
              const dx = t.diagnosis
                || (t.description && t.description.suspected_dx)
                || "";
              return (
                <label key={t.id}
                  style={{display:"flex", gap: 10, alignItems:"flex-start", padding:"8px 12px", borderBottom: "1px solid var(--line)", cursor: "pointer", background: sel ? "var(--bg-sunken)" : ""}}>
                  <input type="checkbox" checked={sel} onChange={() => toggle(t.id)} style={{marginTop: 3}}/>
                  <div style={{flex: 1, minWidth: 0}}>
                    <div style={{fontSize: 12.5, overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>
                      {chief || "(no chief complaint)"}
                    </div>
                    <div style={{fontSize: 11, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>
                      <code>{t.id}</code>{dx ? " · " + dx : ""}
                    </div>
                  </div>
                </label>
              );
            })}
          </div>
        </div>

        <div style={{border:"1px solid var(--line)", borderRadius: 6, padding: 18, display:"flex", flexDirection:"column", gap: 14}}>
          <div className="form-field">
            <label>{tr("bench_name_ph")}</label>
            <input value={name} onChange={e => setName(e.target.value)} placeholder="e.g. smoke-1" autoFocus/>
          </div>
          <div className="form-field">
            <label>{tr("bench_k")}</label>
            <input type="number" min={1} max={8} value={k}
              onChange={e => setK(Math.max(1, Math.min(8, Number(e.target.value) || 1)))}/>
            <div style={{fontSize: 11, color:"var(--ink-mute)", marginTop: 3, fontFamily:"var(--font-mono)"}}>
              each task is run k times; pass@k and pass^k are computed over those runs.
            </div>
          </div>
          <div style={{padding: "12px 14px", background:"var(--bg-sunken)", borderRadius: 4, fontSize: 12.5, color:"var(--ink-dim)"}}>
            <b style={{color:"var(--ink)", fontFamily:"var(--font-mono)"}}>{selectedIds.size}</b>
            {" "}tasks ×{" "}
            <b style={{color:"var(--ink)", fontFamily:"var(--font-mono)"}}>{Math.max(1, Number(k) || 1)}</b>
            {" "}={" "}
            <b style={{color:"var(--ink)", fontFamily:"var(--font-mono)"}}>{totalRuns}</b>
            {" "}total runs
            <div style={{fontSize: 11.5, color:"var(--ink-mute)", marginTop: 4, fontFamily:"var(--font-mono)"}}>
              model: {cfg.model || "—"}
            </div>
          </div>
          {!hasKey && (
            <div style={{fontSize: 12, color:"var(--red, #b91c1c)", background:"var(--bg-sunken)", padding: "8px 12px", borderRadius: 4}}>
              {tr("bench_no_key")}
            </div>
          )}
          <div style={{display:"flex", gap: 8, marginTop: "auto"}}>
            <button className="btn" onClick={saveDraft}>Save draft</button>
            <button className="btn primary" onClick={runNow}
              disabled={selectedIds.size === 0 || !hasKey}
              style={{marginLeft: "auto"}}
              title={!hasKey ? tr("bench_no_key") : selectedIds.size === 0 ? "Pick at least one task" : "Start running"}
            >
              {Ico.play()} {tr("bench_run")}
            </button>
          </div>
        </div>
      </div>
    </>
  );
}

// ---- view C: running ---------------------------------------------------------

/**
 * View C: executes a benchmark and shows live progress.
 *
 * On mount, an effect starts one async job that walks `benchmark.taskIds` in
 * order and runs each task `k` times, one run at a time, through
 * `runOneBenchmarkItem`. Any runs already stored on `benchmark.runs` are
 * discarded: the job starts from an empty list per task. After every run the
 * result is appended locally, mirrored into React state for the chips, and
 * written via `benchmarkUpsert` with status "running". When the loop ends the
 * record is written once more with status "done" (or "cancelled" if the abort
 * signal fired) and handed to `onUpdate`.
 *
 * The Stop button and the effect cleanup both abort the shared
 * AbortController.
 *
 * @param {object} props
 * @param {object} props.benchmark - Record to run (`taskIds`, `k`, ...).
 * @param {Array<object>} props.tasks - Loaded tasks, used to resolve ids.
 * @param {function(object): void} props.onUpdate - Called once with the final record.
 * @param {function(string): string} props.tr - Translator.
 */
function BenchRunner({ benchmark, tasks, onUpdate, tr }) {
  const [runs, setRuns] = React.useState(() => ({ ...(benchmark.runs || {}) }));
  const [progress, setProgress] = React.useState({ taskIdx: 0, runIdx: 0 });
  const [error, setError] = React.useState(null);
  const abortRef = React.useRef(null);
  // Guards the effect body so the job is started at most once per mount.
  const startedRef = React.useRef(false);
  const cfg = React.useMemo(() => getLlmConfig(), []);

  const resolveTask = React.useCallback(
    (id) => (tasks || []).find(t => t && t.id === id) || null,
    [tasks],
  );

  React.useEffect(() => {
    // NOTE(review): if this effect is ever invoked twice on the same mount
    // (e.g. by a development-mode double invoke), the first cleanup aborts the
    // controller and this guard skips the restart, so the run would end as
    // "cancelled" immediately — confirm how the app is mounted.
    if (startedRef.current) return;
    startedRef.current = true;
    const ac = new AbortController();
    abortRef.current = ac;

    (async () => {
      const taskIds = benchmark.taskIds || [];
      const k = Math.max(1, benchmark.k || 1);
      // Source of truth for this job; React state only mirrors it for display.
      const localRuns = {};
      for (const tid of taskIds) localRuns[tid] = [];
      setRuns({ ...localRuns });

      // Writes the in-progress record straight to the store (not via onUpdate,
      // so the parent's state is not refreshed on every run).
      const persist = (status) => {
        benchmarkUpsert({ ...benchmark, runs: { ...localRuns }, status });
      };

      let aborted = false;
      outer: for (let i = 0; i < taskIds.length; i++) {
        const taskId = taskIds[i];
        // Unknown ids still run, against a stub task carrying only the id.
        const task = resolveTask(taskId) || { id: taskId };
        for (let r = 0; r < k; r++) {
          if (ac.signal.aborted) { aborted = true; break outer; }
          setProgress({ taskIdx: i, runIdx: r });
          let run;
          try {
            run = await runOneBenchmarkItem({ task, r, cfg, signal: ac.signal });
          } catch (err) {
            // runOneBenchmarkItem already catches; this is defense-in-depth.
            run = {
              runIdx: r,
              binary_pass: false,
              reward: 0,
              overall_score: 0,
              error: (err && err.message) || String(err),
              messages: [],
              envState: {},
              completed: false,
              finishedAt: new Date().toISOString(),
            };
          }
          // A run that was in flight when the abort fired is dropped, not recorded.
          if (ac.signal.aborted) { aborted = true; break outer; }
          localRuns[taskId] = [...(localRuns[taskId] || []), run];
          setRuns({ ...localRuns });
          persist("running");
        }
      }

      const finalStatus = aborted ? "cancelled" : "done";
      const finished = { ...benchmark, runs: localRuns, status: finalStatus };
      benchmarkUpsert(finished);
      onUpdate(finished);
    })().catch(err => {
      // NOTE(review): this only surfaces the message. Nothing here moves the
      // record out of "running", and the loop has already stopped, so the Stop
      // button can no longer finish the benchmark — confirm whether a final
      // status should be written in this path.
      setError((err && err.message) || String(err));
    });

    return () => {
      if (abortRef.current) abortRef.current.abort();
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  const cancel = () => {
    if (abortRef.current) abortRef.current.abort();
  };

  // Aggregate counters for the side panel, recomputed from state each render.
  const taskIds = benchmark.taskIds || [];
  const k = Math.max(1, benchmark.k || 1);
  let totalDone = 0, totalPass = 0;
  for (const tid of taskIds) {
    const rs = runs[tid] || [];
    totalDone += rs.length;
    totalPass += rs.filter(x => x && x.binary_pass).length;
  }
  const totalPlanned = taskIds.length * k;

  return (
    <>
      <div className="bench-head">
        <div>
          <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.06em"}}>
            {tr("bench_running")}
          </div>
          <h1>{benchmark.name || benchmark.id}</h1>
          <p>
            {benchmark.cohortName || benchmark.cohortId} · {taskIds.length} tasks × k={k}
          </p>
        </div>
      </div>

      {error && (
        <div style={{margin:"0 0 14px", padding:"8px 12px", fontSize:12.5, border:"1px solid var(--line)", background:"var(--bg-sunken)", borderRadius:"var(--r-md)", color:"var(--red, #b91c1c)"}}>
          {error}
        </div>
      )}

      <div className="bench-layout">
        <div>
          <div style={{display:"flex", alignItems:"center", gap: 10, marginBottom: 10}}>
            <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em"}}>
              Live progress
            </div>
            <div style={{fontFamily:"var(--font-mono)", fontSize: 11.5, color:"var(--ink-dim)"}}>
              task {Math.min(progress.taskIdx + 1, taskIds.length)}/{taskIds.length} · run {progress.runIdx + 1}/{k}
            </div>
          </div>
          <div style={{border:"1px solid var(--line)", borderRadius: 6, maxHeight: 520, overflow: "auto"}}>
            {taskIds.map((tid, ix) => {
              const task = resolveTask(tid);
              const chief = (task && task.chief) || tid;
              const rs = runs[tid] || [];
              return (
                <div key={tid}
                  style={{display:"grid", gridTemplateColumns: "1fr auto", gap: 12, padding:"10px 12px", borderBottom:"1px solid var(--line)", alignItems: "center"}}>
                  <div style={{minWidth: 0}}>
                    <div style={{fontSize: 12.5, overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>
                      {chief || tid}
                    </div>
                    <div style={{fontSize: 10.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)"}}>{tid}</div>
                  </div>
                  <div className="bench-run-grid" style={{maxWidth: Math.max(60, k * 20)}}>
                    {Array.from({length: k}).map((_, r) => {
                      // One chip per planned run: recorded runs show pass/fail,
                      // the slot matching `progress` shows running, rest pending.
                      const run = rs[r];
                      let state = "pending";
                      if (run) state = run.binary_pass ? "pass" : "fail";
                      else if (ix === progress.taskIdx && r === progress.runIdx) state = "running";
                      return <span key={r} className="bench-run-chip" data-state={state}/>;
                    })}
                  </div>
                </div>
              );
            })}
          </div>
        </div>

        <div style={{border:"1px solid var(--line)", borderRadius: 6, padding: 18, display:"flex", flexDirection:"column", gap: 14}}>
          <div>
            <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em", marginBottom: 4}}>
              Aggregate
            </div>
            <div style={{fontFamily:"var(--font-mono)", fontSize: 14, color:"var(--ink)"}}>
              {totalPass} <span style={{color: "var(--green, #2e8a53)"}}>pass</span>
              {" · "}
              {totalDone - totalPass} <span style={{color: "var(--red, #b91c1c)"}}>fail</span>
              {" "}<span style={{color: "var(--ink-mute)"}}>/ {totalDone} done ({totalPlanned} planned)</span>
            </div>
          </div>
          <div>
            <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em", marginBottom: 4}}>
              Model
            </div>
            <div style={{fontFamily:"var(--font-mono)", fontSize: 12.5}}>{cfg.model || "—"}</div>
          </div>
          <button className="btn" onClick={cancel} style={{marginTop:"auto"}}>
            {Ico.pause()} {tr("bench_stop")}
          </button>
        </div>
      </div>
    </>
  );
}

// ---- view D: results (done / cancelled) --------------------------------------

function BenchResults({ benchmark, tasks, onBack, onUpdate, onDelete, onOpenTranscript, tr }) {
  const metrics = computeBenchmarkMetrics(benchmark);
  const taskIds = benchmark.taskIds || [];
  const k = Math.max(1, benchmark.k || 1);
  const resolveTask = (id) => (tasks || []).find(t => t && t.id === id) || null;

  const rerun = () => {
    onUpdate({ ...benchmark, status: "running", runs: {} });
  };

  const del = () => {
    const ok = window.confirm(`Delete benchmark "${benchmark.name || benchmark.id}"? This cannot be undone.`);
    if (!ok) return;
    onDelete(benchmark.id);
  };

  const exportJson = () => {
    const allRuns = [];
    for (const tid of taskIds) {
      for (const run of ((benchmark.runs || {})[tid] || [])) {
        allRuns.push({
          taskId: tid,
          runIdx: run.runIdx,
          binary_pass: run.binary_pass,
          reward: run.reward,
          overall_score: run.overall_score,
          checklist: run.checklist,
          messages: run.messages,
          completed: run.completed,
          error: run.error,
          finishedAt: run.finishedAt,
        });
      }
    }
    const payload = {
      benchmark: {
        id: benchmark.id,
        name: benchmark.name,
        cohortId: benchmark.cohortId,
        cohortName: benchmark.cohortName,
        taskIds,
        k,
        createdAt: benchmark.createdAt,
        status: benchmark.status,
      },
      metrics,
      runs: allRuns,
    };
    const safeName = String(benchmark.name || benchmark.id).replace(/[^a-z0-9_\-.]+/gi, "_");
    downloadBlob(`${safeName}.benchmark.json`, JSON.stringify(payload, null, 2), "application/json");
  };

  const statusLabel = tr("bench_status_" + benchmark.status) || benchmark.status;

  return (
    <>
      <div className="bench-head">
        <div>
          <div style={{fontSize:11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.06em"}}>
            {statusLabel}
          </div>
          <h1>{benchmark.name || benchmark.id}</h1>
          <p>
            {benchmark.cohortName || benchmark.cohortId} · {taskIds.length} tasks × k={k} · {String(benchmark.createdAt || "").slice(0,10)}
          </p>
        </div>
        <div style={{marginLeft:"auto", display:"flex", gap: 8, alignItems: "center"}}>
          <button className="btn ghost" onClick={onBack}>← {tr("bench_title")}</button>
          <button className="btn" onClick={rerun}>{Ico.reset()} {tr("bench_rerun")}</button>
          <button className="btn" onClick={exportJson}>{Ico.download()} {tr("bench_export")}</button>
          <button className="btn ghost" onClick={del} style={{color:"var(--red, #b91c1c)"}}>{tr("bench_delete")}</button>
        </div>
      </div>

      <div className="bench-stats">
        <div className="kpi">
          <div className="kpi-l">{tr("bench_pass_at_k")}</div>
          <div className="kpi-v">{formatPct(metrics.pass_at_k)}</div>
        </div>
        <div className="kpi">
          <div className="kpi-l">{tr("bench_pass_hat_k")}</div>
          <div className="kpi-v">{formatPct(metrics.pass_hat_k)}</div>
        </div>
        <div className="kpi">
          <div className="kpi-l">{tr("bench_total_runs")}</div>
          <div className="kpi-v">{metrics.total_runs}</div>
        </div>
        <div className="kpi">
          <div className="kpi-l">Passes</div>
          <div className="kpi-v">{metrics.passes}</div>
        </div>
        <div className="kpi">
          <div className="kpi-l">{tr("bench_mean_reward")}</div>
          <div className="kpi-v">{metrics.mean_reward.toFixed(2)}</div>
        </div>
      </div>

      <div style={{border:"1px solid var(--line)", borderRadius: 6, overflow: "hidden"}}>
        <div className="bench-task-row" style={{background:"var(--bg-sunken)", fontSize: 11, fontFamily:"var(--font-mono)", color:"var(--ink-mute)", textTransform:"uppercase", letterSpacing:"0.05em"}}>
          <span>Task</span>
          <span>{tr("bench_pass_at_k")}</span>
          <span>{tr("bench_pass_hat_k")}</span>
          <span>Runs</span>
        </div>
        {taskIds.length === 0 && (
          <div style={{padding: "14px 12px", color:"var(--ink-mute)", fontSize: 12.5}}>No tasks in this benchmark.</div>
        )}
        {taskIds.map(tid => {
          const task = resolveTask(tid);
          const runs = (benchmark.runs || {})[tid] || [];
          const n = runs.length;
          const c = runs.filter(r => r && r.binary_pass).length;
          const chief = (task && task.chief) || tid;
          return (
            <div key={tid} className="bench-task-row">
              <div style={{minWidth: 0}}>
                <div style={{fontSize: 12.5, overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>{chief}</div>
                <code className="mono">{tid}</code>
              </div>
              <div className="mono">{formatPct(passAtK(n, c, k))}</div>
              <div className="mono">{formatPct(passHatK(n, c, k))}</div>
              <div className="bench-run-grid" style={{maxWidth: Math.max(80, k * 22)}}>
                {Array.from({length: k}).map((_, r) => {
                  const run = runs[r];
                  let state = "pending";
                  let title = "no run";
                  if (run) {
                    state = run.binary_pass ? "pass" : "fail";
                    title = run.comments || (run.binary_pass ? "pass" : (run.error || "fail"));
                  }
                  return (
                    <span key={r} className="bench-run-chip" data-state={state}
                      title={`run ${r + 1}: ${title}`}
                      onClick={(e) => {
                        e.stopPropagation();
                        if (!run) return;
                        onOpenTranscript({
                          messages: run.messages || [],
                          taskId: tid,
                          runIdx: r,
                          reward: run,
                        });
                      }}
                    />
                  );
                })}
              </div>
            </div>
          );
        })}
      </div>
    </>
  );
}

// ---- transcript modal --------------------------------------------------------

function BenchTranscriptModal({ messages, taskId, runIdx, reward, onClose }) {
  React.useEffect(() => {
    const onKey = (e) => { if (e.key === "Escape") onClose(); };
    window.addEventListener("keydown", onKey);
    return () => window.removeEventListener("keydown", onKey);
  }, [onClose]);

  const msgs = Array.isArray(messages) ? messages : [];
  const pass = reward && reward.binary_pass;
  const rewardNum = reward && typeof reward.reward === "number" ? reward.reward : null;

  return (
    <div
      onClick={onClose}
      style={{position:"fixed", inset: 0, background:"rgba(0,0,0,.4)", zIndex: 60, display:"flex", alignItems:"center", justifyContent:"center"}}
    >
      <div
        onClick={e => e.stopPropagation()}
        style={{background:"var(--bg, #fff)", border:"1px solid var(--line)", borderRadius: 6, width: 760, maxWidth: "92vw", maxHeight: "85vh", display:"flex", flexDirection:"column"}}
      >
        <div style={{display:"flex", alignItems:"center", padding: "14px 18px", borderBottom: "1px solid var(--line)"}}>
          <div style={{flex: 1, minWidth: 0}}>
            <h3 style={{margin: 0, fontSize: 15}}>{`Transcript · run ${runIdx + 1}`}</h3>
            <div style={{fontSize: 11.5, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", overflow:"hidden", textOverflow:"ellipsis", whiteSpace:"nowrap"}}>
              task <code>{taskId}</code>
              {rewardNum != null ? ` · reward ${rewardNum.toFixed(2)}` : ""}
              {reward && reward.binary_pass != null ? ` · ${pass ? "PASS" : "FAIL"}` : ""}
              {reward && reward.error ? ` · ${reward.error}` : ""}
            </div>
          </div>
          <button className="btn ghost" onClick={onClose} style={{padding: "3px 10px"}}>{Ico.x()}</button>
        </div>
        <div style={{flex: 1, overflowY: "auto", padding: "14px 18px"}}>
          {msgs.length === 0 ? (
            <div style={{color: "var(--ink-mute)", fontSize: 12.5, textAlign:"center", padding: 30}}>
              No messages recorded.
              {reward && reward.error && (
                <div style={{marginTop: 8, fontFamily:"var(--font-mono)", fontSize: 11.5}}>error: {reward.error}</div>
              )}
            </div>
          ) : msgs.map((m, i) => (
            <div key={i} style={{marginBottom: 12, paddingBottom: 10, borderBottom: "1px dashed var(--line)"}}>
              <div style={{fontSize: 11, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", textTransform:"uppercase", letterSpacing:"0.05em", marginBottom: 4}}>
                {m.role}{m.name ? ` · ${m.name}` : ""}{m.t ? ` · ${m.t}` : ""}
              </div>
              {m.role === "tool" ? (
                <div style={{fontFamily:"var(--font-mono)", fontSize: 11.5, background:"var(--bg-sunken)", padding:"8px 12px", borderRadius: 4, color:"var(--ink-dim)"}}>
                  <div>
                    <span style={{color:"var(--ink-mute)"}}>args </span>
                    {JSON.stringify(m.args || {})}
                  </div>
                  <div style={{marginTop: 3}}>
                    <span style={{color:"var(--ink-mute)"}}>ret </span>
                    {JSON.stringify(m.returns)}
                  </div>
                </div>
              ) : (
                <div style={{fontSize: 13, lineHeight: 1.45, whiteSpace: "pre-wrap"}}>{m.text}</div>
              )}
              {m.toolCalls && m.toolCalls.length > 0 && (
                <div style={{fontSize: 11, color:"var(--ink-mute)", fontFamily:"var(--font-mono)", marginTop: 4}}>
                  tool_calls: {m.toolCalls.map(tc => tc.name || tc.function?.name).filter(Boolean).join(", ")}
                </div>
              )}
            </div>
          ))}
        </div>
        {reward && reward.comments && (
          <div style={{padding: "10px 18px", borderTop: "1px solid var(--line)", fontSize: 11.5, color:"var(--ink-dim)", fontFamily:"var(--font-mono)"}}>
            {reward.comments}
          </div>
        )}
      </div>
    </div>
  );
}

// Publish the Benchmark component as a property of `window`.
// NOTE(review): presumably needed because this file is loaded as a
// <script type="text/babel"> block and other script blocks look the
// component up globally — confirm against index.html.
Object.assign(window, { Benchmark });
