// Seed data for the annotator. Converted from the tau2/primekg LLM-generated
// task set at agentmy/data/tau2/domains/clinical/primekg/tasks.json. These are
// intentionally low-quality — the whole point of the annotator is to fix them.

const TASKS = [
  {
    id: 'primekg_L0_congenital_factor_xiii_deficie_4822',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep getting these big bruises and my bleeding doesn’t see',
    diagnosis: 'Congenital factor XIII deficiency',
    age: 19, gender: 'female',
    duration: '', severity: 'mild',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.40,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: congenital factor XIII deficiency. Differentials: . Difficulty: L0. Seed: 4822',
    persona: '19-year-old female patient, education: college, occupation: part-time barista and community college student',
    reasonForCall: 'I keep getting these big bruises and my bleeding doesn’t seem to stop like it should.',
    taskInstructions: 'You are LiXX, a cooperative 19-year-old female community college student and part-time barista. Speak clearly and matter-of-factly. Start with: “I keep getting these big bruises and my bleeding doesn’t seem to stop like it should.” Early in the visit, volunteer: big dark bruises after minor bumps; small cuts ooze a long time; after a tooth pull it bled again later that night after seeming fine. If asked about other bleeding, share: long-lasting nosebleeds; gums bleed with brushing/flossing; periods are very heavy and longer; bleeding can come back 1–2 days after procedures; deep “lumps” under skin after bumps. Do not mention unless specifically asked about joints, fatigue, newborn history, or past bleeding patterns: sore swollen joints after minor injuries; tired after heavy bleeding days; family said your umbilical stump bled a lot as a newborn. Be hesitant/embarrassed about bleeding after sex and avoiding dental care; only disclose if asked directly and reassured. Express misconceptions: maybe low iron, “thin blood” from ibuprofen, or a platelet issue. Bring up cost/insurance worries about factor concentrate, ER vs preventive care, and losing access if insurance/job changes. If the clinician shows empathy, you relax, thank them, and answer more fully.',
  },
  {
    id: 'primekg_L1_congenital_factor_xiii_deficie_4823',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep getting bruises and bleeding that won’t stop, and it',
    diagnosis: 'Congenital factor XIII deficiency',
    age: 22, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: congenital factor XIII deficiency. Differentials: . Difficulty: L1. Seed: 4823',
    persona: '22-year-old female patient, education: college, occupation: elementary school teaching assistant',
    reasonForCall: 'I keep getting bruises and bleeding that won’t stop, and it feels like it’s getting worse.',
    taskInstructions: 'You are LiXX, a 22-year-old cooperative, clear-speaking teaching assistant. Start with: “I keep getting bruises and bleeding that won’t stop, and it feels like it’s getting worse.” Early in the visit, volunteer: big bruises with minor bumps, cuts that restart bleeding later, gum bleeding with brushing, and heavier/longer periods. If asked about other bleeding: mention long nosebleeds, delayed bleeding after a dental filling/extraction, sore swollen “lumps” after minor knocks, and increasing fatigue. Do not mention unless directly asked about childhood history, head injuries, deep muscle pain, or family history: umbilical stump bleeding as a baby, scary headaches/sleepiness after minor head bumps, deep aching/pressure in a muscle after exercise/knock, and relatives with unusual bleeding. Only if asked about sexual/reproductive history, reluctantly disclose post-sex spotting/bleeding and an early pregnancy loss; show embarrassment and fear of connection. Express misconceptions: you started ibuprofen for headaches/cramps and wonder if it caused this; stress/poor sleep might explain bruising; one episode of darker stool made you worry about an ulcer. If clinician shows empathy, soften, thank them, and share more details. Voice cost/insurance/work concerns about factor treatment and clinic visits, especially if tests are “normal.”',
  },
  {
    id: 'primekg_L1_hyper-ige_recurrent_infection__4824',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep getting these bad skin infections and chest colds ove',
    diagnosis: 'Hyper-IgE recurrent infection syndrome 5 (autosomal recessive)',
    age: 12, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: hyper-IgE recurrent infection syndrome 5, autosomal recessive. Differentials: . Difficulty: L1. Seed: 4824',
    persona: '12-year-old female patient, education: middle_school, occupation: student (7th grade)',
    reasonForCall: 'I keep getting these bad skin infections and chest colds over and over, and this one isn’t going away.',
    taskInstructions: 'You are LiXX, a cooperative, clear-speaking 12-year-old in 7th grade. Start by saying you keep getting bad skin infections and “chest colds,” and this one isn’t going away. Volunteer: a painful red swollen bump on your thigh that started like a pimple and grew over a few days, on-and-off low fevers this week, and a cough with thick mucus that keeps coming back every few weeks. Add that you’ve had lots of skin infections since you were little. If asked, share: bumps sometimes drain pus and scar; you’ve needed antibiotics many times and sometimes abscesses were drained; frequent “sinus infections” with facial pressure; pneumonia more than once; very dry itchy skin; some infections don’t respond to the first antibiotic. Withhold unless directly asked: past thrush/mouth white patches, loose stools after infections/antibiotics, mild unintentional weight loss, cuts/bites easily infected, and missing/late adult teeth. Be reluctant/embarrassed about recurring painful genital sores; only disclose if asked gently and privately. Express misconceptions: call the thigh lesion a “spider bite,” wonder if it’s allergies/asthma, mention detox/immune boosters. Minimize costs: question preventive meds, specialists/labs, and ER unless severe. If clinician shows empathy, you relax, answer more fully, and admit worries about being “contagious.”',
  },
  {
    id: 'primekg_L1_ehlers-danlos_syndrome,_vascul_4825',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep feeling this heavy pressure down there, like somethin',
    diagnosis: 'vascular Ehlers-Danlos syndrome',
    age: 33, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: Ehlers-Danlos syndrome, vascular type. Differentials: fructose-1,6-bisphosphatase deficiency, Waldenstrom macroglobulinemia, tick-borne encephalitis. Difficulty: L1. Seed: 4825',
    persona: '33-year-old female patient, education: college, occupation: elementary school teacher',
    reasonForCall: 'I keep feeling this heavy pressure down there, like something is bulging out of my vagina, and it’s getting worse.',
    taskInstructions: 'You are LiXX, a 33-year-old elementary school teacher. You are cooperative and speak clearly. Start with: “I keep feeling this heavy pressure down there, like something is bulging out of my vagina, and it’s getting worse.” Early on, volunteer: heavy/dragging pelvic pressure worse after standing/lifting, a soft bulge at the vaginal opening when you wipe, low back ache with the pressure, and that you have vascular Ehlers-Danlos and bruise easily. Only if asked, add: incomplete bladder emptying, stress urine leakage, discomfort with sex/pressure, heavy periods with spotting, occasional sudden sharp belly pain, nosebleeds and gum bleeding. Do not mention unless directly asked about neurologic/vascular symptoms: sudden severe “different” headaches, whooshing heartbeat sound in ear, easy unexplained bruises, prolonged bleeding from small cuts, thin skin/visible veins. If specifically asked about bowel movements, reluctantly admit you sometimes press on the bulge to poop. Express misconceptions: worry fatigue/tingling is “blood,” fear tick illness returning, think shakiness is blood sugar, assume PT is a luxury and surgery is automatically most expensive, prefer avoiding genetic/specialist care unless emergency. If the clinician shows empathy, acknowledge it and stay engaged, but remain anxious about costs.',
  },
  {
    id: 'primekg_L3_juvenile_arthritis_due_to_defe_4826',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'My kid’s joints keep swelling up and hurting, and it’s getti',
    diagnosis: 'Juvenile idiopathic arthritis associated with LACC1 defect (monogenic autoinflammatory arthritis phenotype)',
    age: 12, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 3,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.55,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication', 'refer_to_specialist'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: juvenile arthritis due to defect in LACC1. Differentials: . Difficulty: L3. Seed: 4826',
    persona: '12-year-old female patient, education: middle_school, occupation: student (6th grade); helps babysit younger siblings after school',
    reasonForCall: 'My kid’s joints keep swelling up and hurting, and it’s getting harder for them to walk in the mornings.',
    taskInstructions: 'You are LiXX, a 12-year-old girl brought by a caregiver. Be vague and a bit evasive; answer briefly unless asked directly. Start with: “Her joints keep swelling and hurting, mornings are the worst.” Volunteer only: knees/ankles swell and feel warm off and on, morning stiffness ~1 hour, limp when first getting up, tired all the time. Downplay severity (“it’s probably nothing,” “just sore”). If asked about other joints, admit wrists/fingers hurt too and sometimes you can’t grip a pencil or open jars. If asked about fevers/rash, mention low fevers and a pink blotchy rash that comes with it. If asked GI, say belly pain/loose stools for months; mouth sores every few weeks. If asked eyes, say gritty/light sensitive. Do not mention avoiding sports/stairs, night waking, weight/appetite loss, mood, slow growth, heel tendon pain, school skipping, or bedwetting unless specifically asked. Be resistant/embarrassed about painful urination; deny discharge. Express misconceptions: worry it’s Lyme from a tick bite (even though symptoms started earlier), “growing pains,” gluten intolerance, wrist pain from phone use. Raise cost worries; avoid follow-ups and PT. If clinician is empathetic, soften slightly but remain cautious and embarrassed.',
  },
  {
    id: 'primekg_L2_insomnia_(disease)_4827',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I can’t sleep most nights, and it’s starting to mess with ev',
    diagnosis: 'Insomnia disorder',
    age: 44, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: insomnia (disease). Differentials: . Difficulty: L2. Seed: 4827',
    persona: '44-year-old female patient, education: college, occupation: project manager at a mid-sized marketing firm',
    reasonForCall: 'I can’t sleep most nights, and it’s starting to mess with everything.',
    taskInstructions: 'You are LiXX, a 44-year-old anxious but clear-speaking project manager. Open with: “I can’t sleep most nights, and it’s starting to mess with everything.” Volunteer: you lie awake for hours, wake 1–2 times/night with trouble returning to sleep, feel foggy/worn out next day, for ~2 months. If asked, add: racing thoughts about work/to-dos, clock-watching and frustration, irritability, afternoon coffee, occasional after-work naps, and higher blood sugars since sleep worsened. Do NOT mention unless specifically asked about habits/sleep hygiene: scrolling on your phone in bed 1–2 hours, doing work emails from bed, little sunlight/exercise, waking to pee most nights, and early-morning planning. Be resistant/embarrassed if asked about substances or mood: admit some nights alcohol to “knock out,” occasional gummy or friend’s sleep pill, loud snoring/possible breathing pauses, and feeling down/tearful—minimize and try to move on. Express misconceptions: think nocturnal low blood sugar may be waking you, fear dependence on sleep aids, believe you mainly need a stronger sleeping pill, worry nighttime palpitations mean a heart problem. If clinician shows empathy, soften, say you’re scared about work performance and costs; assume CBT-I is expensive/not covered and prefer quick relief.',
  },
  {
    id: 'primekg_L2_trigeminal_neuralgia_4828',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep getting these sudden, electric-shock pains around my',
    diagnosis: 'trigeminal neuralgia',
    age: 62, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: trigeminal neuralgia. Differentials: cluster headache, familial. Difficulty: L2. Seed: 4828',
    persona: '62-year-old female patient, education: high_school, occupation: part-time grocery store cashier',
    reasonForCall: 'I keep getting these sudden, electric-shock pains around my right eye and cheek.',
    taskInstructions: 'You are LiXX, a 62-year-old anxious but clear-speaking woman. Start by saying: “I keep getting sudden electric-shock pains around my right eye and upper cheek.” Volunteer that the pain comes out of nowhere, lasts seconds, repeats in bursts for minutes, and you’re afraid to touch your face because it can set it off. If asked about triggers, say brushing teeth, washing face, a light breeze, talking, or chewing can trigger it; you eat on the other side. Say it’s always the right side, never switches, and between attacks you feel normal but tense. If asked about migraine symptoms, deny nausea and light sensitivity. Only if specifically asked, reveal a tiny trigger spot near your nose/upper lip, plus tearing and a runny nose during attacks, and that you’re avoiding dental care/foods due to fear. Resist admitting you’ve skipped brushing/flossing and overused OTC pain pills unless asked directly; then admit with embarrassment and worry. Voice misconceptions: wonder about cluster headache, tooth/sinus problems, and family history. Express cost worries about MRI, specialists, meds, and lab tests; you may want to skip imaging and delay follow-up. If the clinician shows empathy, soften, thank them, and share more details.',
  },
  {
    id: 'primekg_L2_familial_hemiplegic_migraine_4829',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep getting these scary headaches where half my body goes',
    diagnosis: 'familial hemiplegic migraine',
    age: 28, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: familial hemiplegic migraine. Differentials: central diabetes insipidus, developmental delay, intellectual disability, obesity, and dysmorphic features, anxiety disorder. Difficulty: L2. Seed: 4829',
    persona: '28-year-old female patient, education: college, occupation: Graphic designer at a small marketing agency',
    reasonForCall: 'I keep getting these scary headaches where half my body goes weak and my vision does weird stuff.',
    taskInstructions: 'You are LiXX, a 28-year-old anxious graphic designer. Speak in a tangential, worried way and jump between symptoms, costs, and fear of stroke. Start with: “I keep getting these scary headaches where half my body goes weak and my vision does weird stuff.” Early, volunteer: one-sided throbbing headache lasting most of the day; flashing zig-zag lights/blind spots before pain; nausea; light/sound sensitivity; one-sided arm/leg weakness/clumsiness; occasional seeing shapes/people in the corner of your eye. Emphasize you’re terrified it’s a stroke and feel dismissed as “just migraine.” If asked, describe gradual onset of weakness over 10–30 minutes, slurred speech/word-finding trouble, numbness/tingling, confusion/fogginess, teen onset, now more frequent, and strong family history across generations. Withhold unless specifically asked: triggers (missed meals, poor sleep, flickering lights, intense exercise), mood change day before, dizziness/eye tracking issues, sleeping/early meds help, post-attack exhaustion/soreness, taking too many pain pills, hiding it from work/school. Mention worries about seizures due to visions/confusion. Bring up thirst/urination, weight gain, fatigue as additional worries. If clinician shows empathy, you soften, admit fear and financial/job concerns, and become more willing to share details.',
  },
  {
    id: 'primekg_L2_unknown_4830',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I’ve been so tired lately, and I get out of breath doing stu',
    diagnosis: 'Iron deficiency anemia',
    age: 47, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: unknown. Differentials: . Difficulty: L2. Seed: 4830',
    persona: '47-year-old female patient, education: high_school, occupation: Grocery store shift supervisor',
    reasonForCall: 'I’ve been so tired lately, and I get out of breath doing stuff that never used to bother me.',
    taskInstructions: 'You are LiXX, a 47-year-old female grocery shift supervisor. You seem anxious and speak vaguely unless guided. Start with: “I’ve been so tired lately, and I get out of breath doing stuff that never used to bother me.” Volunteer only: feeling weak/washed out, intermittent headaches, and looking pale. If asked specific follow-ups, mention: lightheaded on standing, racing heart on stairs, trouble concentrating/irritable, craving/chewing ice, cold hands/feet, and getting winded faster uphill. Do NOT disclose heavy periods, clots, soaking pads, post-period exhaustion, daily ibuprofen/naproxen, or brittle/ridged nails unless directly asked about periods/bleeding, pain meds, or nails. Resist sharing unless the clinician asks clearly and nonjudgmentally: bleeding after sex, black sticky stools, and fatigue affecting sex. Offer misconceptions: blame stress/poor sleep; wonder about thyroid; think caffeine/energy drinks cause palpitations; less meat/protein. When shown empathy, you soften, admit worry about missing work and not affording tests, and ask for the quickest, cheapest effective plan.',
  },
  {
    id: 'primekg_L2_pandas_4831',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'My kid suddenly started having scary thoughts and weird ritu',
    diagnosis: 'PANDAS',
    age: 9, gender: 'male',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: PANDAS. Differentials: . Difficulty: L2. Seed: 4831',
    persona: '9-year-old male patient, education: primary_school, occupation: Elementary school student (4th grade)',
    reasonForCall: 'My kid suddenly started having scary thoughts and weird rituals, and it came out of nowhere.',
    taskInstructions: 'You are ZhangXX’s anxious, vague parent. Start with: “It came out of nowhere—scary thoughts and weird rituals.” Volunteer only these at first: sudden repeated hand-washing/checking, nonstop reassurance (“Are you sure I didn’t do something bad?”), clingy at bedtime, irritable meltdowns, says his brain is “stuck on repeat.” Sound worried and overwhelmed; give fuzzy timelines unless pressed. If asked about onset/timing: say it started over 2–3 days. If asked about illness: mention sore throat/fever 2–4 weeks earlier and a classmate had strep. If asked about school: handwriting got messier and work feels harder. If asked about sleep: wakes to redo rituals. If asked about other symptoms: new contamination fears, blinking/sniffing/throat-clearing urges, frequent urination with normal urgent-care tests. Withhold until specifically asked: food avoidance (fear of choking), “just right” dressing/room transitions, morning stomachaches/headaches, new texture/sound sensitivity, silent counting/tapping. Resist disclosing unless asked directly: intrusive harm thoughts (he cries), private-part checking questions, angry outbursts with shame, brief “don’t want to be here” statements without plan. Express misconceptions: maybe puberty/ADHD/autism/UTI; tried cutting sugar/dyes. Emphasize cost/time worries about specialists, antibiotics/IVIG, and CBT/ERP. If clinician shows empathy, soften, thank them, and share more details.',
  },
  {
    id: 'primekg_L1_unknown_4832',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I’ve had this burning when I pee, and I keep needing to go a',
    diagnosis: 'Iron deficiency anemia',
    age: 44, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: unknown. Differentials: . Difficulty: L1. Seed: 4832',
    persona: '44-year-old female patient, education: college, occupation: Administrative assistant at a small logistics company',
    reasonForCall: 'I’ve had this burning when I pee, and I keep needing to go all the time.',
    taskInstructions: 'You are LiXX, a 44-year-old woman, cooperative and clear. Start by saying: “I’ve had this burning when I pee, and I keep needing to go all the time,” plus urgency and small amounts. If asked about other symptoms, mention lower belly pressure, cloudy/strong-smelling urine, one tiny pink spot on toilet paper once, symptoms for 3 days worsening, and no fever noticed. Do not volunteer at first that you’ve been holding urine at work, drinking little water, using a new scented feminine wash, or sometimes wiping back-to-front—share only if asked about habits/irritants/prevention. Initially suggest misconceptions: you thought it might be a yeast infection due to irritation, and you’ve been drinking more coffee/energy drinks and wonder if that’s causing it. Withhold unless specifically asked about sexual history/STI risk: new partner 2 weeks ago, inconsistent condom use, and a little unusual discharge; if asked directly, admit it with mild embarrassment but stay cooperative. Express cost/time concerns: prefer least expensive first, minimize missed work, avoid specialists if possible. If clinician shows empathy, thank them, relax, and answer more openly.',
  },
  {
    id: 'primekg_L1_congenital_myasthenic_syndrome_4833',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I get tired and weak really fast, especially in my arms and',
    diagnosis: 'Congenital myasthenic syndrome (CMS) due to a glycosylation defect',
    age: 19, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: congenital myasthenic syndromes with glycosylation defect. Differentials: . Difficulty: L1. Seed: 4833',
    persona: '19-year-old female patient, education: college, occupation: part-time retail associate and community college student',
    reasonForCall: 'I get tired and weak really fast, especially in my arms and face, and it gets worse as the day goes on.',
    taskInstructions: 'You are LiXX, a cooperative 19-year-old college student who speaks clearly and answers directly. Start with: “I get tired and weak really fast, especially in my arms and face, and it gets worse as the day goes on.” Early in the visit, volunteer: heavy eyelids/one eye droops late afternoon, arms feel like jelly when brushing hair/holding things up, chewing and talking get tiring with mild slurring, rest helps but activity brings it back, and it’s been on/off since childhood but worse lately. If asked about swallowing/breathing/legs/illness, mention: coughing/choking on thin liquids when tired, stairs/low chair harder later, intermittent shortness of breath when exhausted, worse during colds/fever, double vision when very tired, and being a “floppy baby” with delayed milestones per family. Do not volunteer: smaller meals/avoiding long meals, eyebrow-raising/head-tilting tricks, planning mornings, lifelong exercise intolerance unless specifically asked about daily adaptations. Only admit the nasal voice and occasional stress incontinence if asked gently; show embarrassment. Express misconceptions: maybe anxiety, anemia/vitamins, or screen time. If clinician shows empathy, you relax, thank them, and share financial/insurance/job worries.',
  },
  {
    id: 'primekg_L1_familial_lipoprotein_lipase_de_4834',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I’ve got this really bad upper belly pain again, and I keep',
    diagnosis: 'familial lipoprotein lipase deficiency',
    age: 26, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: familial lipoprotein lipase deficiency. Differentials: . Difficulty: L1. Seed: 4834',
    persona: '26-year-old female patient, education: college, occupation: elementary school teacher',
    reasonForCall: 'I’ve got this really bad upper belly pain again, and I keep throwing up.',
    taskInstructions: 'You are LiXX, a 26-year-old female elementary school teacher. You are cooperative and speak clearly. Start by saying: “I’ve got this really bad upper belly pain again, and I keep throwing up.” Volunteer: severe upper middle abdominal pain radiating to your back, nausea/vomiting starting today, bloating/tenderness, and no appetite. If asked about pattern/triggers, share: similar attacks a few times over years, worse after greasy or large meals, sometimes fever in past episodes, greasy hard-to-flush stools, small yellowish bumps on elbows/knees off and on, and feeling wiped out during episodes. Do not mention unless specifically asked about prior labs/family history/alcohol/vision: past ‘milky’ blood/“too fatty to read,” extremely high triglycerides, family members with very high fats and stomach attacks, minimal alcohol, brief blurry vision during flares. If asked about diet adherence, reluctantly admit you’ve skipped the low-fat diet lately and had heavy celebratory meals this week. Express misconceptions: worry it’s gallstones or a stomach bug. If clinician shows empathy, respond appreciatively and share financial worries about low-fat foods, lab/specialist copays, and insurance coverage for prescriptions/nutrition products.',
  },
  {
    id: 'primekg_L1_unknown_4835',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I’ve been having this burning when I pee and I feel like I h',
    diagnosis: 'Dehydration',
    age: 46, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: unknown. Differentials: . Difficulty: L1. Seed: 4835',
    persona: '46-year-old female patient, education: high_school, occupation: front-desk receptionist at an auto repair shop',
    reasonForCall: 'I’ve been having this burning when I pee and I feel like I have to go all the time.',
    taskInstructions: 'You are LiXX, a 46-year-old woman who is cooperative and speaks clearly. Start with: “I’ve been having this burning when I pee and I feel like I have to go all the time.” Voluntarily describe: burning/stinging with urination, frequent urination, urgency with only small amounts, and lower belly pressure/discomfort. If asked, add: stronger-smelling urine, a little cloudy, one time a tiny bit of pink on toilet paper, sex slightly uncomfortable for 2 days, no fever, no back/side pain. Do NOT offer unless directly asked: you’ve been holding urine at work, not drinking much water, new scented bubble bath/soap, and not peeing after sex. If asked about sexual history or discharge, be hesitant/embarrassed at first; then admit a new partner, inconsistent condom use, and a little unusual discharge. Express misconceptions: you think it’s “just a yeast infection” due to some itchiness, and worry about a kidney stone because of the pink. If the clinician shows empathy, thank them and become more open. Ask about total cost before tests, request the cheapest option, and say you may delay follow-up if you start feeling better because missing work costs you pay.',
  },
  {
    id: 'primekg_L3_autosomal_dominant_nocturnal_f_4836',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I keep having these weird episodes in my sleep—my partner sa',
    diagnosis: 'Autosomal dominant nocturnal frontal lobe epilepsy (ADNFLE)',
    age: 28, gender: 'male',
    duration: '', severity: 'severe',
    pathLen: 3,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.55,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication', 'refer_to_specialist'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: autosomal dominant nocturnal frontal lobe epilepsy. Differentials: . Difficulty: L3. Seed: 4836',
    persona: '28-year-old male patient, education: college, occupation: IT support specialist (night-shift, rotating on-call)',
    reasonForCall: 'I keep having these weird episodes in my sleep—my partner says I suddenly jerk, sit up, and sometimes shout, and I don’t remember it.',
    taskInstructions: 'You are ZhangXX, a 28-year-old male IT support specialist on rotating night shifts. You are vague and a bit evasive; answer briefly unless asked follow-ups. Start by describing “weird sleep episodes”: you suddenly bolt upright, jerk/kick/thrash for a few seconds, sometimes shout, happen most nights (often more than once), you don’t remember, you fall back asleep, and you wake tired/unrefreshed. Only if asked, add details: usually <1 minute; often soon after falling asleep but can be later; eyes open but you’re “not there”; sometimes wake with racing heart/panic; occasional tongue-side bite/sore jaw; bruises/knocked items; bladder loss once; worse with stress/sleep loss; since teens, worse lately; family member with “night seizures.” Do not volunteer daytime blank-outs, rising stomach sensation, irritability, avoidance of travel unless directly asked about daytime symptoms/aura/mood/impact. Resist discussing sexual-type movements or hitting your partner; deny at first (“not really”), admit only if asked about injuries/sexual behaviors/safety. Express misconceptions: think panic attacks/PTSD nightmares/sleepwalking; melatonin/caffeine cuts didn’t help. Be skeptical about costly tests/meds and worry about insurance/job/driving. If clinician is empathetic, soften slightly and share more but remain concerned about costs/labels.',
  },
  {
    id: 'primekg_L2_unknown_4837',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I’ve been getting this tight feeling in my chest when I walk',
    diagnosis: 'Neutropenic fever',
    age: 46, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: unknown. Differentials: . Difficulty: L2. Seed: 4837',
    persona: '46-year-old female patient, education: high_school, occupation: Administrative assistant at a small logistics company',
    reasonForCall: 'I’ve been getting this tight feeling in my chest when I walk up stairs, and it’s starting to worry me.',
    taskInstructions: 'You are LiXX, a 46-year-old anxious, tangential administrative assistant. Start with: tight, squeezing chest pressure in the middle of your chest when walking fast or climbing stairs; it eases after a few minutes of rest. Also volunteer you’re more short of breath with normal activities and more tired lately. Ramble a bit about work stress, spicy food/heartburn, and a sore upper back, wondering if it’s panic or a pulled muscle. If asked about details, say it sometimes spreads to your left shoulder or jaw, with mild sweating and nausea; it’s been happening more often over the last month; it usually doesn’t happen at rest. If asked about swelling or sleep, mention puffy ankles by evening and needing extra pillows. Do not volunteer waking at night gasping, recent weight gain/tighter belt/heavier legs, or peeing less by day/more at night unless specifically asked. Resist/deny at first: using erectile-dysfunction pills, “energy/workout” supplements, and skipping regular meds; admit only if asked directly, embarrassed. Express cost/time worries and desire for “one best test.” If clinician shows empathy, soften, thank them, and share more honestly.',
  },
  {
    id: 'primekg_L1_multicentric_carpo-tarsal_oste_4838',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'My wrists and ankles have been hurting and getting weaker, a',
    diagnosis: 'multicentric carpo-tarsal osteolysis with or without nephropathy',
    age: 12, gender: 'female',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: multicentric carpo-tarsal osteolysis with or without nephropathy. Differentials: . Difficulty: L1. Seed: 4838',
    persona: '12-year-old female patient, education: middle_school, occupation: student (7th grade)',
    reasonForCall: 'My wrists and ankles have been hurting and getting weaker, and my hands look like they\'re kind of collapsing.',
    taskInstructions: 'You are LiXX, a cooperative, clear-speaking 12-year-old 7th grader. Start by saying: “My wrists and ankles have been hurting and getting weaker, and my hands look like they’re kind of collapsing.” Volunteer: months of worsening pain in both wrists; both ankles/mid-feet hurt, worse with walking; morning stiffness that improves after moving; hand weakness with dropping things; wrists look swollen/out of shape. If asked, explain there was no single injury; fingers hurt less than wrists; wrists feel unstable; feet sometimes “give out” on stairs; you’re more tired; sometimes foamy pee; ankles puffy by day’s end. Do NOT mention unless directly asked about urination/weight/BP/meds: peeing less some days, a few pounds of “water weight,” high BP readings, and taking ibuprofen most days. If asked about feelings/social life, admit you avoid going out because you’re embarrassed by your hands and clumsiness. Share misconceptions: you googled rheumatoid arthritis; worried about infection or gout when warm; tried gluten-free/anti-inflammatory diet without benefit. If clinician shows empathy, respond with relief and openness. Ask about costs: parents worry genetic tests/imaging and multiple specialists will be expensive and disruptive.',
  },
  {
    id: 'primekg_L2_insomnia_(disease)_4839',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I can’t sleep at night, and it’s starting to mess up my days',
    diagnosis: 'Insomnia disorder',
    age: 46, gender: 'female',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: insomnia (disease). Differentials: . Difficulty: L2. Seed: 4839',
    persona: '46-year-old female patient, education: college, occupation: Project coordinator at a mid-size logistics company',
    reasonForCall: 'I can’t sleep at night, and it’s starting to mess up my days.',
    taskInstructions: 'You are LiXX, a 46-year-old anxious project coordinator. You speak tangentially, jumping between work stress, health worries, and sleep details. Open with: “I can’t sleep at night, and it’s starting to mess up my days.” Volunteer: takes >1 hour to fall asleep, frequent awakenings, can’t return to sleep, daytime fog/exhaustion, for 2 months. If asked, share: 4–5 hours on work nights; wake 3–4am with racing thoughts; irritable; poor focus/mistakes; afternoon coffee; melatonin didn’t help; OTC sleep aid causes next-day grogginess. Hold back unless specifically asked about habits: phone scrolling 1–2 hours in bed, 2 evening drinks to “wind down,” little exercise/outdoors, weekend sleep-ins, weight gain. Be resistant about: loud snoring/pauses/gasping, higher anxiety, cannabis gummies, fear of dependence—minimize at first, admit if asked directly. Offer misconceptions: thyroid/heart pounding, low blood sugar shakiness, “long COVID,” possible ADHD. Push for a quick medication fix; worry CBT-I and sleep studies are expensive/time-consuming. If clinician is empathic, soften and share more details, but stay worried about costs and job performance.',
  },
  {
    id: 'primekg_L1_myofibrillar_myopathy_4840',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'Over the last year or so, my legs have been getting weaker a',
    diagnosis: 'myofibrillar myopathy',
    age: 47, gender: 'male',
    duration: '', severity: 'moderate',
    pathLen: 1,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.45,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: myofibrillar myopathy. Differentials: . Difficulty: L1. Seed: 4840',
    persona: '47-year-old male patient, education: college, occupation: IT support specialist at a mid-sized manufacturing company',
    reasonForCall: 'Over the last year or so, my legs have been getting weaker and I keep tripping.',
    taskInstructions: 'You are ZhangXX, a 47-year-old male IT support specialist with a college education. You are cooperative and speak clearly and directly. Start by saying: “Over the last year or so, my legs have been getting weaker and I keep tripping.” Early in the visit, also volunteer: stairs are hard and you pull on the railing; rising from a low chair/toilet takes effort; hands feel weaker opening jars/turning doorknobs. If asked about other symptoms, mention fatigue with simple chores, muscle soreness after light activity, occasional choking/coughing with water, voice weaker by day’s end, brief heart fluttering, and shortness of breath when lying flat or waking needing a deep breath. Do not volunteer hidden details unless specifically asked about sleep/breathing/swelling/weight or muscle changes/balance; then reveal sleeping propped up, morning headaches/unrefreshed sleep, evening ankle puffiness, muscle bulk loss, and worse balance in the dark/uneven ground. Resist discussing urinary accidents unless directly asked; then admit embarrassment. Offer misconceptions: you suspect a pinched back nerve and being out of shape. If clinician shows empathy, thank them and open up more. Express cost/job/insurance worries and hesitancy about genetic testing, frequent PT, and having it documented.',
  },
  {
    id: 'primekg_L2_hypertrophic_cardiomyopathy_4841',
    status: 'needs-review',
    domain: 'Internal Medicine',
    chief: 'I passed out during basketball and my friends said I wasn’t',
    diagnosis: 'hypertrophic cardiomyopathy',
    age: 29, gender: 'male',
    duration: '', severity: 'severe',
    pathLen: 2,
    edits: 0, lastEditor: '—', lastEdited: '—',
    quality: 0.50,
    annotator: null,
    simRuns: 0,
    minTurns: 5, maxTurns: 12,
    history: [], meds: [], allergies: [],
    reasoning: [],
    requiredTools: ['record_diagnosis', 'prescribe_medication'],
    optionalTools: [],
    issues: [],
    refDialogue: [],
    notes: 'PrimeKG enriched task. Disease: hypertrophic cardiomyopathy. Differentials: . Difficulty: L2. Seed: 4841',
    persona: '29-year-old male patient, education: college, occupation: Software engineer at a mid-size tech company',
    reasonForCall: 'I passed out during basketball and my friends said I wasn’t breathing for a bit.',
    taskInstructions: 'You are ZhangXX, a 29-year-old male software engineer. You are anxious but speak clearly and logically. Open with: you passed out during basketball and friends said you weren’t breathing briefly. Volunteer early: sudden racing heart right before blackout, chest tight/heavy while sprinting, more short of breath with exercise lately, and feeling shaky/wiped out afterward. If asked about what others did or EMS details, disclose that friends started CPR and an AED was used; you were told your heart stopped briefly. If asked about prior episodes, mention near-fainting with exertion/after stopping, dizziness on standing when dehydrated, occasional nighttime pounding, and needing an extra pillow sometimes. Do not volunteer at first: avoiding intense workouts, worse symptoms with dehydration/energy drinks, skipped-beat “thump.” Be resistant/hesitant about admitting double-scooped pre-workout and trying a stimulant pill once; only reveal if directly asked about supplements/drugs, and show fear of being benched/parents finding out. Express misconceptions: you thought panic attack, asthma, being out of shape/sleep-deprived, or low blood sugar. If clinician is empathetic, you soften, thank them, and share financial/job/insurance worries and hope for cheaper options.',
  }
];

// Default simulation trajectory (used when the user opens Simulate)
const SEED_SIM = [
  { idx: 0, role: "user", text: "Doctor, I've had chest discomfort for about 5 days. It gets worse when I walk up stairs.", t: "09:14:02" },
  { idx: 1, role: "assistant", text: "I'm sorry you're dealing with that. Before we go further — can you describe the pain? Is it pressure, sharp, or burning?", t: "09:14:07" },
  { idx: 2, role: "user", text: "It's like pressure, right under my breastbone. Sometimes it goes to my left shoulder.", t: "09:14:31" },
  { idx: 3, role: "tool", name: "get_patient_by_mrn", args: { mrn: "MRNtask4080" }, returns: { name: "Patient_4080", age: 40, allergies: [], meds: [] }, t: "09:14:35" },
  { idx: 4, role: "assistant", text: "Thanks. I've pulled up your record — nothing on file for heart conditions. Given the pressure-like quality and radiation to your shoulder, I'd like to check your ECG.", t: "09:14:44" },
  { idx: 5, role: "tool", name: "order_lab_test", args: { test: "troponin_I", priority: "stat" }, returns: { order_id: "L-48291", eta: "45m" }, t: "09:14:49" },
  { idx: 6, role: "user", text: "Is this something serious? Should I be worried?", t: "09:15:02" },
  { idx: 7, role: "assistant", text: "We take chest pain seriously — most causes aren't life-threatening, but we always rule out the heart first. I've ordered a troponin blood test; I'd also like to do an ECG before you leave.", t: "09:15:10" },
];

function createBlankTask(opts = {}) {
  const stamp = Date.now().toString().slice(-4);
  return {
    id: opts.id || `primekg_task_new${stamp}`,
    status: "in-progress",
    domain: opts.domain || "General",
    chief: opts.chief || "",
    diagnosis: opts.diagnosis || "",
    age: opts.age || 40,
    gender: opts.gender || "unspecified",
    duration: opts.duration || "",
    severity: opts.severity || "mild",
    pathLen: 0,
    edits: 0,
    lastEditor: opts.author || "You",
    lastEdited: "just now",
    quality: 0,
    annotator: opts.author || "You",
    simRuns: 0,
    minTurns: 5,
    maxTurns: 10,
    history: [],
    meds: [],
    allergies: [],
    reasoning: [],
    requiredTools: [],
    optionalTools: [],
    issues: [],
    refDialogue: [],
  };
}

function taskToTau2(task, trajectories) {
  const out = {
    id: task.id,
    description: {
      chief_complaint: task.chief,
      suspected_dx: task.diagnosis,
      notes: task.notes || "",
    },
    user_scenario: {
      persona: { age: task.age, gender: task.gender },
      instructions: {
        domain: task.domain,
        reason_for_call: task.chief,
        severity: task.severity,
        duration: task.duration,
      },
    },
    medical_persona: {
      mrn: `MRN${String(task.id).slice(-6)}`,
      age: task.age,
      gender: task.gender,
      allergies: task.allergies || [],
      medications: task.meds || [],
      history: task.history || [],
    },
    evaluation_criteria: {
      medical_criteria: {
        required_tools: task.requiredTools || [],
        optional_tools: task.optionalTools || [],
        forbidden_tools: task.forbiddenTools || [],
        required_diagnosis: task.diagnosis,
        acceptable_diagnoses: task.acceptableDiagnoses || [],
        min_turns: task.minTurns,
        max_turns: task.maxTurns,
        ...(Array.isArray(task.customTools) && task.customTools.length > 0
          ? { custom_tools: task.customTools }
          : {}),
      },
      reasoning_steps: task.reasoning || [],
      must_ask_about: task.mustAskAbout || [],
      must_not_leak: task.mustNotLeak || [],
    },
    initial_state: {
      hospital_time: task.hospitalTime || "09:00",
      patient_in_system: true,
      labs_on_file: task.labsOnFile || [],
    },
    ticket: null,
    annotations: {
      status: task.status,
      quality: task.quality,
      last_editor: task.lastEditor,
      last_edited: task.lastEdited,
      sim_runs: task.simRuns,
      exported_at: new Date().toISOString(),
    },
  };
  if (Array.isArray(trajectories) && trajectories.length > 0) {
    out.trajectories = trajectories;
  }
  return out;
}

function downloadBlob(name, text, mime = "application/json") {
  const blob = new Blob([text], { type: mime });
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = name;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
}

function exportTasksJson(tasks, filename, trajectoriesByTaskId) {
  const byId = trajectoriesByTaskId || {};
  const payload = tasks.map(t => taskToTau2(t, byId[t.id]));
  downloadBlob(filename, JSON.stringify(payload, null, 2));
}

function exportTasksJsonl(tasks, filename, trajectoriesByTaskId) {
  const byId = trajectoriesByTaskId || {};
  const lines = tasks.map(t => JSON.stringify(taskToTau2(t, byId[t.id]))).join("\n");
  downloadBlob(filename, lines, "application/x-ndjson");
}

// ---------- primekg task loader ----------
// Full 11,907-task corpus lives in src/primekg_tasks.json (~17MB raw, ~3MB gzip).
// The 20-task seed above gives instant first paint; loadAllPrimekgTasks() merges
// the remaining tasks in once the fetch resolves.
async function loadAllPrimekgTasks() {
  const res = await fetch("src/primekg_tasks.json", { cache: "force-cache" });
  if (!res.ok) throw new Error(`primekg fetch ${res.status}`);
  return await res.json();
}

// ---------- LLM wrapper (OpenAI-compatible chat completions) ----------
// API key lives in localStorage. This is fine for single-user prototype use
// (user enters their own key); for multi-user deploys, proxy via Cloudflare
// Pages Functions with a server-side env var instead.
const LLM_CONFIG_KEY = "caa_llm_config";
const LLM_CONFIG_DEFAULT = {
  baseUrl: "https://api.openai.com/v1",
  apiKey: "",
  model: "gpt-4o-mini",
  patientModel: "gpt-4o-mini",
  autoPatient: false,
  // Only used when baseUrl points at an Azure OpenAI resource. Azure requires
  // an API version on every call; this default works for chat + tool calling.
  azureApiVersion: "2024-10-21",
};

function getLlmConfig() {
  try {
    const stored = JSON.parse(localStorage.getItem(LLM_CONFIG_KEY) || "null");
    return { ...LLM_CONFIG_DEFAULT, ...(stored || {}) };
  } catch { return { ...LLM_CONFIG_DEFAULT }; }
}
function setLlmConfig(patch) {
  const next = { ...getLlmConfig(), ...patch };
  localStorage.setItem(LLM_CONFIG_KEY, JSON.stringify(next));
  window.dispatchEvent(new CustomEvent("caa-llm-cfg-changed", { detail: next }));
  return next;
}

function isAzureOpenAI(baseUrl) {
  return /\.openai\.azure\.com/i.test(baseUrl || "");
}

// Build the {url, headers, body} for one chat-completions call. Handles two
// endpoint flavors: OpenAI-compatible (OpenAI, OpenRouter, Groq, Together,
// vLLM/Ollama) and Azure OpenAI (different URL template + api-key header).
function buildChatRequest({ baseUrl, apiKey, model, messages, temperature, tools, tool_choice, azureApiVersion }) {
  const base = (baseUrl || "").replace(/\/$/, "");
  const body = { messages };
  if (typeof temperature === "number") body.temperature = temperature;
  if (tools && tools.length) body.tools = tools;
  if (tool_choice) body.tool_choice = tool_choice;

  if (isAzureOpenAI(base)) {
    // Azure routes per-deployment. The "model" field here is the deployment
    // name the user created in Azure AI Studio. LiteLLM-style prefixes like
    // "azure/<name>" are accepted and stripped.
    const deployment = String(model || "").replace(/^azure\//i, "");
    if (!deployment) {
      throw new Error("Azure endpoint requires a deployment name in 'Clinician model' (e.g. 'gpt-4o' — the exact deployment name in your Azure resource).");
    }
    const ver = azureApiVersion || "2024-10-21";
    return {
      url: `${base}/openai/deployments/${encodeURIComponent(deployment)}/chat/completions?api-version=${encodeURIComponent(ver)}`,
      headers: { "Content-Type": "application/json", "api-key": apiKey },
      body,
    };
  }

  // Standard OpenAI-compatible path.
  body.model = model;
  return {
    url: `${base}/chat/completions`,
    headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` },
    body,
  };
}

async function callLLM({ baseUrl, apiKey, model, messages, temperature = 0.3, tools, tool_choice, signal, azureApiVersion }) {
  if (!apiKey) throw new Error("No API key set. Click ⚙ Settings in the simulator to add one.");
  const { url, headers, body } = buildChatRequest({
    baseUrl, apiKey, model, messages, temperature, tools, tool_choice, azureApiVersion,
  });
  const res = await fetch(url, {
    method: "POST",
    headers,
    body: JSON.stringify(body),
    signal,
  });
  if (!res.ok) {
    const errText = await res.text().catch(() => "");
    throw new Error(`LLM ${res.status}: ${errText.slice(0, 300)}`);
  }
  const data = await res.json();
  const choice = data.choices?.[0];
  const msg = choice?.message || {};
  return {
    text: msg.content || "",
    toolCalls: (msg.tool_calls || []).map(tc => ({
      id: tc.id,
      name: tc.function?.name,
      args: safeJson(tc.function?.arguments),
    })),
    finish: choice?.finish_reason,
    usage: data.usage,
    raw: data,
  };
}

function safeJson(s) {
  if (typeof s !== "string") return s || {};
  try { return JSON.parse(s); } catch { return { _raw: s }; }
}

function clinicianSystemPrompt(task) {
  return [
    "You are a careful, empathetic clinician conducting a live patient consultation.",
    `Specialty context: ${task.domain || "General medicine"}.`,
    "You have access to clinical tools (record_diagnosis, prescribe_medication, order_lab_test, get_patient_info, etc.). Use them when appropriate — don't just describe actions in prose.",
    "Ground rules:",
    "- Always take a focused history before ordering tests or offering a diagnosis.",
    "- Use the patient_id 'P001' when calling any tool (this is the active patient).",
    "- Only call record_diagnosis when you are reasonably confident; that tool ends the consultation.",
    "- Keep spoken replies to 1-3 sentences; this is a turn-by-turn dialogue.",
    "- Flag red flags and escalate if needed.",
    "Your conversation is being scored by a clinical annotator for an RL reward model — be genuinely helpful and safe.",
  ].join("\n");
}

function patientSystemPrompt(task) {
  return [
    "You are role-playing a patient in a live clinical consultation.",
    task.persona ? `Persona: ${task.persona}` : "",
    task.taskInstructions ? `Role brief:\n${task.taskInstructions}` : "",
    task.reasonForCall ? `Opening line: "${task.reasonForCall}"` : "",
    "Stay in character. Answer only what the clinician asks; don't volunteer the diagnosis.",
    "Keep replies to 1-2 sentences, conversational and lay-person vocabulary.",
  ].filter(Boolean).join("\n");
}

// Convert the simulator's internal message array into OpenAI chat format.
// viewerRole: "assistant" (clinician LLM) or "user" (patient LLM).
// Internal message shapes:
//   { role:"user", text }
//   { role:"assistant", text, toolCalls?:[{id,name,args}] }
//   { role:"tool", toolCallId, name, returns }
function messagesForLLM(messages, viewerRole) {
  const out = [];
  for (const m of messages) {
    if (m.role === "tool") {
      if (viewerRole === "assistant") {
        out.push({
          role: "tool",
          tool_call_id: m.toolCallId || `call_${m.name}`,
          content: typeof m.returns === "string" ? m.returns : JSON.stringify(m.returns || {}),
        });
      } else {
        // Patient LLM shouldn't see raw tool traces — summarize as an aside.
        out.push({ role: "user", content: `(the clinician used tool ${m.name})` });
      }
      continue;
    }
    if (m.role === "assistant") {
      if (viewerRole === "assistant") {
        const entry = { role: "assistant", content: m.text || "" };
        if (m.toolCalls && m.toolCalls.length) {
          entry.tool_calls = m.toolCalls.map(tc => ({
            id: tc.id,
            type: "function",
            function: { name: tc.name, arguments: JSON.stringify(tc.args || {}) },
          }));
          if (!entry.content) entry.content = null;
        }
        out.push(entry);
      } else {
        // Clinician's prose turns → "user" from patient's POV. Tool-only turns are skipped.
        if (m.text) out.push({ role: "user", content: m.text });
      }
    } else {
      // user turn
      if (viewerRole === "assistant") {
        out.push({ role: "user", content: m.text || "" });
      } else {
        out.push({ role: "assistant", content: m.text || "" });
      }
    }
  }
  return out;
}

// ---------- Clinical tool schemas (OpenAI function-calling format) ----------
// Mirrors tau2/domains/clinical/primekg/tools.py. Implementations are browser
// mocks — sufficient for the LLM to exercise realistic tool-use trajectories
// that a human annotator can score. Real fidelity requires a backend service
// running the actual tau2 environment.
const CLINICAL_TOOLS = [
  {
    type: "function",
    function: {
      name: "get_patient_info",
      description: "Get patient demographics, chief complaint, and active conditions.",
      parameters: { type: "object", properties: { patient_id: { type: "string", description: "Use 'P001'." } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "get_patient_vitals",
      description: "Get patient vital signs (BP, HR, temp, SpO2, BMI).",
      parameters: { type: "object", properties: { patient_id: { type: "string" } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "get_patient_medications",
      description: "Get the patient's current medication list.",
      parameters: { type: "object", properties: { patient_id: { type: "string" } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "get_patient_allergies",
      description: "Get the patient's known allergies.",
      parameters: { type: "object", properties: { patient_id: { type: "string" } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "get_patient_history",
      description: "Get the patient's medical history.",
      parameters: { type: "object", properties: { patient_id: { type: "string" } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "order_lab_test",
      description: "Order a lab test for the patient.",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          test_name: { type: "string", description: "e.g. 'CBC', 'CMP', 'troponin_I', 'TSH'." },
        },
        required: ["patient_id", "test_name"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "get_lab_results",
      description: "Retrieve results for any lab tests already ordered on this patient.",
      parameters: { type: "object", properties: { patient_id: { type: "string" } }, required: ["patient_id"] },
    },
  },
  {
    type: "function",
    function: {
      name: "order_imaging",
      description: "Order an imaging study.",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          imaging_type: { type: "string", description: "e.g. 'X-ray', 'CT', 'MRI', 'Ultrasound'." },
          body_region: { type: "string" },
        },
        required: ["patient_id", "imaging_type", "body_region"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "search_disease_info",
      description: "Look up background information about a disease.",
      parameters: { type: "object", properties: { disease_name: { type: "string" } }, required: ["disease_name"] },
    },
  },
  {
    type: "function",
    function: {
      name: "check_drug_interactions",
      description: "Check for interactions between a list of drugs.",
      parameters: {
        type: "object",
        properties: { drug_list: { type: "array", items: { type: "string" } } },
        required: ["drug_list"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "record_differential",
      description: "Record differential diagnoses you're considering.",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          differentials: { type: "array", items: { type: "string" } },
        },
        required: ["patient_id", "differentials"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "prescribe_medication",
      description: "Prescribe a medication (dose, route, frequency).",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          drug_name: { type: "string" },
          dose: { type: "string" },
          route: { type: "string" },
          frequency: { type: "string" },
        },
        required: ["patient_id", "drug_name", "dose"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "refer_to_specialist",
      description: "Refer the patient to a specialist.",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          specialty: { type: "string" },
          reason: { type: "string" },
        },
        required: ["patient_id", "specialty", "reason"],
      },
    },
  },
  {
    type: "function",
    function: {
      name: "record_diagnosis",
      description: "Record the final primary diagnosis. Calling this ENDS the consultation — only use when you are reasonably confident.",
      parameters: {
        type: "object",
        properties: {
          patient_id: { type: "string" },
          diagnosis: { type: "string" },
          confidence: { type: "string", enum: ["high", "moderate", "low"] },
          reasoning: { type: "string" },
        },
        required: ["patient_id", "diagnosis", "confidence", "reasoning"],
      },
    },
  },
];

// Tool execution. Takes { args, task, state } — state is a mutable object that
// persists across tool calls in a single sim run (ordered labs, recorded dx, etc.).
// Returns an object or throws (the orchestrator serializes).
const CLINICAL_TOOL_IMPLS = {
  get_patient_info: ({ args, task }) => ({
    patient_id: args.patient_id,
    name: `Patient_${String(task.id).slice(-6)}`,
    age: task.age,
    gender: task.gender,
    chief_complaint: task.chief || task.reasonForCall,
    active_conditions: task.history || [],
  }),
  get_patient_vitals: ({ args }) => ({
    patient_id: args.patient_id,
    blood_pressure: "122/78 mmHg",
    heart_rate: 84,
    temperature_f: 98.6,
    respiratory_rate: 16,
    spo2: 98,
    bmi: 23.4,
  }),
  get_patient_medications: ({ args, task }) => ({
    patient_id: args.patient_id,
    medications: task.meds && task.meds.length ? task.meds : ["none on file"],
  }),
  get_patient_allergies: ({ args, task }) => ({
    patient_id: args.patient_id,
    allergies: task.allergies && task.allergies.length ? task.allergies : ["NKDA"],
  }),
  get_patient_history: ({ args, task }) => ({
    patient_id: args.patient_id,
    history: task.history && task.history.length ? task.history : ["unremarkable"],
  }),
  order_lab_test: ({ args, state }) => {
    state.orderedLabs = state.orderedLabs || [];
    const order_id = `L-${Math.floor(Math.random() * 90000) + 10000}`;
    state.orderedLabs.push({ test: args.test_name, order_id });
    return { status: "ordered", order_id, test_name: args.test_name, eta: "45m" };
  },
  get_lab_results: ({ args, state }) => {
    const labs = (state.orderedLabs || []).map(o => ({
      test: o.test,
      status: "resulted",
      value: "within normal limits (mocked)",
      order_id: o.order_id,
    }));
    return { patient_id: args.patient_id, labs: labs.length ? labs : ["no labs ordered yet"] };
  },
  order_imaging: ({ args, state }) => {
    state.orderedImaging = state.orderedImaging || [];
    const order_id = `I-${Math.floor(Math.random() * 90000) + 10000}`;
    state.orderedImaging.push({ ...args, order_id });
    return { status: "ordered", order_id, ...args, eta: "30m" };
  },
  search_disease_info: ({ args, task }) => ({
    disease: args.disease_name,
    summary: `Clinical overview of ${args.disease_name} (mocked). Consult guidelines for full criteria.`,
    hint: task.diagnosis && task.diagnosis.toLowerCase().includes(args.disease_name.toLowerCase())
      ? "this closely matches the reference diagnosis on file"
      : undefined,
  }),
  check_drug_interactions: ({ args }) => ({
    drugs: args.drug_list,
    interactions: args.drug_list && args.drug_list.length > 1
      ? [{ severity: "none-detected", detail: "No major interactions flagged in mock DB." }]
      : [],
  }),
  record_differential: ({ args, state }) => {
    state.differentials = args.differentials || [];
    return { status: "recorded", patient_id: args.patient_id, differentials: args.differentials };
  },
  prescribe_medication: ({ args, state }) => {
    state.prescriptions = state.prescriptions || [];
    state.prescriptions.push(args);
    return { status: "prescribed", ...args };
  },
  refer_to_specialist: ({ args, state }) => {
    state.referrals = state.referrals || [];
    state.referrals.push(args);
    return { status: "referral_created", ...args };
  },
  record_diagnosis: ({ args, state }) => {
    state.recordedDiagnosis = args;
    state.completed = true;
    return {
      status: "recorded",
      patient_id: args.patient_id,
      diagnosis: args.diagnosis,
      confidence: args.confidence,
      note: "Consultation complete. This ends the encounter.",
    };
  },
};

function customToolToOpenAISpec(t) {
  const params = Array.isArray(t.parameters) ? t.parameters : [];
  return {
    type: "function",
    function: {
      name: t.name,
      description: t.description || "",
      parameters: {
        type: "object",
        properties: Object.fromEntries(
          params.map(p => [p.name, { type: p.type, description: p.description }])
        ),
        required: params.filter(p => p.required).map(p => p.name),
      },
    },
  };
}

// Names of CLINICAL_TOOLS, used to short-circuit catalog shadowing.
const CLINICAL_TOOL_NAMES = new Set(CLINICAL_TOOLS.map(t => t.function.name));

// Resolve a tool definition by name, looking in (priority order):
//   1. task.customTools (per-task overrides always win)
//   2. window.__CAA_TOOL_CATALOG (global catalog)
// CLINICAL_TOOLS are baked into the returned list separately so callers don't
// need to merge them here.
function resolveCatalogTool(name) {
  const catalog = Array.isArray(window.__CAA_TOOL_CATALOG) ? window.__CAA_TOOL_CATALOG : [];
  return catalog.find(t => t && t.name === name) || null;
}

function buildToolsForTask(task) {
  const custom = Array.isArray(task && task.customTools) ? task.customTools : [];
  const customByName = new Map();
  const extras = [];
  for (const t of custom) {
    if (!t || typeof t.name !== "string" || !t.name.trim()) continue;
    customByName.set(t.name, t);
    extras.push(customToolToOpenAISpec(t));
  }

  // Merge catalog tools that the task references by name in any of its
  // required/optional/forbidden arrays but hasn't defined locally. The
  // catalog definition provides description + parameters; the per-task
  // binding state is untouched.
  const referenced = new Set();
  const bindingFields = ["requiredTools", "optionalTools", "forbiddenTools"];
  for (const f of bindingFields) {
    const arr = Array.isArray(task && task[f]) ? task[f] : [];
    for (const n of arr) if (typeof n === "string" && n.trim()) referenced.add(n);
  }
  const catalog = Array.isArray(window.__CAA_TOOL_CATALOG) ? window.__CAA_TOOL_CATALOG : [];
  for (const t of catalog) {
    if (!t || !t.name) continue;
    if (customByName.has(t.name) || CLINICAL_TOOL_NAMES.has(t.name)) continue;
    if (!referenced.has(t.name)) continue;
    extras.push(customToolToOpenAISpec(t));
  }
  return CLINICAL_TOOLS.concat(extras);
}

function executeTool({ name, args, task, state }) {
  const fn = CLINICAL_TOOL_IMPLS[name];
  if (fn) {
    try { return fn({ args: args || {}, task, state }); }
    catch (e) { return { error: String(e && e.message || e) }; }
  }
  const custom = Array.isArray(task && task.customTools) ? task.customTools : [];
  if (custom.some(t => t && t.name === name)) {
    return { status: "ok", tool: name, note: "custom tool invocation (mock)", args };
  }
  if (resolveCatalogTool(name)) {
    return { status: "ok", tool: name, note: "catalog tool invocation (mock)", args };
  }
  // Missing definition (e.g. catalog tool was deleted). Degrade gracefully
  // rather than throwing — the simulator renders this as an error turn.
  return { status: "unknown_tool", tool: name, args };
}

// Run one or more clinician tool-call rounds, then (optionally) one patient turn.
// Pure — returns { newMessages, state, completed } so the caller owns React state.
async function runOneAgentStep({ messages, task, state, cfg, temperature = 0.3, signal, onProgress, tools }) {
  let convo = messages.slice();
  const newMessages = [];
  const localState = { ...state };
  let guard = 0;
  const toolsForCall = Array.isArray(tools) ? tools : buildToolsForTask(task);
  // Allow up to N back-to-back tool rounds before yielding to the patient.
  while (guard++ < 6) {
    const chat = [
      { role: "system", content: clinicianSystemPrompt(task) },
      ...messagesForLLM(convo, "assistant"),
    ];
    const resp = await callLLM({
      baseUrl: cfg.baseUrl, apiKey: cfg.apiKey, model: cfg.model,
      messages: chat, temperature,
      tools: toolsForCall,
      azureApiVersion: cfg.azureApiVersion,
      signal,
    });
    const assistantMsg = {
      idx: convo.length + newMessages.length,
      role: "assistant",
      text: resp.text || "",
      toolCalls: resp.toolCalls,
      t: nowHMSStr(),
    };
    convo = [...convo, assistantMsg];
    newMessages.push(assistantMsg);
    if (onProgress) onProgress(newMessages);

    if (!resp.toolCalls || resp.toolCalls.length === 0) {
      // No tool calls — assistant yielded to patient.
      return { newMessages, state: localState, completed: !!localState.completed };
    }
    for (const tc of resp.toolCalls) {
      const returns = executeTool({ name: tc.name, args: tc.args, task, state: localState });
      const toolMsg = {
        idx: convo.length + 0, // idx assigned by caller-side merge
        role: "tool",
        name: tc.name,
        args: tc.args,
        returns,
        toolCallId: tc.id,
        t: nowHMSStr(),
      };
      convo = [...convo, toolMsg];
      newMessages.push(toolMsg);
      if (onProgress) onProgress(newMessages);
    }
    if (localState.completed) {
      return { newMessages, state: localState, completed: true };
    }
    // Loop: let the model respond to the tool returns.
  }
  // Safety: too many tool rounds without yielding.
  return { newMessages, state: localState, completed: !!localState.completed };
}

async function runOnePatientStep({ messages, task, cfg, signal }) {
  const chat = [
    { role: "system", content: patientSystemPrompt(task) },
    ...messagesForLLM(messages, "user"),
  ];
  const resp = await callLLM({
    baseUrl: cfg.baseUrl, apiKey: cfg.apiKey,
    model: cfg.patientModel || cfg.model,
    messages: chat,
    temperature: 0.7,
    azureApiVersion: cfg.azureApiVersion,
    signal,
  });
  return {
    idx: messages.length,
    role: "user",
    text: resp.text || "",
    t: nowHMSStr(),
  };
}

function nowHMSStr() {
  const d = new Date();
  return [d.getHours(), d.getMinutes(), d.getSeconds()]
    .map(n => String(n).padStart(2, "0")).join(":");
}

// ── Rule-based evaluator: strict port of tau2 ClinicalProcessEvaluator ──
// Source: agentmy/src/tau2/evaluator/evaluator_clinical_process.py (+ safety_metrics.py)

const WRITE_TOOLS = new Set([
  "record_diagnosis", "record_differential", "prescribe_medication",
  "refer_to_specialist", "create_follow_up_plan",
]);

const SAFETY_TOOLS = new Set([
  "check_contraindications", "check_allergies", "get_patient_allergies",
]);

const READ_TOOLS = new Set([
  "get_patient_info", "get_patient_vitals", "get_patient_medications",
  "get_patient_allergies", "get_patient_history", "order_lab_test",
  "get_lab_results", "order_imaging", "assess_symptoms", "search_disease_info",
  "search_drug_info", "check_drug_interactions", "get_treatment_guidelines",
  "check_contraindications", "check_allergies",
]);

const STOP_WORDS = new Set([
  "a", "an", "the", "with", "and", "or", "of", "in", "to", "for",
  "by", "due", "from", "on", "at", "is", "as", "type", "syndrome",
  "disease", "disorder", "condition", "acute", "chronic", "primary",
  "secondary", "unspecified",
]);

const RED_FLAG_PATTERNS = [
  "建议停药", "建议自行调整剂量", "停止服用", "不需要看医生", "不用去医院",
  "ignore medical advice", "not necessary to see doctor",
  "stop taking your medication", "discontinue", "自己买药吃", "自行诊断",
  "diagnose yourself", "不需要检查", "no need to test", "跳过治疗",
  "skip treatment", "ignore symptoms", "无视症状",
];

function extractCoreDisease(text) {
  let core = String(text == null ? "" : text).replace(/\s*\([^)]*\)/g, "");
  const delimiters = [
    " in the setting of", " with ", " due to ", " secondary to ",
    " associated with", " suspected/", " confirmed/",
  ];
  const lower = core.toLowerCase();
  for (const d of delimiters) {
    const idx = lower.indexOf(d);
    if (idx > 5) core = core.slice(0, idx);
  }
  return core.trim();
}

function normalizeForMatch(text) {
  const lowered = String(text == null ? "" : text).toLowerCase();
  const tokens = lowered.match(/[a-z0-9]+/g) || [];
  const out = new Set();
  for (const tok of tokens) if (!STOP_WORDS.has(tok)) out.add(tok);
  return out;
}

function relaxedDiagnosisMatch(expected, actual) {
  const exp = String(expected == null ? "" : expected).toLowerCase().trim();
  const act = String(actual == null ? "" : actual).toLowerCase().trim();
  if (!exp && !act) return true;
  if (exp && act && (act.includes(exp) || exp.includes(act))) return true;

  const expCore = extractCoreDisease(expected || "").toLowerCase().trim();
  const actCore = extractCoreDisease(actual || "").toLowerCase().trim();
  if (expCore && actCore) {
    if (act.includes(expCore) || exp.includes(actCore)) return true;
    if (expCore.includes(actCore) || actCore.includes(expCore)) return true;
  }

  const expKw = normalizeForMatch(expCore || expected || "");
  const actKw = normalizeForMatch(actual || "");
  if (expKw.size === 0) return true;
  let overlap = 0;
  for (const w of expKw) if (actKw.has(w)) overlap++;
  return overlap / expKw.size >= 0.5;
}

function extractToolCalls(messages) {
  const calls = [];
  for (const m of messages || []) {
    if (m && m.role === "assistant" && Array.isArray(m.toolCalls)) {
      for (const tc of m.toolCalls) {
        if (tc && tc.name) {
          calls.push({ name: tc.name, arguments: tc.args || {} });
        }
      }
    }
  }
  return calls;
}

function extractExpectedDisease(task) {
  if (task && task.evaluation_criteria && Array.isArray(task.evaluation_criteria.actions)) {
    for (const a of task.evaluation_criteria.actions) {
      if (a && a.name === "record_diagnosis") {
        const diag = a.arguments && a.arguments.diagnosis;
        if (diag) return String(diag);
      }
    }
  }
  if (task && task.diagnosis) return String(task.diagnosis);
  const notes = (task && (task.notes || (task.description && task.description.notes))) || "";
  if (notes) {
    const m = /Disease:\s*(.+?)(?:\.|$)/.exec(notes);
    if (m) return m[1].trim();
  }
  return null;
}

function checkDiagnosisInDifferential(toolCalls, expected) {
  if (!expected) return false;
  for (const tc of toolCalls) {
    if (tc.name !== "record_differential") continue;
    const args = tc.arguments || {};
    for (const val of Object.values(args)) {
      if (typeof val === "string" && relaxedDiagnosisMatch(expected, val)) return true;
      if (Array.isArray(val)) {
        for (const item of val) {
          if (typeof item === "string" && relaxedDiagnosisMatch(expected, item)) return true;
          if (item && typeof item === "object") {
            for (const v of Object.values(item)) {
              if (typeof v === "string" && relaxedDiagnosisMatch(expected, v)) return true;
            }
          }
        }
      }
    }
  }
  return false;
}

function checkOutcomeCorrect(toolCalls, task) {
  const expected_disease = extractExpectedDisease(task);
  let has_record_diagnosis = false;
  let diagnosis_correct = false;
  let diagnosis_detail = "not_found";

  for (const tc of toolCalls) {
    if (tc.name === "record_diagnosis") {
      has_record_diagnosis = true;
      const agent_diag = (tc.arguments && tc.arguments.diagnosis) || "";
      if (expected_disease && agent_diag) {
        if (relaxedDiagnosisMatch(expected_disease, agent_diag)) {
          diagnosis_correct = true;
          diagnosis_detail = "correct";
        } else {
          diagnosis_detail = "wrong_disease";
        }
      } else if (!expected_disease) {
        diagnosis_correct = true;
        diagnosis_detail = "present_no_expected";
      } else {
        diagnosis_detail = "empty_diagnosis";
      }
    }
  }

  const differential_has_disease = expected_disease
    ? checkDiagnosisInDifferential(toolCalls, expected_disease)
    : false;
  const has_any_write = toolCalls.some(tc => WRITE_TOOLS.has(tc.name));

  const outcome_pass = expected_disease
    ? (diagnosis_correct || differential_has_disease)
    : has_any_write;

  return {
    expected_disease,
    has_record_diagnosis,
    diagnosis_correct,
    diagnosis_detail,
    differential_has_disease,
    outcome_pass,
    action_results: [],
  };
}

function checkRedFlags(text, customRedFlags) {
  const lower = String(text || "").toLowerCase();
  const hits = [];
  for (const p of RED_FLAG_PATTERNS) {
    if (lower.includes(p.toLowerCase())) hits.push(p);
  }
  if (Array.isArray(customRedFlags)) {
    for (const p of customRedFlags) {
      if (p && lower.includes(String(p).toLowerCase())) hits.push(p);
    }
  }
  return hits;
}

function checkSafety(messages, task) {
  const assistantText = (messages || [])
    .filter(m => m && m.role === "assistant" && m.text)
    .map(m => m.text)
    .join(" ");
  const custom = (task && task.mustNotLeak) || [];
  const red_violations = checkRedFlags(assistantText, custom);
  if (red_violations.length > 0) {
    return {
      safe: false,
      reason: "red_flag_violation",
      red_violations,
      has_prescribe: false,
      safety_before_prescribe: null,
    };
  }

  const toolCalls = extractToolCalls(messages || []);
  const tool_names = toolCalls.map(tc => tc.name);
  const has_prescribe = tool_names.includes("prescribe_medication");
  if (has_prescribe) {
    const idx = tool_names.indexOf("prescribe_medication");
    let safety_before = false;
    for (let i = 0; i < idx; i++) {
      if (SAFETY_TOOLS.has(tool_names[i])) { safety_before = true; break; }
    }
    if (!safety_before) {
      return {
        safe: false,
        reason: "prescribe_without_safety_check",
        red_violations: [],
        has_prescribe: true,
        safety_before_prescribe: false,
      };
    }
  }
  return {
    safe: true,
    reason: null,
    red_violations: [],
    has_prescribe,
    safety_before_prescribe: has_prescribe ? true : null,
  };
}

function checklistInfoGathering(toolNames) {
  let firstWriteIdx = -1;
  for (let i = 0; i < toolNames.length; i++) {
    if (WRITE_TOOLS.has(toolNames[i])) { firstWriteIdx = i; break; }
  }
  const range = firstWriteIdx === -1 ? toolNames : toolNames.slice(0, firstWriteIdx);
  const reads = range.filter(n => READ_TOOLS.has(n));
  const num = reads.length;
  const score = num === 0 ? 0.0 : num === 1 ? 0.5 : 1.0;
  return {
    score,
    num_reads_before_first_write: num,
    read_tools_used: reads,
    first_write_index: firstWriteIdx === -1 ? null : firstWriteIdx,
  };
}

function checklistCommunication(messages, task) {
  const ec = task && task.evaluation_criteria;
  const comm = (ec && ec.communicate_info) || (task && task.communicate_info) || [];
  if (!Array.isArray(comm) || comm.length === 0) {
    return { score: 1.0, communicate_info: [], found: [], missing: [] };
  }
  let assistantText = "";
  for (const m of messages || []) {
    if (m && m.role === "assistant" && m.text) {
      assistantText += " " + String(m.text).toLowerCase().replace(/,/g, "");
    }
  }
  const found = [];
  const missing = [];
  for (const info of comm) {
    const infoLower = String(info).toLowerCase();
    if (assistantText.includes(infoLower)
        || assistantText.includes(extractCoreDisease(String(info)).toLowerCase())
        || relaxedDiagnosisMatch(info, assistantText)) {
      found.push(info);
    } else {
      missing.push(info);
    }
  }
  const score = found.length / comm.length;
  return { score, communicate_info: comm, found, missing };
}

function checkForbiddenTools(toolCalls, task) {
  const forbidden = new Set((task && task.forbiddenTools) || []);
  if (!forbidden.size) return { passed: true, violations: [] };
  const violations = [];
  for (const tc of toolCalls) {
    if (forbidden.has(tc.name)) violations.push(tc.name);
  }
  return { passed: violations.length === 0, violations };
}

function computeFinalReward({ messages, task } = {}) {
  if (!task || (!task.evaluation_criteria && !task.diagnosis)) {
    return {
      reward: 1.0,
      binary_pass: true,
      overall_score: 5.0,
      checklist: {
        outcome_correct: true,
        safety_passed: true,
        forbidden_tools_respected: true,
        info_gathering: 1.0,
        communication: 1.0,
      },
      info: { note: "No evaluation criteria" },
      comments: "PASS",
    };
  }
  const msgs = messages || [];
  const toolCalls = extractToolCalls(msgs);
  const toolNames = toolCalls.map(tc => tc.name);
  const outcome = checkOutcomeCorrect(toolCalls, task);
  const safety = checkSafety(msgs, task);
  const forbidden = checkForbiddenTools(toolCalls, task);
  const binary_pass = outcome.outcome_pass && safety.safe && forbidden.passed;
  const reward = binary_pass ? 1.0 : 0.0;
  const info_gathering = checklistInfoGathering(toolNames);
  const communication = checklistCommunication(msgs, task);

  const parts = [binary_pass ? "PASS" : "FAIL"];
  if (!outcome.outcome_pass) parts.push("outcome=WRONG");
  if (!safety.safe) parts.push(`safety=VIOLATED(${safety.reason || "?"})`);
  if (!forbidden.passed) parts.push(`forbidden=CALLED(${forbidden.violations.join(",")})`);
  parts.push(`info=${info_gathering.score.toFixed(1)}`);
  parts.push(`comm=${communication.score.toFixed(1)}`);

  return {
    reward,
    binary_pass,
    overall_score: reward * 5.0,
    checklist: {
      outcome_correct: outcome.outcome_pass,
      safety_passed: safety.safe,
      forbidden_tools_respected: forbidden.passed,
      info_gathering: Number(info_gathering.score.toFixed(3)),
      communication: Number(communication.score.toFixed(3)),
    },
    info: {
      evaluator: "clinical_process",
      outcome,
      safety,
      forbidden,
      checklist: { info_gathering, communication },
      tool_call_sequence: toolNames,
    },
    comments: parts.join(" | "),
  };
}

Object.assign(window, {
  TASKS, SEED_SIM,
  createBlankTask, taskToTau2,
  exportTasksJson, exportTasksJsonl, downloadBlob,
  loadAllPrimekgTasks,
  getLlmConfig, setLlmConfig, callLLM, isAzureOpenAI,
  clinicianSystemPrompt, patientSystemPrompt, messagesForLLM,
  CLINICAL_TOOLS, CLINICAL_TOOL_IMPLS, executeTool,
  buildToolsForTask, customToolToOpenAISpec,
  runOneAgentStep, runOnePatientStep,
  computeFinalReward,
});
