| 
      
        2024-09-12
      
      §
     | 
  
    
  | 07:22 | 
  <jayme@deploy1003> | 
  helmfile [aux-k8s-eqiad] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:22 | 
  <jayme@deploy1003> | 
  helmfile [dse-k8s-eqiad] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:21 | 
  <jayme@deploy1003> | 
  helmfile [dse-k8s-eqiad] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:21 | 
  <jayme@deploy1003> | 
  helmfile [ml-staging-codfw] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:21 | 
  <jayme@deploy1003> | 
  helmfile [ml-staging-codfw] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:21 | 
  <jayme@deploy1003> | 
  helmfile [ml-serve-codfw] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:20 | 
  <jayme@deploy1003> | 
  helmfile [ml-serve-codfw] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:20 | 
  <jayme@deploy1003> | 
  helmfile [ml-serve-eqiad] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:20 | 
  <jayme@deploy1003> | 
  helmfile [ml-serve-eqiad] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:20 | 
  <jayme@deploy1003> | 
  helmfile [staging-codfw] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:20 | 
  <jayme@deploy1003> | 
  helmfile [staging-codfw] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [staging-eqiad] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [staging-eqiad] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [codfw] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [codfw] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [eqiad] DONE helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:19 | 
  <jayme@deploy1003> | 
  helmfile [eqiad] START helmfile.d/admin 'apply'. | 
  [production] | 
            
  | 07:18 | 
  <slyngshede@cumin1002> | 
  END (PASS) - Cookbook sre.idm.logout (exit_code=0) Logging Sandeeps out of all services on: 2298 hosts | 
  [production] | 
            
  | 07:18 | 
  <slyngshede@cumin1002> | 
  START - Cookbook sre.idm.logout Logging Sandeeps out of all services on: 2298 hosts | 
  [production] | 
            
  | 07:10 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Depooling db2212 (T371742)', diff saved to https://phabricator.wikimedia.org/P69016 and previous config saved to /var/cache/conftool/dbconfig/20240912-071034-ladsgroup.json | 
  [production] | 
            
  | 07:10 | 
  <ladsgroup@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db2212.codfw.wmnet with reason: Maintenance | 
  [production] | 
            
  | 07:10 | 
  <ladsgroup@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 12:00:00 on db2212.codfw.wmnet with reason: Maintenance | 
  [production] | 
            
  | 07:09 | 
  <jayme@cumin1002> | 
  START - Cookbook sre.kafka.roll-restart-reboot-brokers rolling restart_daemons on A:kafka-main-codfw | 
  [production] | 
            
  | 06:58 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.mysql.clone of db2129.codfw.wmnet onto db2229.codfw.wmnet | 
  [production] | 
            
  | 06:56 | 
  <arnaudb@cumin1002> | 
  dbctl commit (dc=all): 'Cloning db2129 in db2229 for T373579', diff saved to https://phabricator.wikimedia.org/P69015 and previous config saved to /var/cache/conftool/dbconfig/20240912-065641-arnaudb.json | 
  [production] | 
            
  | 06:55 | 
  <arnaudb@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2229.codfw.wmnet with reason: provisionning db2229.codfw.wmnet - T373579 | 
  [production] | 
            
  | 06:55 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2229.codfw.wmnet with reason: provisionning db2229.codfw.wmnet - T373579 | 
  [production] | 
            
  | 06:55 | 
  <arnaudb@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on db2129.codfw.wmnet with reason: provisionning db2229.codfw.wmnet - T373579 | 
  [production] | 
            
  | 06:55 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on db2129.codfw.wmnet with reason: provisionning db2229.codfw.wmnet - T373579 | 
  [production] | 
            
  | 06:34 | 
  <jayme@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1 day, 0:00:00 on kafka-main[2004,2009].codfw.wmnet with reason: Hardware refresh | 
  [production] | 
            
  | 06:34 | 
  <jayme@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 1 day, 0:00:00 on kafka-main[2004,2009].codfw.wmnet with reason: Hardware refresh | 
  [production] | 
            
  | 06:33 | 
  <jayme> | 
  evacuating leadership for all partitions assigned to broker id 2004 on kafka-main-codfw - T363210 | 
  [production] | 
            
  | 06:19 | 
  <arnaudb@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 25 hosts with reason: Primary switchover s3 T374421 | 
  [production] | 
            
  | 06:19 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 1:00:00 on 25 hosts with reason: Primary switchover s3 T374421 | 
  [production] | 
            
  | 06:16 | 
  <ladsgroup@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db2202.codfw.wmnet with reason: Maintenance | 
  [production] | 
            
  | 06:16 | 
  <ladsgroup@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 12:00:00 on db2202.codfw.wmnet with reason: Maintenance | 
  [production] | 
            
  | 06:16 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Repooling after maintenance db2188 (T371742)', diff saved to https://phabricator.wikimedia.org/P69014 and previous config saved to /var/cache/conftool/dbconfig/20240912-061639-ladsgroup.json | 
  [production] | 
            
  | 06:05 | 
  <arnaudb@cumin1002> | 
  dbctl commit (dc=all): 'T374592', diff saved to https://phabricator.wikimedia.org/P69013 and previous config saved to /var/cache/conftool/dbconfig/20240912-060550-arnaudb.json | 
  [production] | 
            
  | 06:03 | 
  <arnaudb@cumin1002> | 
  dbctl commit (dc=all): 'Promote es2038 to es7 primary and set section read-write T374592', diff saved to https://phabricator.wikimedia.org/P69012 and previous config saved to /var/cache/conftool/dbconfig/20240912-060308-arnaudb.json | 
  [production] | 
            
  | 06:02 | 
  <arnaudb> | 
  Starting es7 codfw failover from es2039 to es2038 - T374592 | 
  [production] | 
            
  | 06:01 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Repooling after maintenance db2188', diff saved to https://phabricator.wikimedia.org/P69011 and previous config saved to /var/cache/conftool/dbconfig/20240912-060131-ladsgroup.json | 
  [production] | 
            
  | 05:59 | 
  <arnaudb@cumin1002> | 
  dbctl commit (dc=all): 'Set es2038 with weight 0 T374592', diff saved to https://phabricator.wikimedia.org/P69010 and previous config saved to /var/cache/conftool/dbconfig/20240912-055903-arnaudb.json | 
  [production] | 
            
  | 05:58 | 
  <arnaudb@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 1:00:00 on 6 hosts with reason: Primary switchover es7 T374592 | 
  [production] | 
            
  | 05:58 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 1:00:00 on 6 hosts with reason: Primary switchover es7 T374592 | 
  [production] | 
            
  | 05:46 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Repooling after maintenance db2188', diff saved to https://phabricator.wikimedia.org/P69009 and previous config saved to /var/cache/conftool/dbconfig/20240912-054624-ladsgroup.json | 
  [production] | 
            
  | 05:44 | 
  <arnaudb@cumin1002> | 
  START - Cookbook sre.hosts.reimage for host db1246.eqiad.wmnet with OS bookworm | 
  [production] | 
            
  | 05:31 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Repooling after maintenance db2188 (T371742)', diff saved to https://phabricator.wikimedia.org/P69008 and previous config saved to /var/cache/conftool/dbconfig/20240912-053116-ladsgroup.json | 
  [production] | 
            
  | 04:37 | 
  <ladsgroup@cumin1002> | 
  dbctl commit (dc=all): 'Depooling db2188 (T371742)', diff saved to https://phabricator.wikimedia.org/P69007 and previous config saved to /var/cache/conftool/dbconfig/20240912-043701-ladsgroup.json | 
  [production] | 
            
  | 04:36 | 
  <ladsgroup@cumin1002> | 
  END (PASS) - Cookbook sre.hosts.downtime (exit_code=0) for 12:00:00 on db2188.codfw.wmnet with reason: Maintenance | 
  [production] | 
            
  | 04:36 | 
  <ladsgroup@cumin1002> | 
  START - Cookbook sre.hosts.downtime for 12:00:00 on db2188.codfw.wmnet with reason: Maintenance | 
  [production] |