| 
      
        2021-05-25
      
      §
     | 
  
    
  | 18:16 | 
  <razzi> | 
  run sudo systemctl start on all failed units listed by `systemctl list-units --state=failed` on an-launcher1002 | 
  [analytics] | 
            
  | 18:14 | 
  <razzi> | 
  sudo systemctl start eventlogging_to_druid_navigationtiming_hourly.service | 
  [analytics] | 
            
  | 18:01 | 
  <razzi> | 
  manually edit /etc/hadoop/conf/capacity-scheduler.xml to put the queues in the running state, then sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues | 
  [analytics] | 
            
  | 17:52 | 
  <razzi> | 
  sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues on an-master1001 and an-master1002 | 
  [analytics] | 
            
  | 17:28 | 
  <razzi> | 
  sudo systemctl restart refine_eventlogging_legacy | 
  [analytics] | 
            
  | 17:28 | 
  <razzi> | 
  sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues to enable submitting jobs once again | 
  [analytics] | 
            
  | 17:07 | 
  <razzi> | 
  re-enabled puppet on an-masters and an-launcher | 
  [analytics] | 
            
  | 17:04 | 
  <razzi> | 
  sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode leave | 
  [analytics] | 
            
  | 17:03 | 
  <razzi> | 
  sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet | 
  [analytics] | 
            
  | 16:43 | 
  <razzi> | 
  sudo systemctl restart hadoop-hdfs-namenode on an-master1001 | 
  [analytics] | 
            
  | 16:38 | 
  <razzi> | 
  sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -saveNamespace | 
  [analytics] | 
            
  | 16:35 | 
  <razzi> | 
  sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode enter | 
  [analytics] | 
            
  | 16:28 | 
  <razzi> | 
  sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet | 
  [analytics] | 
            
  | 16:23 | 
  <razzi> | 
  sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode leave | 
  [analytics] | 
            
  | 16:06 | 
  <razzi> | 
  sudo systemctl restart hadoop-hdfs-namenode | 
  [analytics] | 
            
  | 15:52 | 
  <razzi> | 
  checkpoint hdfs with sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -saveNamespace | 
  [analytics] | 
            
  | 15:51 | 
  <razzi> | 
  enable safe mode on an-master1001 with sudo -u hdfs kerberos-run-command hdfs hdfs dfsadmin -safemode enter | 
  [analytics] | 
            
  | 15:36 | 
  <razzi> | 
  disable puppet on an-master1001.eqiad.wmnet and an-master1002.eqiad.wmnet again | 
  [analytics] | 
            
  | 15:35 | 
  <razzi> | 
  re-enable puppet on an-masters, run puppet, and sudo -u yarn kerberos-run-command yarn yarn rmadmin -refreshQueues | 
  [analytics] | 
            
  | 15:32 | 
  <razzi> | 
  disable puppet on an-master1001.eqiad.wmnet and an-master1002.eqiad.wmnet | 
  [analytics] | 
            
  | 14:39 | 
  <razzi> | 
  stop puppet on an-launcher and stop hadoop-related timers | 
  [analytics] | 
            
  | 01:09 | 
  <razzi> | 
  sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1002-eqiad-wmnet an-master1001-eqiad-wmnet | 
  [analytics] | 
            
  | 01:07 | 
  <razzi> | 
  sudo -u hdfs /usr/bin/hdfs haadmin -failover an-master1001-eqiad-wmnet an-master1002-eqiad-wmnet | 
  [analytics] |