#!/bin/bash
# openstack monitoring script
#
# OpenStack monitoring script 1.1
#
# Checks the containers and logs on the compute nodes, the OpenStack services, and the state of the VMs on each host.
# Usage: running with overcloud credentials is recommended -> create the file with "vi temp_mon.sh", then paste and save this whole script -> make the script executable -> run the script and write the results to a file.
# Run >> su - stack >> source ~/overcloudrc ; vi ./temp_mon.sh ; chmod 755 ./temp_mon.sh ; ./temp_mon.sh > temp_mon.sh.log 2>&1
# Uncomment the commented-out sections as needed.


# Compute nodes: address list walked by the per-host checks (split on
# whitespace) and the container count a healthy compute node reports.
COM_HOSTS='172.23.167.150'
COM_CNT='16'

# Controller nodes: address list and the container count a healthy
# controller node reports.
CON_HOSTS='172.23.167.103'
CON_CNT='0'

# Controller node checks (disabled; uncomment the lines below to enable them)
#for host in $CON_HOSTS
#do
#  echo "========== $host ============"
#  echo ">>>>>> Check container's status <<<<<<"
#  cnt=$(ssh root@$host "sudo docker ps | grep -v IMAGE | wc -l")
#  cnt=$(ssh root@$host "sudo podman ps | grep -v IMAGE | wc -l")
#  if [[ $cnt -eq $CON_CNT ]]; then
#    echo "The $host containers count is $cnt. This is normal."
#  else
#    echo "Please check container's status"
#    ssh root@$host "sudo docker ps"
#    ssh root@$host "sudo podman ps"
#  fi

#  echo ">>>>>> Check service logs <<<<<<"
#  ssh root@$host "echo 'tail /var/log/containers/*/*.log | grep -i error | wc -l' > mon-logs.sh"
#  err_cnt=$(ssh root@$host "sudo sh mon-logs.sh")
#  if [[ $err_cnt -eq 0 ]]; then
#    echo "The $host has no error logs. This system is normal."
#  else
#    echo "Please check service logs"
#    ssh root@$host "echo 'tail /var/log/containers/*/*.log | grep -i error' > mon-logs.sh"
#    ssh root@$host "sudo sh mon-logs.sh"
#  fi
#done

# Compute node checks.
#
# For every address in COM_HOSTS (a whitespace-separated list) the script logs
# in as heat-admin over SSH and prints: the running-container count compared
# with COM_CNT, uptime, disk usage, network state, ping results, iSCSI/MDM
# state, chrony state, and the "error" lines found at the tail of the
# container logs.

#######################################
# Succeeds when the argument is a non-negative decimal integer.
# Arguments: $1 - string to test
# Returns:   0 if $1 consists only of digits, non-zero otherwise
#######################################
is_count() {
  [[ $1 =~ ^[0-9]+$ ]]
}

#######################################
# Runs all health checks against one compute node and prints the report.
# Globals:   COM_CNT (read) - container count of a healthy compute node
# Arguments: $1 - address of the compute node
# Outputs:   the report on stdout
#######################################
check_compute_node() {
  local host=$1
  local target="heat-admin@${host}"
  # Run through "sudo sh -c" so that the glob is expanded by a root shell.
  local log_scan='tail /var/log/containers/*/*.log | grep -i error'
  local cnt err_cnt

  echo "========== $host ============"
  echo ">>>>>> Check container's status <<<<<<"
  # On docker-based nodes use "sudo docker ps -q" instead.
  # -q prints one container ID per line, so there is no header to filter out.
  cnt=$(ssh "$target" "sudo podman ps -q | wc -l")
  cnt=${cnt//[[:space:]]/}
  if ! is_count "$cnt"; then
    # An empty or garbled answer means the remote command did not run.
    echo "Could not read the container count from $host. Please check the SSH connection."
  elif [[ "$cnt" -eq "$COM_CNT" ]]; then
    echo "The $host containers count is $cnt. This is normal."
  else
    echo "Please check container's status"
    # On docker-based nodes use "sudo docker ps" instead.
    ssh "$target" "sudo podman ps"
  fi

  echo ">>>>>> Check Compute node Status <<<<<<"
  echo "===========$host uptime==========="
  ssh "$target" "sudo uptime"
  echo "===========$host disk status==========="
  ssh "$target" "sudo df -h"
  echo "===========$host network status==========="
  echo "=====all stat======"
  ssh "$target" "sudo ip a |grep DOWN ; arp -n |grep incomplete ; ip ro"
  echo "=====ping test======"
  # Single-quoted on purpose: the default gateway must be looked up on the
  # compute node itself, so the command substitution has to reach the remote
  # shell unexpanded.
  # shellcheck disable=SC2016
  ssh "$target" 'sudo ping -c 3 "$(ip -4 route show default | awk "{print \$3; exit}")" | tail -3'
  ssh "$target" "sudo ping 8.8.8.8 -c 3 |tail -3"
  echo "===========$host iscsiadm status==========="
  ssh "$target" "sudo netstat -an | grep 7072"
  ssh "$target" "sudo iscsiadm -m session -P 3 |grep State"
  echo "===========$host mdm status==========="
  ssh "$target" "sudo /opt/emc/scaleio/sdc/bin/drv_cfg --query_mdms"
  echo "===========$host chronyc status==========="
  ssh "$target" "sudo date ; sudo chronyc tracking ; sudo chronyc sources ; sudo chronyc sourcestats"
  echo "================================="

  echo ">>>>>> Check service logs <<<<<<"
  err_cnt=$(ssh "$target" "sudo sh -c '${log_scan} | wc -l'")
  err_cnt=${err_cnt//[[:space:]]/}
  if ! is_count "$err_cnt"; then
    # Without this guard an unreachable host would be reported as healthy,
    # because an empty string compares equal to 0.
    echo "Could not read the error count from $host. Please check the SSH connection."
  elif [[ "$err_cnt" -eq 0 ]]; then
    echo "The $host has no error logs. This system is normal."
  else
    echo "Please check service logs"
    ssh "$target" "sudo sh -c '${log_scan}'"
  fi
}

# COM_HOSTS is a whitespace-separated list, so it is split on purpose.
# shellcheck disable=SC2086
for host in $COM_HOSTS; do
  check_compute_node "$host"
done

#echo "#============================"
#echo "# Check OpenStack Services "
#echo "#============================"
#source /home/stack/overcloudrc
# Load the overcloud environment file from the current user's home directory
# (presumably it exports the OpenStack credentials; verify on the target host).
. ~/overcloudrc

# Print each service/agent listing, preceded by the command that produced it.
for query in "compute service list" "volume service list" "network agent list"; do
  printf '%s\n' "openstack ${query}"
  # Unquoted on purpose: the query has to split into separate CLI arguments.
  # shellcheck disable=SC2086
  openstack ${query}
done

echo "#============================"
echo "# Check VM STATUS in HOST "
echo "#============================"

#######################################
# Prints the hypervisor hostname (4th whitespace-separated field of
# "openstack hypervisor list --long") of every row that contains one of the
# given IP addresses.
# Arguments: compute node IP addresses
# Outputs:   one hypervisor hostname per line on stdout
# Returns:   non-zero when the hypervisor list cannot be fetched
#######################################
hypervisor_names_for_ips() {
  local listing ip
  # Fetch the table once instead of once per address.
  listing=$(openstack hypervisor list --long) || return
  for ip in "$@"; do
    # -F: dots in the address are literal; -w: 10.0.0.1 must not match 10.0.0.10.
    grep -Fw -- "$ip" <<<"$listing" | awk '{print $4}'
  done
  return 0
}

#######################################
# Prints the name, status and hypervisor hostname rows of every server that
# "openstack server list --all --long" shows on the hypervisors belonging to
# the given compute node IPs.
# Arguments: compute node IP addresses
# Outputs:   per server a "==========<id>==========" header followed by the
#            selected rows with all spaces removed; diagnostics on stderr
# Returns:   0
#######################################
show_vms_on_hosts() {
  local hypervisors vm_id
  local -a vm_ids=()

  if (( $# == 0 )); then
    echo "No compute node IPs configured; skipping VM status check." >&2
    return 0
  fi

  hypervisors=$(hypervisor_names_for_ips "$@")
  if [[ -z $hypervisors ]]; then
    # An empty pattern would match every row of the server list, so stop here
    # rather than query every VM in the cloud.
    echo "No hypervisor matched the compute node IPs; skipping VM status check." >&2
    return 0
  fi

  # One hostname per line acts as a list of alternative fixed-string patterns.
  mapfile -t vm_ids < <(
    openstack server list --all --long \
      | grep -Fw -- "$hypervisors" \
      | awk '{print $2}'
  )

  for vm_id in "${vm_ids[@]}"; do
    [[ -n $vm_id ]] || continue
    echo "==========$vm_id=========="
    openstack server show "$vm_id" \
      | grep -E '(name|status|hypervisor_hostname)' \
      | grep -Ev '(name=|:hostname|_name|key|flavor|security)' \
      | grep -v -e '^[[:space:]]*$' \
      | awk '{ gsub(/ /,""); print }'
  done
  return 0
}

# COM_HOSTS is a whitespace-separated list, so it is split on purpose.
# shellcheck disable=SC2086
show_vms_on_hosts ${COM_HOSTS:-}

# Post-process a captured report if one is present: drop every line that
# contains an asterisk and remove the capture. Nothing in the visible script
# creates ./mon.sh.temp, so the step is skipped when the file does not exist
# instead of failing and leaving an empty ./mon.sh.log behind.
if [[ -f ./mon.sh.temp ]]; then
  grep -vF -- '*' ./mon.sh.temp > ./mon.sh.log
  rm -f -- ./mon.sh.temp
fi