1. 获取ip、cpu、内存信息、loadAverage
ip使用ifconfig获得
# IPv4 address of eth0: take the second word of the "inet addr:<ip>" row that
# ifconfig prints and keep what follows the first colon.
localip=$(ifconfig eth0 | awk '/inet addr/ { n = split($2, part, ":"); print (n > 1 ? part[2] : $2) }')
空闲cpu由mpstat获得
# CPU idle percentage from the "all" row of mpstat. The column is located by
# its "%idle" header instead of a fixed position, because the number of
# columns (and an optional AM/PM field) differs between sysstat versions.
# LC_ALL=C keeps the decimal separator a dot.
cpuidle=$(LC_ALL=C mpstat | awk '
  { for (i = 1; i <= NF; i++) if ($i == "%idle") col = i }
  col && / all / { print $col; exit }')
内存使用情况由free获得
# Memory figures from ONE `free` snapshot, so that pcent is exactly
# freemem / totalmem (three separate `free` calls can each see different
# numbers). pcent is "NA" when the reported total is 0.
read -r totalmem freemem pcent <<EOF
$(free | awk '/^Mem/ { print $2, $4, ($2 > 0 ? $4 / $2 : "NA"); exit }')
EOF
loadAverage由uptime获得
# 1/5/15-minute load averages from uptime. The values are taken from the text
# after the "load average:" label rather than from fixed word positions,
# because the number of words before it changes with the uptime length
# ("up 3:02" vs "up 5 days, 3:02"). LC_ALL=C keeps the decimal separator a dot.
read -r loadavg1 loadavg5 loadavg15 <<EOF
$(LC_ALL=C uptime | sed -n 's/.*load averages\{0,1\}: *//p' | tr -d ',')
EOF
. /etc/profile AlertNginxCpu=50 AlertNginxMem=50 AlertPcent=0.5 AlertCpu=50 AlertLoad=1 msg= Subject='服务器超载警报' mailto(){ /application/search/sendmail.py "$Subject" "$msg" msg= Subject='服务器超载警报' } date=$(date) localip=$(/sbin/ifconfig eth0 | grep 'inet addr' | awk '{print $2}' | cut -f2 -d:) nginxcpu=$(ps axu|grep nginx |grep search | grep worker | awk '{print $3}') nginxmempcent=$(ps axu|grep nginx |grep search | grep worker | awk '{print $4}') nginxmem=$(ps axu|grep nginx |grep search | grep worker | awk '{print $6}') cpuidle=$(mpstat | grep all | awk '{print $11}') freemem=$(free | grep Mem | awk '{print $4}') totalmem=$(free | grep Mem | awk '{print $2}') freemempcent=$(free | grep Mem | awk '{print $4/$2}') freeswap=$(free | grep Swap | awk '{print $4}') totalswap=$(free | grep Swap | awk '{print $2}') freeswappcent=$(free | grep Swap | awk '{print $4/$2}') loadavg1=$(uptime | awk '{print $10}' | cut -f1 -d,) loadavg5=$(uptime | awk '{print $11}' | cut -f1 -d,) loadavg15=$(uptime | awk '{print $12}') echo $date echo $localip echo '空闲cpu:'$cpuidle echo '空闲内存:'$freemem echo '总内存:'$totalmem echo '空闲内存比:'$freemempcent echo '空闲交换内存:'$freeswap echo '总交换内存:'$totalswap echo '空闲交换内存比:'$freeswappcent echo $loadavg1 echo $loadavg5 echo $loadavg15 alldata=''$date'| '$localip'| nginx:'$nginxcpu','$nginxmempcent'('$nginxmem')| cpu_idle:'$cpuidle'| Mem:'$freemem'/'$totalmem'='$freemempcent'| Swap:'$freeswap'/'$totalswap'='$freeswappcent'| loadavg:'$loadavg1','$loadavg5','$loadavg15'' echo $alldata >> monitor.log if [ $(echo "$nginxcpu >= $AlertNginxCpu"|bc) = 1 ]; then msg=''$alldata'| nginx cpu'$nginxcpu':高于'$AlertNginxCpu'' Subject=''$Subject':'$localip': nginx cpu'$nginxcpu'高于'$AlertNginxCpu'' echo $msg >> monitor.log mailto fi if [ $(echo "$nginxmempcent >= $AlertNginxMem"|bc) = 1 ]; then msg=''$alldata'| nginx mem'$nginxmempcent':高于'$AlertNginxMem'' Subject=''$Subject':'$localip': nginx mem'$nginxmempcent'高于'$AlertNginxMem'' echo $msg >> monitor.log mailto fi if [ $(echo 
"$freeswappcent <= $AlertPcent"|bc) = 1 ]; then msg=''$alldata'| 剩余虚拟内存占比'$freeswappcent':低于'$AlertPcent'' Subject=''$Subject':'$localip':剩余虚拟内存占比'$freeswappcent'低于'$AlertPcent'' echo $msg >> monitor.log mailto fi if [ $(echo "$cpuidle <= $AlertCpu"|bc) = 1 ]; then msg=''$alldata'| cpu空闲时间'$cpuidle':低于'$AlertCpu'' Subject=''$Subject':'$localip':cpu空闲时间'$cpuidle'低于'$AlertCpu'' echo $msg >> monitor.log mailto fi if [ $(echo "$loadavg1 >= $AlertLoad"|bc) = 1 ]; then msg=''$alldata'| load'$loadavg1':高于'$AlertLoad'' Subject=''$Subject':'$localip':load'$loadavg1'高于'$AlertLoad'' echo $msg >> monitor.log mailto fi if [ -n $msg ];then echo '系统运行正常' fi
2. 空闲cpu小于cpu报警阈值或空闲内存比例低于内存报警阈值或loadAverage超过阈值时发送邮件报警
#! /usr/bin/env python
"""Send one plain-text alert mail through the configured SMTP account.

Usage: sendmail.py SUBJECT BODY

Exit status: 0 when the mail was sent, 1 when sending failed, 2 when the
subject/body arguments are missing.
"""
import smtplib
import sys
from email.header import Header
from email.mime.text import MIMEText

# Recipients and SMTP account settings; the empty values are placeholders.
mailto_list = [""]
mail_host = "smtp.126.com"
mail_user = "monitor_algo"
mail_pass = ""
mail_postfix = "126.com"


def send_mail(to_list, sub, context):
    """Send `context` as a plain-text mail with subject `sub` to `to_list`.

    Args:
        to_list: list of recipient address strings.
        sub: subject line (may contain non-ASCII text).
        context: mail body (may contain non-ASCII text).

    Returns:
        True when the message was handed to the SMTP server, False when any
        step (connect, login, send) raised; the error is written to stderr.
    """
    me = mail_user + "<" + mail_user + "@" + mail_postfix + ">"
    # Declare UTF-8 explicitly so non-ASCII subjects and bodies are encoded.
    msg = MIMEText(context, "plain", "utf-8")
    msg['Subject'] = Header(sub, "utf-8")
    msg['From'] = me
    # Address lists in mail headers are comma-separated.
    msg['To'] = ", ".join(to_list)

    send_smtp = smtplib.SMTP()
    try:
        send_smtp.connect(mail_host)
        send_smtp.login(mail_user, mail_pass)
        send_smtp.sendmail(me, to_list, msg.as_string())
        send_smtp.quit()
        return True
    except Exception as e:
        sys.stderr.write("send_mail failed: %s\n" % e)
        return False
    finally:
        # Always release the socket, including when connect/login/send failed.
        send_smtp.close()


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 3:
        sys.stderr.write("usage: %s SUBJECT BODY\n" % argv[0])
        return 2
    if send_mail(mailto_list, argv[1], argv[2]):
        return 0
    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))
3. 加入crontab定时任务
注意一点:crontab 中的环境变量与用户登录环境的环境变量不一样,因为 cron 不会从缺省的用户 profile 文件中读入环境变量参数。最简单的解决方法是在命令前用 source 命令(.)先加载 /etc/profile。
# Every minute: load /etc/profile into the job's shell, then run monitor.sh
# with sh.
*/1 * * * * . /etc/profile; sh /application/search/monitor.sh