【直播】三剑客实战Nginx日志分析

# Print field 1 of the first three numerically-sorted lines whose field 10 is
# non-empty/non-zero.
# Fixed: the original used typographic quotes (‘ ’), which the shell treats as
# literal characters, so awk received a garbled program; also dropped the
# useless `less` stage.
awk '$10' nginx.log | sort -n | awk 'NR<=3{print $1}'

# Top-3 most frequent whole lines in nginx.log, with their counts.
# Fixes: the original passed typographic quotes (`-t ’ ’`) — the shell keeps
# them as literal words, so sort saw a bogus ’ file argument; the count
# column also needs a numeric sort (-n), not a lexical one.
find_top_3(){
  sort nginx.log | uniq -c | sort -rn -k1,1 | head -n 3
}

# Average of the second-to-last field over every line of the log.
# Fixed: typographic quotes (‘ ’) around the awk program — the shell does not
# treat them as quoting, so $(NF-1) would have been expanded by the shell
# instead of awk.
awk '{sum+=$(NF-1)}END{print "Average = ", sum/NR}' nginx.log

# NOTE(review): despite its name, this reports the 3 most frequent client IPs
# (field 1), not error lines.
# Fixes: `uniq -c` only collapses *adjacent* duplicates, so the stream must be
# sorted first; and the final sort needs -n so counts are compared numerically.
find_error_log() {
  awk '{print $1}' nginx.log | sort | uniq -c | sort -rn | head -3
}

# Top-3 client IPs by request count.
# Fixes: typographic quotes (‘ ’) around the awk program, and the missing
# `sort` before `uniq -c` (uniq only collapses adjacent duplicates).
awk '{print $1}' nginx.log | sort | uniq -c | sort -nr | head -n 3

# Top-3 client IPs by request count (sed -n '1,3p' is equivalent to head -3).
# Fixed: typographic quotes (‘ ’ and “ ”) are literal characters to the shell,
# so both the awk program and the sed address were garbled.
awk '{print $1}' nginx.log | sort | uniq -c | sort -nr | sed -n '1,3p'

# Mean of the second-to-last field for homepage requests: the first awk keeps
# only lines whose request path ($7) is "/", the second averages over them.
awk '$7 == "/"' nginx.log | awk '{ total += $(NF-1) } END { print total / NR }'
2 个赞
# Average of field 14 (response time) over homepage ("/") requests.
# Fixed: the original awk program used `$i`/`$sum` — in awk, `$var` is a
# *field reference*, and since i/sum were empty it dereferenced $0 instead of
# summing field 14; it also divided by NR rather than the matched-line count.
url_avg_time() {
    awk '$7 ~ /^\/$/ { sum += $14; n++ } END { if (n) print sum / n }' nginx.log
}
url_avg_time() {
     # Mean of the second-to-last field across every request line (no
     # homepage filter — averages the whole log).
     awk '{ total += $(NF-1) } END { print "Average = ", total / NR }' nginx.log
}

# Average of the second-to-last field (response time) for homepage requests.
# Fixes: typographic quotes (‘ ’) around the awk program, and the divisor —
# the original divided by NR (every line read), not by the number of matching
# "/" lines, which skewed the average whenever other URLs were present.
awk '$7=="/"{sum += $(NF-1); n++} END {print "average = " sum/n}' nginx.log

忘了筛选首页了。。。

# Emit the client IP (first field) of every request line.
awk '{ print $1 }' nginx.log

课后作业

  • 请求数量
url_summary(){
    # Normalize request paths ($7) into patterns (numeric ids -> "int",
    # year/slug file names -> "id") and count occurrences of each pattern.
    awk '{print $7}' nginx.log \
        | sed -E \
            -e 's@/topics/[0-9]+@/topics/int@g' \
            -e 's@replies/[0-9]+@replies/int@g' \
            -e 's@/2018/[0-9a-z\-]+@/2018/id@g' \
            -e 's@/2017/[0-9a-z\-]+@/2017/id@g' \
            -e 's@/2016/[0-9a-z\-]+@/2016/id@g' \
            -e 's@/2015/[0-9a-z\-]+@/2015/id@g' \
            -e 's@/2014/[0-9a-z\-]+@/2014/id@g' \
            -e 's@/avatar/[0-9]+@/avatar/int@g' \
            -e 's@/avatar/int/[0-9a-z]+@/avatar/int/id@g' \
        | sort | uniq -c
}
  • 前十排序
url_summary(){
    # Same pattern normalization as above, but ranked: report only the ten
    # most frequent URL patterns.
    awk '{print $7}' nginx.log \
        | sed -E \
            -e 's@/topics/[0-9]+@/topics/int@g' \
            -e 's@replies/[0-9]+@replies/int@g' \
            -e 's@/2018/[0-9a-z\-]+@/2018/id@g' \
            -e 's@/2017/[0-9a-z\-]+@/2017/id@g' \
            -e 's@/2016/[0-9a-z\-]+@/2016/id@g' \
            -e 's@/2015/[0-9a-z\-]+@/2015/id@g' \
            -e 's@/2014/[0-9a-z\-]+@/2014/id@g' \
            -e 's@/avatar/[0-9]+@/avatar/int@g' \
            -e 's@/avatar/int/[0-9a-z]+@/avatar/int/id@g' \
        | sort | uniq -c | sort -rn | head -10
}

# Top-10 most frequent (IP, path, status) triples.
# Fixed: typographic quotes both around and *inside* the awk program — the
# field separators were “ ” instead of " ", so the output was garbled.
awk '{print $1" "$7" "$9}' nginx.log | sort -n | uniq -c | sort -n -r | head -10

  1. 找出log中的404,500的报错
# Print every log line whose status field ($9) is exactly 404 or 500.
# Fixed: the unanchored regex /404|500/ also matches those digits inside
# longer numbers (e.g. a value of 5004 or 14040); anchoring makes only the
# whole status code match.
find_error_log(){
    awk '$9 ~ /^(404|500)$/' nginx.log
}

2.找出访问量最高的ip, 统计分析,取出top3的ip和数量

find_top_3(){
    # Report the three busiest client IPs together with their request counts.
    awk '{ print $1 }' nginx.log \
        | sort \
        | uniq -c \
        | sort -rn \
        | head -n 3
}

3.找出首页访问的平均响应时间

url_avg_time(){
    # Print "avg:", each homepage response time followed by "+", then the
    # mean over the matched lines.
    awk '$7 == "/"' nginx.log \
        | awk 'BEGIN { total = 0; print "avg:" }
               { print $(NF-1) "+"; total += $(NF-1) }
               END { print total / NR }'
}

课后作业:
找出访问量最高的页面地址 借助于sed的统计分析

/topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成 /_img
/uploads/photo/2018/id.png!large
/topics/9497 改成 /topics/int
其他规则参考如上

输出

url pattern对应的请求数量
取出top 10请求量的url pattern

类似
288 url1
3000 url2

把数字替换成int
/topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
/topics/9497 改成 /topics/int

# Collapse every run of digits in the request path to the literal "int".
awk '{print $7}' nginx.log | sed -E 's#[0-9]+#int#g'

url pattern对应的请求数量
取出top 10请求量的url pattern

url_pattern(){
    # Ten most-requested raw paths ($7) with their hit counts.
    awk '{ print $7 }' nginx.log | sort | uniq -c | sort -rn | head -10
}
url_topten() {
     # Top-10 URL patterns: digit runs become "int", image/upload paths are
     # collapsed to their prefix, then count and rank.
     awk '{ print $7 }' nginx.log \
         | sed -r -e 's/([0-9]+)/int/g' \
                  -e 's/^\/_img\/.*$/\/_img/g' \
                  -e 's/^\/uploads.*large$/\/uploads/g' \
                  -e 's/^\/uploads.*/\/uploads/g' \
         | sort -n \
         | uniq -c \
         | sort -nr \
         | head -n 10
}

# Top-10 URL patterns: numeric path segments -> "test", query strings
# stripped, media file names collapsed to their extension, long tokens
# replaced with ":string:".
# Fixes: typographic quotes (‘ ’) made every sed expression a literal word;
# the blank lines between the `\` continuations terminated the command after
# its first line; and the ERE operators used here (`+`, `{15,}`) require
# `sed -E`, which only the final sed stage had.
# NOTE(review): the unescaped "." before png/jpg/gif/jpeg matches any
# character — presumably a literal dot was intended; kept as written.
awk '{print $7}' nginx.log \
  | sed -E \
      -e 's#[0-9]+/#test/#g' \
      -e 's#[?|!].*##g' \
      -e 's#[0-9]+$#test#g' \
      -e 's#[0-9a-z-]*.png#.png#g' \
      -e 's#[0-9a-z-]*.jpg#.jpg#g' \
      -e 's#[0-9a-z-]*.gif#.gif#g' \
      -e 's#[0-9a-z-]*.jpeg#.jpeg#g' \
      -e 's#[a-zA-Z0-9%&$@_]{15,}#:string:#g' \
      -e 's#^/[0-9a-zA-Z]*$#/user#g' \
      -e 's#%21large##g' \
      -e 's#/cable#/cable===#g' \
      -e 's#/hogwarts#/hogwarts===#g' \
  | sed -E 's#[a-zA-Z0-9%&$@_]{20,}#:string:#g' \
  | sort | uniq -c | sort -nr | head -10

url_summary(){
    # Ten most common URL patterns: strip query strings, turn numeric path
    # segments into /int, and slug-like file names into /id.
    awk '{print $7}' nginx.log \
        | sed -e 's/?.*$//' \
              -e 's/\/[0-9]\+/\/int/g' \
              -e 's/\/[0-9a-z-]\+\./\/id\./' \
        | sort | uniq -c | sort -nr | head -10
}

本来这个应该够用了。。。但是匹配出来会有一些 id.rar、id.tar.gz、id.json,这些按理说不应该转成 id。想在正则后面加 (?!json) 把 json 排除掉,但在 sed 里不成功;Python 里用 /[0-9a-z-]+\.(?!json)/ 就能匹配图片 id 并排除 .json。原因是:(?!...) 是 PCRE 的负向先行断言,而 sed 只支持 POSIX BRE/ERE,不支持先行断言;命令行里可以改用 grep -P 或 perl -pe 来实现同样的排除。

# Ten most common URL patterns, normalizing topic/reply ids, yearly blog
# slugs and avatar ids before counting.
awk '{print $7}' nginx.log \
  | sed -e 's@/topics/[0-9]*@/topics/int@g' \
        -e 's@/replies/[0-9]*@/replies/int@g' \
        -e 's@/2018/[a-z0-9\-]*.@/2018/id.@g' \
        -e 's@/2017/[a-z0-9\-]*.@/2017/id.@g' \
        -e 's@/2016/[a-z0-9]*.@/2016/id.@g' \
        -e 's@/2015/[a-z0-9]*.@/2015/id.@g' \
        -e 's@/2014/[a-z0-9]*.@/2014/id.@g' \
        -e 's@/avatar/[0-9]*@/avatar/int@g' \
        -e 's@/avatar/int/[0-9a-z]*.@/avatar/int/id.@g' \
  | sort | uniq -c | sort -nr | head -n 10
# Top-10 URL patterns: digit runs -> xxx, media file names -> yyy.png,
# query strings stripped, and anything after yyy.png dropped.
awk '{print $7}' nginx.log \
  | sed -r 's#[0-9]+#xxx#g' \
  | sed -r 's#(^.*/)[0-9a-zA-Z-]+\.[a-z]{3,4}(.*$)#\1yyy.png\2#g' \
  | sed -r 's#\?.*$##g' \
  | sed -r 's#(^.*yyy.png).*$#\1#g' \
  | sort | uniq -c | sort -nr | head -10