Linux与Shell实战之Nginx日志分析

# Average of the second-to-last field over log lines that mention "/topics".
# The value is taken relative to the end of the line because the user-agent
# string contains a variable number of words, so a fixed index such as $17
# points into the user agent for many clients.
# Prints 0 instead of dividing by zero when no line matches.
grep "/topics" nginx.log | awk '{ sum += $(NF-1) } END { if (NR) print sum / NR; else print 0 }'
# Mean of the second-to-last field over "/topics" lines, keeping only values
# that end in three digits. Prints 0 when the total is 0 (which also covers
# the case where nothing matched).
grep "/topics" nginx.log \
  | awk '{ print $(NF-1) }' \
  | grep -P '\d{3}?$' \
  | awk '{ total += $0 } END { if (total == 0) print 0; else print total / NR }'
1 个赞
# Print the 10 most frequent values of field 11 of nginx.log, with every
# dotted-quad IPv4 address replaced by the literal "ip".
#
# The substitution needs extended regex syntax (-E), and it runs before
# counting so that entries differing only by address are merged.
# NOTE(review): in the sample log lines field 11 is the quoted referer, while
# the other url_summary variants read the request path ($7) - confirm which
# field is intended.
# Outputs: "count value" lines, highest count first.
url_summary() {
  awk '{print $11}' nginx.log \
    | sed -E 's/([0-9]{1,3}\.){3}[0-9]{1,3}/ip/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}
# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - every "/<digits>/" segment becomes "/int/"
#   - the first "/<lowercase-alnum-or-dash>." becomes "/id."
#   - a trailing "/<digits>" becomes "/int"
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/\/[0-9]+\//\/int\//g' \
        -e 's/\/[0-9a-z-]+\./\/id\./' \
        -e 's/\/[0-9]+$/\/int/' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -10
}
@shell.ceshiren.com ~]$ url_summary 
    584 /cable
    237 /topics/int
    147 /topics/int/replies/int/edit
     94 /
     44 /uploads/photo/int/id.png!large
     42 /topics/int/replies/int/reply_suggest
     41 /topics/int/show_wechat
     40 /_img/uploads/photo/int/id.png!large
     24 /topics/int/replies/int/reply_to
     21 /uploads/user/avatar/id.jpg!md

课后作业 url聚类 url_summary

函数:

# Top 10 request paths (field 7 of nginx.log) after clustering:
# image base names (.png/.jpg/.gif) become "id", numeric path segments become
# "int", and the page/code/state/client_id query values are masked.
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
        -e 's/[A-Za-z0-9\-]+\.png/id\.png/g' \
        -e 's/[A-Za-z0-9\-]+\.jpg/id\.jpg/g' \
        -e 's/[A-Za-z0-9\-]+\.gif/id\.gif/g' \
        -e 's/\/[0-9]+/\/int/g' \
        -e 's/page\=[0-9]+/page\=int/g' \
        -e 's/code\=[A-Za-z0-9]+/code\=id/g' \
        -e 's/state\=[A-Za-z0-9]+/state\=id/g' \
        -e 's/client_id\=[A-Za-z0-9]+/client_id\=id/g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -10
}

函数结果:

@shell.ceshiren.com ~]$ url_summary
    584 /cable
    237 /topics/int
    147 /topics/int/replies/int/edit
     94 /
     44 /uploads/photo/int/id.png!large
     42 /topics/int/replies/int/reply_suggest
     41 /topics/int/show_wechat
     40 /_img/uploads/photo/int/id.png!large
     37 /topics/int?locale=zh-CN
     33 /topics/int?locale=en
1 个赞
# Print every nginx.log line containing " 500 " together with one line of
# context on each side.
find_before_and_after() {
  grep -C 1 ' 500 ' nginx.log
}
@shell.ceshiren.com ~]$ find_before_and_after 
139.180.132.174 - - [05/Dec/2018:00:09:20 +0000] "GET /bbs.zip HTTP/1.1" 404 1264 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.011 0.011 .
139.180.132.174 - - [05/Dec/2018:00:09:12 +0000] "GET /__zep__/js.zip HTTP/1.1" 500 2183 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.018 0.018 .
141.8.142.131 - - [05/Dec/2018:00:09:12 +0000] "GET /topics/14442 HTTP/1.1" 200 21980 "-" "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" 0.195 0.195 .
# List the request paths (field 7 of nginx.log) that match "topics.*edit",
# each rewritten to "/topics/int/replies/int".
#
# The grep pattern is quoted so the shell cannot expand it as a filename glob
# against the current directory. The result goes to stdout, not to a pager,
# so callers can pipe it onward like the other url_summary variants.
# NOTE(review): the greedy "[0-9].*" also swallows the trailing "/edit";
# kept as written - confirm whether the suffix should be preserved.
url_summary() {
  awk '{print $7}' nginx.log \
    | grep 'topics.*edit' \
    | sed -e 's/\/topics\/[0-9].*\/replies\/[0-9].*/\/topics\/int\/replies\/int/'
}

image

# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - /topics/<n>/replies/<n>/edit -> /topics/int/replies/int/edit
#   - /_img/uploads/photo/2018/<name>-... -> /_img/uploads/photo/2018/id.png!large
#   - /topics/<n> -> /topics/int
# Ids require at least one digit ([0-9]+). With [0-9]* the last rule also
# matched the already-normalised "/topics/" prefix and produced
# "/topics/intint/...".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
        -e 's#/topics/[0-9]+/replies/[0-9]+/edit#/topics/int/replies/int/edit#g' \
        -e 's#/_img/uploads/photo/2018/[0-9a-zA-Z]*-.*#/_img/uploads/photo/2018/id.png!large#g' \
        -e 's#/topics/[0-9]+#/topics/int#g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -10
}

课后作业

  • 函数如下
# Top 10 request paths (field 7 of ~/nginx.log) after clustering.
# Rule 1 (skipped for lines containing "/photo/"): digits following "/",
#   "offset=" or "page=" become "int".
# Rule 2 (skipped for lines that are a single plain path segment, or that end
#   in digits or in "int"): an image base name before .png/.jpg/.gif/.jpeg
#   becomes "id".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' ~/nginx.log \
    | sed -r \
        -e '/\/photo\//!s/(\/|offset=|page=)[0-9]+([\/\?$]?)/\1int\2/g' \
        -e '/(^\/[a-zA-Z_0-9]+$)|([0-9]+$)|(int$)/!s/(\/)([a-zA-Z0-9]+|[a-zA-Z0-9\-]+)(\.png|\.jpg|\.gif|\.jpeg)/\1id\3/g' \
    | sort \
    | uniq -c \
    | sort -n -r -k 1 \
    | head -10
}
1 个赞

找出500错误时的上下文:找出500错误记录的前一行与后一行(考察grep高级用法)。
# Print every nginx.log line containing " 500" with one line of context
# before and after; -a makes grep treat the file as text.
find_before_and_after() {
  grep -a -C 1 " 500" nginx.log
}

课后作业
url聚类函数:

# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - /topics/<digits>      -> /topics/int   (whole id, not just its first digit)
#   - /replies/<digits>/    -> /replies/int/
#   - first "/<alnum-or-dash>." -> "/id."
#   - trailing "/<digits>"  -> "/int"
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/\/topics\/[0-9]+/\/topics\/int/g' \
        -e 's/\/replies\/[0-9]+\//\/replies\/int\//g' \
        -e 's/\/[0-9a-zA-Z-]+\./\/id\./' \
        -e 's/\/[0-9]+$/\/int/' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -10
}

执行结果:
image

# Top 10 request paths (field 7 of nginx.log) after clustering:
# "/<digits>/" -> "/int/", "/<lowercase-alnum-or-dash>." -> "/id.",
# trailing "/<digits>" -> "/int".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
        -e 's#\/[0-9]+\/#\/int\/#g' \
        -e 's#\/[0-9a-z-]+\.#\/id\.#g' \
        -e 's#\/[0-9]+$#\/int#g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -10
}
# Top 10 request paths (field 7 of nginx.log) after clustering:
# .png/.jpg base names become "id", then every remaining run of digits
# becomes "int".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/[A-Za-z0-9\-]+\.png/id\.png/g' \
        -e 's/[A-Za-z0-9\-]+\.jpg/id\.jpg/g' \
        -e 's/[0-9]+/int/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}
# Top 10 request paths (field 7 of nginx.log) after clustering:
# topic ids and reply ids become "int"; for /_img/uploads/photo/<digits>/<name>.
# the numeric directory and base name collapse to "id.".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/\/topics\/[0-9]+/\/topics\/int/g' \
        -e 's/\/replies\/[0-9]+/\/replies\/int/g' \
        -e 's/\/_img\/uploads\/photo\/[0-9]+\/[0-9a-zA-Z-]+\./\/_img\/uploads\/photo\/id\./g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}

image

课后作业 url聚类 url_summary

# Top 10 request paths (field 7 of nginx.log) after clustering. Each rule is
# applied once per line (no g flag):
#   - the first slash-free run ending in jpg/gif/jpeg/png -> "picID"
#   - topic, reply and avatar ids -> "int"
#   - page= / offset= values -> "int"
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/[^/]*(jpg|gif|jpeg|png)/picID/' \
        -e 's|/topics/[0-9]+|/topics/int|' \
        -e 's|/replies/[0-9]+|/replies/int|' \
        -e 's|/avatar/[0-9]+|/avatar/int|' \
        -e 's|page=[0-9]+|page=int|' \
        -e 's|offset=[0-9]+|offset=int|' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}

# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - /topics/<n>/replies/<n> -> /topics/int/replies/int
#   - image base names before .png/.gif/.jpeg/.jpg -> "id"
#   - remaining /topics/<n> -> /topics/int
# The extension dot is escaped as "\." so it matches a literal dot. The
# pipeline ends on its own line, so the closing brace terminates the function
# instead of being passed to head as an argument.
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's/\/topics\/[0-9]+\/replies\/[0-9]+/\/topics\/int\/replies\/int/g' \
        -e 's/[0-9a-zA-Z-]+(\.png|\.gif|\.jpeg|\.jpg)/id\1/g' \
        -e 's/\/topics\/[0-9]+/\/topics\/int/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}

url_summay(){ }

课后作业:

# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - the query string (from the first "?") is dropped
#   - "%21" is decoded to "!"
#   - avatar, topic and reply ids become "int"
#   - image base names (.png/.jpg/.jpeg/.gif) become "id"
# Ids require at least one digit ([0-9]+). With [0-9]* a non-numeric path such
# as /topics/last was rewritten to /topics/intlast.
# All rules run in a single sed process, in the order listed.
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
        -e 's/[?].*//' \
        -e 's/%21/!/g' \
        -e 's/\/avatar\/[0-9]+/\/avatar\/int/g' \
        -e 's/\/topics\/[0-9]+/\/topics\/int/g' \
        -e 's/\/replies\/[0-9]+/\/replies\/int/g' \
        -e 's/\/[0-9a-zA-Z\-]*\.png/\/id.png/g' \
        -e 's/\/[0-9a-zA-Z\-]*\.jpg/\/id.jpg/g' \
        -e 's/\/[0-9a-zA-Z\-]*\.jpeg/\/id.jpeg/g' \
        -e 's/\/[0-9a-zA-Z\-]*\.gif/\/id.gif/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -10
}

# Top 10 request paths (field 7 of nginx.log) after clustering:
# first every run of digits becomes "int", then the last path segment of an
# image URL (png/jpg/gif/jpeg) is rewritten to "id.<ext>".
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed 's#[0-9]\+#int#g' \
    | sed -r 's#(.+)/[^/].*(png|jpg|gif|jpeg.*)#\1/id\.\2#g' \
    | sort \
    | uniq -c \
    | sort -n -r \
    | head -n 10
}

image

# Top 10 request paths (field 7 of nginx.log) after clustering:
# numeric ids after /topics/, /replies/ and /avatar/ become "int", then the
# last path segment of an image URL (png/jpg/gif/jpeg) becomes "id.<ext>".
# Other digits (for example a year directory) are left untouched.
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -r \
        -e 's#/(topics|replies|avatar)/[0-9]+#/\1/int#g' \
        -e 's#(.+)/[^/].*(png|jpg|gif|jpeg.*)#\1/id\.\2#g' \
    | sort \
    | uniq -c \
    | sort -n -r \
    | head -n 10
}

image

不明白为什么上面两个函数的结果不一样……

课后作业:

# Top 10 request paths (field 7 of nginx.log) after clustering:
#   - "ics/" or "ies/" followed by two or more digits -> ".../int"
#     (single-digit ids are left as they are)
#   - "tar/" followed by alphanumerics -> "tar/int"
#   - image base names (.png/.jpg/.gif) -> "id"
# Outputs: "count path" lines, highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed \
        -e 's/ics\/[0-9]\{2,\}/ics\/int/g' \
        -e 's/ies\/[0-9]\{2,\}/ies\/int/g' \
        -e 's/tar\/[0-9a-zA-Z]\{1,\}/tar\/int/g' \
        -e 's/[A-Za-z0-9\-]\{1,\}\.png/id.png/g' \
        -e 's/[A-Za-z0-9\-]\{1,\}\.jpg/id.jpg/g' \
        -e 's/[A-Za-z0-9\-]\{1,\}\.gif/id.gif/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -n 10
}

结果:
584 /cable
237 /topics/int
147 /topics/int/replies/int/edit
94 /
42 /topics/int/replies/int/reply_suggest
41 /topics/int/show_wechat
37 /topics/int?locale=zh-CN
33 /topics/int?locale=en
28 /topics/int?locale=zh-TW
26 /uploads/photo/2017/id.png!large

课后作业
`
# Count request paths (field 7 of nginx.log) after replacing a fixed set of
# ids: 16689, 124751 and 9497 become "int", and the UUID
# c54755ee-6bfd-489a-8a39-81a1d7551cbd becomes "id".
# The awk and sed programs use plain ASCII single quotes; typographic quotes
# are not shell quoting characters and break both commands.
# NOTE(review): only these literal ids are clustered; any other id passes
# through unchanged.
# Outputs: "count path" lines, highest count first (no row limit).
url_summary() {
  awk '{print $7}' nginx.log \
    | sed \
        -e 's/16689/int/g' \
        -e 's/124751/int/g' \
        -e 's/c54755ee-6bfd-489a-8a39-81a1d7551cbd/id/g' \
        -e 's/9497/int/g' \
    | sort \
    | uniq -c \
    | sort -nr
}

`