三剑客实战Nginx日志分析

修改作业如下:

# Print the 20 most frequently requested URL shapes from an nginx access log.
#
# Arguments: $1 - access log path (optional; defaults to nginx.log in the
#                 current directory)
# Outputs:   "count url" lines on stdout, highest count first.
url_summary() {
  local log_file=${1:-nginx.log}
  # Lower-case alphanumeric groups of 8-4-4-4-12 characters.
  local uuid='[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}'

  # Field 7 is taken as the request path (assumes the default combined log
  # format). File names are collapsed BEFORE the numeric rule: otherwise a
  # UUID whose first character is a digit is half-rewritten by "/[0-9]+" and
  # no longer matches as a whole. The dot is escaped so that only a real
  # ".png"/".jpg"/".gif" extension matches.
  awk '{print $7}' "$log_file" \
    | sed -E "s@${uuid}\\.(png|jpg|gif)@id.\\1@g;s@/[0-9]+@/int@g" \
    | sort | uniq -c | sort -nr | head -20
}

课后作业 url_summary

找出访问量最高的页面地址(借助 sed 做统计分析)

# List the 20 most requested URLs after normalizing topic/reply ids and
# png/jpg file names.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # ASCII quotes are required: typographic quotes are ordinary characters to
  # the shell, so the awk/sed programs would be word-split and fail.
  # "[0-9]+" consumes the whole id (a bare "[0-9]" only replaced one digit),
  # and "\." matches a literal dot instead of any character.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@/topics/[0-9]+@/topics/int@g' \
        -e 's@/replies/[0-9]+@/replies/int@g' \
        -e 's@/[a-zA-Z0-9-]+\.(png|jpg)@/id.\1@g' \
    | sort | uniq -c | sort -rn | head -20
}

# Report the 20 most requested URLs with topic, reply and avatar ids replaced
# by "int" and jpg/png file names replaced by "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, sorted by descending count.
url_summary() {
  local log_file=${1:-nginx.log}
  # One rule covers the three id-carrying path segments; \1 keeps the segment.
  local ids='s@/(topics|replies|avatar)/[0-9]+@/\1/int@g'
  # Escaped dot: only a real extension matches; \1 keeps png vs jpg apart.
  local images='s@/[a-zA-Z0-9-]+\.(jpg|png)@/id.\1@g'

  awk '{print $7}' "$log_file" \
    | sed -E -e "$ids" -e "$images" \
    | sort | uniq -c | sort -nr | head -20
}

# Backward-compatible alias for the original, misspelled function name.
ulr_summary() {
  url_summary "$@"
}

# Top-20 URL report: topic/reply/avatar ids become "int", jpg/png file names
# become "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}
  # sed arguments kept in an array so each rule sits on its own line.
  # ASCII quotes, "+" quantifiers and an escaped dot replace the original
  # typographic quotes, single-character classes and bare ".".
  local -a normalize=(
    -e 's#/topics/[0-9]+#/topics/int#g'
    -e 's#/replies/[0-9]+#/replies/int#g'
    -e 's#/avatar/[0-9]+#/avatar/int#g'
    -e 's#/[a-zA-Z0-9-]+\.(jpg|png)#/id.\1#g'
  )

  awk '{print $7}' "$log_file" \
    | sed -E "${normalize[@]}" \
    | sort | uniq -c | sort -nr | head -20
}
image

# Summarize the 20 most requested URL shapes in an nginx access log.
# Numeric path segments become "/int"; jpg/png/gif file names become "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, highest count first.
url_summary() {
  local log_file=${1:-nginx.log}

  # The dot is escaped on purpose: with a bare "." a path such as
  # "/users/bigpng" was rewritten to "/users/id.png".
  awk '{print $7}' "$log_file" \
    | sed -E 's@/[0-9]+@/int@g;s@[a-zA-Z0-9-]+\.(jpg|png|gif)@id.\1@g' \
    | sort | uniq -c | sort -nr | head -20
}

# Count requests per normalized URL and show the 20 most frequent.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}
  # Any all-digit run right after a slash is an id.
  local numbers='s@/[0-9]+@/int@g'
  # A file name is a run of letters, digits and hyphens before a literal dot
  # and one of the three image extensions.
  local images='s/[a-zA-Z0-9-]+\.(jpg|png|gif)/id.\1/g'

  # ASCII single quotes are used throughout; the original typographic quotes
  # are not quoting characters for the shell.
  awk '{print $7}' "$log_file" \
    | sed -E -e "$numbers" -e "$images" \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# Show the 20 most requested URLs, with topic/reply ids and png/jpg/gif file
# names normalized.
#
# Arguments: $1 - access log path (optional, default: /tmp/nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
# Returns:   1 if the log file cannot be read.
url_summary() {
  local log_file=${1:-/tmp/nginx.log}

  if [[ ! -r "$log_file" ]]; then
    printf 'url_summary: cannot read %s\n' "$log_file" >&2
    return 1
  fi

  # Differences from the pasted one-liner, all needed for the rules to fire:
  #  - ASCII quotes instead of typographic ones
  #  - "+" so whole ids / file names are consumed, not one character
  #  - "-" inside the file-name class so hyphenated (UUID-style) names match
  #  - "\." for a literal dot, and "g" on every rule
  awk '{print $7}' "$log_file" \
    | sed -E 's#/(topics|replies)/[0-9]+#/\1/int#g;s#/[0-9a-z-]+\.(png|jpg|gif)#/id.\1#g' \
    | sort | uniq -c | sort -nr | head -20
}

# Top-20 URL report following the three rewrite rules of the assignment:
#   /topics/<n>, /replies/<n>                  -> /topics/int, /replies/int
#   /_img/uploads/photo/2018/<name>.png!large  -> .../2018/id.png!large
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}
  local photo_dir='/_img/uploads/photo/2018'

  # The original started with a blanket "[0-9]+ -> int" rule, which rewrote
  # "2018" and the ids first, so the two specific rules after it could never
  # match. The specific rules are now the only ones. The file-name class also
  # gains "-" so UUID-style names match, and the "/topocs/" typo is gone.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e "s@${photo_dir}/[a-z0-9-]+\\.png!large@${photo_dir}/id.png!large@g" \
        -e 's@/(topics|replies)/[0-9]+@/\1/int@g' \
    | sort | uniq -c | sort -nr | head -20
}

# Print the 20 most requested URL shapes: numeric path segments become "/int"
# and png/jpg/jpeg/gif file names become "/id.<ext>".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, highest count first.
url_summary() {
  local log_file=${1:-nginx.log}
  # One alternation replaces four copy-pasted rules; "jpe?g" covers both jpg
  # and jpeg. The dot is escaped so "/users/bigpng" is no longer taken for an
  # image.
  local image_rule='s@/[0-9a-zA-Z-]+\.(png|jpe?g|gif)@/id.\1@g'

  awk '{print $7}' "$log_file" \
    | sed -E -e 's@/[0-9]+@/int@g' -e "$image_rule" \
    | sort | uniq -c | sort -nr | head -20
}

# Print the 20 most requested URLs, collapsing UUID-named uploads under
# /2018/ to "id" and topic/reply ids to "int".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
func() {
  local log_file=${1:-nginx.log}
  # Portable spelling of \w (a GNU extension): letters, digits, underscore.
  local w='[[:alnum:]_]'
  local uuid="${w}{8}-${w}{4}-${w}{4}-${w}{4}-${w}{12}"

  # The "> " at the start of the pasted lines was the interactive shell's
  # continuation prompt. Left in place it is parsed as an output redirection,
  # so the pipeline never ran and the function never closed.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e "s#/2018/${uuid}#/2018/id#g" \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/replies/[0-9]+#/replies/int#g' \
    | sort | uniq -c | sort -nr | head -20
}

image

# Print the 20 most requested URL shapes: png/jpg file names become "id" and
# numeric path segments become "/int".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # Fixes relative to the pasted one-liner:
  #  - "{" must be followed by whitespace, otherwise "{awk" is a command name
  #  - ASCII quotes instead of typographic ones; "s ort" -> "sort"
  #  - the numeric rule had a stray space ("[0-9]+ ") and so never matched a
  #    URL; it is now anchored to a path segment
  #  - file names are rewritten first, with an escaped dot, so digits inside
  #    a file name are not touched by the numeric rule
  awk '{print $7}' "$log_file" \
    | sed -E 's/[0-9a-z-]+\.(png|jpg)/id.\1/g;s@/[0-9]+@/int@g' \
    | sort | uniq -c | sort -nr | head -20
}
image

课间作业:
(1)找出 log 中状态码为 404、500 的报错:
# Print every access-log line whose HTTP status is 404 or 500.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   matching log lines on stdout.
# Returns:   grep's status (1 when nothing matches).
find_error_log() {
  local log_file=${1:-nginx.log}

  # The status is matched right after the closing quote of the request and
  # must be followed by a space. This keeps a 500-byte response size from
  # counting as a 500 error, and the original `" 500` alternative had no
  # trailing delimiter at all.
  grep -E '" (404|500) ' -- "$log_file"
}


(2)找出500错误时候的上下文:
# Print each HTTP-500 log line together with one line of context before and
# after it.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   matching lines plus context on stdout (grep -C format).
# Returns:   grep's status (1 when nothing matches).
find_before() {
  local log_file=${1:-nginx.log}

  # '" 500 ' matches the status that follows the quoted request. The original
  # '\s500\s' also matched a 500-byte response size and relies on the GNU-only
  # \s escape.
  grep -C 1 -- '" 500 ' "$log_file"
}

找出500错误的前两行:
# Print each HTTP-500 log line preceded by the two lines that came before it.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   matching lines plus leading context on stdout (grep -B format).
# Returns:   grep's status (1 when nothing matches).
find_before() {
  local log_file=${1:-nginx.log}
  # Status field: the number between the closing quote of the request and the
  # next space. A bare 500 elsewhere on the line (e.g. bytes sent) is ignored.
  local status_500='" 500 '

  grep -B 2 -- "$status_500" "$log_file"
}

(3)找出访问量最高的 IP:统计分析后取出 top 3
# Print the three values of the first log field (the client address in the
# default nginx log format) that occur most often.
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   up to three "count address" lines on stdout, busiest first.
find_top_3() {
  local log_file=${1:-nginx.log}

  # ASCII quotes are required around the awk program; the typographic quotes
  # in the original are passed to awk as literal characters.
  awk '{print $1}' "$log_file" \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -3
}
image
课后作业:
找出访问量最高的页面地址 借助于sed的统计分析
/topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成 /_img/uploads/photo/2018/id.png!large
/topics/9497 改成 /topics/int
# Top-20 URL report for the assignment: reply ids and topic ids become "int",
# png file names become "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # Changes from the pasted version:
  #  - the request path is extracted first, so the rewrite rules only see the
  #    URL and not the referer or user agent
  #  - "cat file | grep ... file" is gone: grep ignored the pipe, and the
  #    "/topics/" filter dropped the image URLs the png rule is meant for
  #  - sed runs with -E so "+" is a quantifier (under plain -e it is a literal
  #    plus sign), ids are matched with "[0-9]+", and the dot is escaped
  #  - consistent ASCII quoting (one expression opened with ' and closed
  #    with a typographic quote)
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@/replies/[0-9]+@/replies/int@g' \
        -e 's@/topics/[0-9]+@/topics/int@g' \
        -e 's/[a-zA-Z0-9-]+\.png/id.png/g' \
    | sort | uniq -c | sort -nr | head -20
}

# Print the 20 most requested URL shapes from an nginx access log.
# Numeric path segments become "/int"; jpg/png/gif file names become "id".
#
# Arguments: $1 - access log path (optional, default: /tmp/nginx.log)
# Outputs:   "count url" lines on stdout, highest count first.
url_summary() {
  local log_file=${1:-/tmp/nginx.log}
  # Escaped dot: only a real ".jpg"/".png"/".gif" extension is rewritten
  # (a bare "." also matched paths such as "/users/bigpng").
  local image_rule='s#[a-z0-9-]+\.(jpg|png|gif)#id.\1#g'

  # awk already splits on runs of blanks by default, so -F ' ' is not needed.
  awk '{print $7}' "$log_file" \
    | sed -E -e 's#/[0-9]+#/int#g' -e "$image_rule" \
    | sort | uniq -c | sort -nr | head -20
}

image

# Report the 20 most frequent request paths after normalization:
# "/<digits>" -> "/int", "<name>.<jpg|png|gif>" -> "id.<ext>".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
# Returns:   1 if the log file cannot be read.
url_summary() {
  local log_file=${1:-nginx.log}

  [[ -r "$log_file" ]] || {
    printf 'url_summary: cannot read %s\n' "$log_file" >&2
    return 1
  }

  # ASCII quotes replace the typographic ones from the paste, and the dot
  # before the extension is escaped so it no longer matches any character.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@/[0-9]+@/int@g' \
        -e 's@[a-zA-Z0-9-]+\.(jpg|png|gif)@id.\1@g' \
    | sort | uniq -c | sort -nr | head -20
}

image

# Print the 20 most requested URLs with topic/reply ids replaced by "int" and
# png/jpg file names replaced by "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # The image rule keeps the matched extension via \1. The original always
  # wrote "id.png", so jpg requests were counted as png. One sed process with
  # -E replaces three, which also drops the GNU-only "\+". The dot before the
  # extension is escaped.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's#/(topics|replies)/[0-9]+#/\1/int#g' \
        -e 's/[a-zA-Z0-9-]+\.(png|jpg)/id.\1/g' \
    | sort | uniq -c | sort -nr | head -20
}

#张琴作业

image image

# Tally requests per normalized URL and print the 20 largest tallies.
# Normalization: "/<digits>" -> "/int"; "<name>.jpg|png|gif" -> "id.<ext>".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, highest count first.
url_summary() {
  local log_file=${1:-nginx.log}
  local -a rules=(
    -e 's@/[0-9]+@/int@g'
    # "\." is a literal dot: a bare "." let "bigpng" match as "bi" + any
    # character + "png".
    -e 's@[a-zA-Z0-9-]+\.(jpg|png|gif)@id.\1@g'
  )

  awk '{print $7}' "$log_file" \
    | sed -E "${rules[@]}" \
    | sort | uniq -c | sort -nr | head -20
}

image

实现代码:
# Print the 20 most requested URLs with topic/reply ids replaced by "int" and
# png file names replaced by "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # Changes from the pasted one-liner:
  #  - ASCII quotes instead of typographic ones
  #  - "[0-9]+" with the g flag, so whole ids are replaced wherever they occur
  #  - "\." so only a literal ".png" matches
  #  - the trailing "#/topics/[0-9]*#..." expression is dropped: it lacked the
  #    leading "s", so sed read it as a comment
  awk '{print $7}' "$log_file" \
    | sed -E 's@/(topics|replies)/[0-9]+@/\1/int@g;s#/[a-zA-Z0-9-]+\.png#/id.png#g' \
    | sort | uniq -c | sort -nr | head -20
}
实现结果:

# Print the 20 most requested URLs with topic/reply ids replaced by "int" and
# png/jpg file names replaced by "id".
#
# Arguments: $1 - access log path (optional, default: nginx.log)
# Outputs:   "count url" lines on stdout, most frequent first.
url_summary() {
  local log_file=${1:-nginx.log}

  # Changes from the pasted one-liner:
  #  - ASCII quotes, and the body now ends before "}" (a one-line function
  #    needs ";" or a newline there, otherwise it is a syntax error)
  #  - the replies replacement keeps its leading "/" (it produced
  #    "/topics/intreplies/int")
  #  - "+" on the id and file-name classes, a literal "\.", and g on all rules
  #  - file names become "id.<ext>", as the assignment specifies, not "int.<ext>"
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/replies/[0-9]+#/replies/int#g' \
        -e 's#[0-9a-z-]+\.(png|jpg)#id.\1#g' \
    | sort | uniq -c | sort -nr | head -20
}
image