三剑客实战Nginx日志分析

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format (count, then pattern).
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
#
# Field 7 of each whitespace-separated line is taken as the request path
# (assumes nginx's default combined log format — confirm for custom formats).
url_summary() {
  local log_file=${1:-nginx.log}
  # 8-4-4-4-12 lowercase alphanumeric groups, i.e. the upload file names.
  local uuid='[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}'

  # -E is required for the {n} intervals. The UUID rule runs before the
  # numeric rule so that a UUID starting with a digit is not half-rewritten.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e "s@${uuid}\.(png|jpg|gif)@id.\1@g" \
        -e 's@/[0-9]+@/int@g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

linux实战作业

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
#
# The path (field 7) is extracted BEFORE sed runs, so referrers and user
# agents on the same line are never rewritten or counted.
url_summary() {
  local log_file=${1:-nginx.log}

  # Rule notes:
  #  - the most specific /topics/.../edit rule runs first and keeps "/edit";
  #  - "[0-9][0-9]*" requires at least one digit, so the generic /topics rule
  #    cannot re-match the already normalised "/topics/int" prefix;
  #  - the avatar rule matches on the "/uploads/..." suffix, so it covers both
  #    the plain and the "/_img"-prefixed form and keeps the leading slash;
  #  - the last rule strips a dangling "&" left at the end of an order_by URL.
  awk '{print $7}' "$log_file" \
    | sed \
        -e 's@/topics/[0-9][0-9]*/replies/[0-9][0-9]*/edit@/topics/int/replies/int/edit@g' \
        -e 's@/topics/[0-9][0-9]*@/topics/int@g' \
        -e 's@/_img/uploads/photo/[0-9]*/[0-9a-z]*-[0-9a-z]*-[0-9a-z]*-[0-9a-z]*-[0-9a-z]*\.png!large@/_img/uploads/photo/year/id.png!large@g' \
        -e 's@/uploads/user/avatar/[0-9]*/[0-9a-z]*\.jpg!sm@/uploads/user/avatar/int/id.jpg!sm@g' \
        -e 's@\(/topics/int?order_by=[a-z_]*\)&$@\1@' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

Linux作业_看图王

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Rule 1 only touches paths that start with /topic: every digit run in
  # them becomes "int". Other paths (e.g. the photo year) keep their digits.
  # Rule 2 collapses the first image file name into "id.<ext>"; the dot is
  # escaped so it matches a literal "." only.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e '/^\/topic/ s@[0-9]+@int@g' \
        -e 's@[0-9a-zA-Z-]+\.(png|jpg|gif)@id.\1@' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

find_error

# find_error [log_file]
#
# Print every log line whose HTTP status is 404 or 500.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   matching log lines on stdout
# Returns:   grep's status (1 when nothing matched)
#
# The status is recognised as the number that directly follows the closing
# quote of the request string and is followed by a space. Matching on plain
# "whitespace + 404/500" would also hit response sizes such as 5003 or 404.
# (Assumes the default combined log format — confirm for custom formats.)
find_error() {
  local log_file=${1:-nginx.log}
  grep -E '" (404|500) ' "$log_file"
}

find_before

# find_before [log_file]
#
# Print every log line whose HTTP status is 500, each preceded by the two
# lines that come before it in the log (grep -B 2).
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   matching lines plus leading context on stdout
# Returns:   grep's status (1 when nothing matched)
#
# The pattern anchors on the closing quote of the request string so that a
# response size starting with 500 (e.g. 5003) is not mistaken for a status.
find_before() {
  local log_file=${1:-nginx.log}
  grep -B 2 -E '" 500 ' "$log_file"
}

find_top_3

# find_top_3 [log_file]
#
# Print the three most frequent values of the first field of the log
# (the client address in nginx's default format), most frequent first.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 3 "count value" lines in `uniq -c` format on stdout
find_top_3() {
  local log_file=${1:-nginx.log}

  # awk reads the file itself; no separate cat process is needed.
  awk '{print $1}' "$log_file" \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -3
}

url_summary()

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: /tmp/nginx.log)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-/tmp/nginx.log}

  # "[0-9][0-9]*" demands at least one digit. With a bare "[0-9]*" the
  # generic /topics rule also matches the already rewritten "/topics/int..."
  # with zero digits and produces "/topics/intint...".
  # The query-string rules drop everything after "?locale" / "?order_by"
  # ("?" is a literal character in a basic regular expression).
  awk '{print $7}' "$log_file" \
    | sed \
        -e 's@/topics/[0-9][0-9]*/replies/[0-9][0-9]*/edit@/topics/int/replies/int/edit@' \
        -e 's@/[a-zA-Z0-9]*\.jpg@/id.jpg@' \
        -e 's@/topics/[0-9][0-9]*@/topics/int@' \
        -e 's@?locale.*@?locale@' \
        -e 's@?order_by.*@?order_by@' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

image

/topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit

# Demo: replace every run of five or more digits with "int".
sed -E 's/[0-9]{5,}/int/g' <<< "/topics/16689/replies/124751/edit"

/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成/_img/uploads/photo/2018/id.png!large

# Demo: collapse the five dash-separated groups of the file name into "id".
# The sample is single-quoted: inside double quotes "\!" keeps its backslash,
# so the original printed ".png\!large" instead of ".png!large".
printf '%s\n' '/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large' \
  | sed -E 's/[0-9a-z]*-[0-9a-z]*-[0-9a-z]*-[0-9a-z]*-[0-9a-z]*/id/g'

/topics/9497 改成 /topics/int

# Demo: /topics/9497 -> /topics/int.
# "[0-9]+" is used because the id here has only four digits, which "{5,}"
# can never match; the stray trailing "t" in the sample path is removed too.
printf '%s\n' '/topics/9497' | sed -E 's/[0-9]+/int/g'

其他规则参考如上输出

统计每个 url pattern 对应的请求数量,取出请求量 top 20 的 url pattern,输出格式类似(每行一条):`nnn urlxxx`、`mmm urlxxx`

# url_summary [log_file]
#
# Print the 20 most frequent /topics/ request-path patterns of an access
# log, most frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # The /topics/ filter is applied to the request path (field 7) itself.
  # Grepping the whole line would also keep requests for other paths whose
  # referrer merely contains "/topics/".
  # "[0-9][0-9]*" needs at least one digit, so "/topics/" followed by a
  # non-numeric segment is left untouched.
  awk '$7 ~ /\/topics\// {print $7}' "$log_file" \
    | sed 's@/topics/[0-9][0-9]*@/topics/number@' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# Ad-hoc URL summary over ./nginx.log: top 20 request-path patterns.
# Image file names are collapsed first (with the dot escaped so it matches
# a literal "."), then every remaining digit run becomes "int".
awk '{print $7}' nginx.log \
  | sed -E \
      -e 's/[0-9a-z-]+\.(png|jpg|gif)/id.\1/g' \
      -e 's/[0-9]+/int/g' \
  | sort \
  | uniq -c \
  | sort -nr \
  | head -20

修改作业如下:

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}
  local hex_name='[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}'

  # Order matters: the file-name rule must run before the "/digits" rule.
  # Otherwise a name such as "/1a4755ee-..." is first turned into
  # "/inta4755ee-...", and the fixed-width name pattern then matches only
  # its tail, leaving a corrupted "/inid.png".
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e "s@${hex_name}\.(png|jpg|gif)@id.\1@g" \
        -e 's@/[0-9]+@/int@g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

课后作业 url_summary

找出访问量最高的页面地址 借助于sed的统计分析

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Each id rule consumes the whole digit run ("[0-9][0-9]*"), not just its
  # first digit. The image rules replace the complete final path segment and
  # escape the dot so it matches a literal "." only.
  awk '{print $7}' "$log_file" \
    | sed \
        -e 's@/topics/[0-9][0-9]*@/topics/int@g' \
        -e 's@/replies/[0-9][0-9]*@/replies/int@g' \
        -e 's@/[a-zA-Z0-9-]*\.png@/id.png@g' \
        -e 's@/[a-zA-Z0-9-]*\.jpg@/id.jpg@g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -20
}

# ulr_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
#
# NOTE(review): the name looks like a typo of "url_summary"; it is kept
# unchanged so that anything already calling ulr_summary keeps working.
ulr_summary() {
  local log_file=${1:-nginx.log}

  # Numeric ids after /topics, /replies and /avatar become "int" (whole digit
  # run); the final image file name becomes "id.<ext>" (literal dot).
  awk '{print $7}' "$log_file" \
    | sed \
        -e 's@/topics/[0-9][0-9]*@/topics/int@g' \
        -e 's@/replies/[0-9][0-9]*@/replies/int@g' \
        -e 's@/avatar/[0-9][0-9]*@/avatar/int@g' \
        -e 's@/[a-zA-Z0-9-]*\.jpg@/id.jpg@g' \
        -e 's@/[a-zA-Z0-9-]*\.png@/id.png@g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Extended regex: "+" consumes the whole id / file name, and "\." matches
  # a literal dot. Rule 1 handles the three numeric-id path segments, rule 2
  # the image file names.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's#/(topics|replies|avatar)/[0-9]+#/\1/int#g' \
        -e 's#/[a-zA-Z0-9-]+\.(jpg|png)#/id.\1#g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}
image

# url_summary
#
# Read ./nginx.log, take field 7 of every line, normalise it and print the
# 20 most frequent results in `uniq -c` format, most frequent first.
#
# Normalisation (extended regex, applied in this order):
#   1. every "/<digits>" becomes "/int";
#   2. every "<letters, digits, dashes><any char>jpg|png|gif" becomes
#      "id.jpg" / "id.png" / "id.gif".
# NOTE(review): the "." before the extension is an unescaped wildcard.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
        -e 's@/[0-9]+@/int@g' \
        -e 's/[a-zA-Z0-9-]+.jpg/id.jpg/g' \
        -e 's/[a-zA-Z0-9-]+.png/id.png/g' \
        -e 's/[a-zA-Z0-9-]+.gif/id.gif/g' \
    | sort \
    | uniq -c \
    | sort -n -r \
    | head -n 20
}

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Every "/<digits>" becomes "/int"; image file names become "id.<ext>".
  # The dot is escaped so that only a real ".jpg/.png/.gif" suffix matches.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@/[0-9]+@/int@g' \
        -e 's/[a-zA-Z0-9-]+\.(jpg|png|gif)/id.\1/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: /tmp/nginx.log)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-/tmp/nginx.log}

  # The id rules consume the whole digit run. The file-name class includes
  # "-" because the uploaded image names contain dashes; without it the
  # image rules can never match them. Dots are escaped to match literally.
  awk '{print $7}' "$log_file" \
    | sed \
        -e 's#/topics/[0-9][0-9]*#/topics/int#g' \
        -e 's#/replies/[0-9][0-9]*#/replies/int#g' \
        -e 's#/[0-9a-z-]*\.png#/id.png#g' \
        -e 's#/[0-9a-z-]*\.jpg#/id.jpg#g' \
        -e 's#/[0-9a-z-]*\.gif#/id.gif#g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # The original began with a blanket "[0-9]+ -> int" rule, which rewrote
  # the digits the two later rules depend on, so they could never match.
  # Here only the targeted rules remain:
  #   1. photo file name -> id.png!large (the year directory is kept; the
  #      class includes "-" because the names contain dashes);
  #   2. numeric id after /topics or /replies -> int (the "/topocs" typo in
  #      the replacement is corrected).
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@(/_img/uploads/photo/[0-9]+/)[a-z0-9-]+\.png!large@\1id.png!large@g' \
        -e 's@/(topics|replies)/[0-9]+@/\1/int@g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# url_summary
#
# Read ./nginx.log, take field 7 of every line, normalise it and print the
# 20 most frequent results in `uniq -c` format, most frequent first.
#
# Normalisation (extended regex, applied in this order):
#   1. every "/<digits>" becomes "/int";
#   2. the first "/<letters, digits, dashes><any char>png" becomes "/id.png",
#      and likewise for jpg, gif and jpeg.
# NOTE(review): the "." before each extension is an unescaped wildcard.
url_summary() {
  local -a normalise=(
    -e 's@/[0-9]+@/int@g'
    -e 's@/[0-9a-zA-Z-]+.png@/id.png@'
    -e 's@/[0-9a-zA-Z-]+.jpg@/id.jpg@'
    -e 's@/[0-9a-zA-Z-]+.gif@/id.gif@'
    -e 's@/[0-9a-zA-Z-]+.jpeg@/id.jpeg@'
  )

  awk '{print $7}' nginx.log \
    | sed -E "${normalise[@]}" \
    | sort \
    | uniq -c \
    | sort -n -r \
    | head -n 20
}

# func [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
#
# The original carried the "> " continuation prompts of an interactive
# session; in a script those are output redirections (to files named "awk"
# and "}"), so they are removed here.
func() {
  local log_file=${1:-nginx.log}
  # Portable spelling of the GNU-only "\w" (letters, digits, underscore).
  local w='[[:alnum:]_]'

  # Rule 1 collapses an 8-4-4-4-12 file name that follows a four-digit
  # directory (2018 in the original) into "id", keeping the directory.
  # Rules 2 and 3 turn the numeric ids after /topics and /replies into "int".
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e "s#/([0-9]{4})/${w}{8}-${w}{4}-${w}{4}-${w}{4}-${w}{12}#/\1/id#g" \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/replies/[0-9]+#/replies/int#g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

image

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Image file names are collapsed first, then every remaining digit run
  # becomes "int". The digit pattern no longer requires a trailing space,
  # which a single path field never contains.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's/[0-9a-z-]+\.(png|jpg)/id.\1/g' \
        -e 's/[0-9]+/int/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}
image

课间作业:
(1)找出log中的404 500的报错:
# find_error_log [log_file]
#
# Print every log line whose HTTP status is 404 or 500.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   matching log lines on stdout
# Returns:   grep's status (1 when nothing matched)
#
# The status is the number between the closing quote of the request string
# and the next space; both codes are delimited on both sides so that e.g. a
# status or size starting with "500" does not match.
find_error_log() {
  local log_file=${1:-nginx.log}
  grep -E '" (404|500) ' "$log_file"
}


(2)找出500错误时候的上下文:
# find_before [log_file]
#
# Print every log line whose HTTP status is 500 together with one line of
# context before and after it (grep -C 1).
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   matching lines plus surrounding context on stdout
# Returns:   grep's status (1 when nothing matched)
#
# The pattern anchors on the closing quote of the request string, so a
# response size of exactly 500 is not reported as an error.
find_before() {
  local log_file=${1:-nginx.log}
  grep -C 1 -E '" 500 ' "$log_file"
}

找出500错误的前两行:
# find_before [log_file]
#
# Print every log line whose HTTP status is 500, each preceded by the two
# lines that come before it in the log (grep -B 2).
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   matching lines plus leading context on stdout
# Returns:   grep's status (1 when nothing matched)
#
# The pattern anchors on the closing quote of the request string, so a
# response size of exactly 500 is not reported as an error.
find_before() {
  local log_file=${1:-nginx.log}
  grep -B 2 -E '" 500 ' "$log_file"
}

(3)找出访问量最高的ip, 统计分析,取出top3
# find_top_3 [log_file]
#
# Print the three most frequent values of the first field of the log
# (the client address in nginx's default format), most frequent first.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 3 "count value" lines in `uniq -c` format on stdout
find_top_3() {
  local log_file=${1:-nginx.log}

  # The awk program must be in plain ASCII single quotes; with typographic
  # quotes the shell expands "$1" itself before awk ever sees it.
  awk '{print $1}' "$log_file" \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -3
}
image
课后作业:
找出访问量最高的页面地址 借助于sed的统计分析
/topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成 /_img/uploads/photo/2018/id.png!large
/topics/9497 改成 /topics/int
# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: nginx.log in the current dir)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-nginx.log}

  # Changes against the original pipeline:
  #  - the path (field 7) is extracted first, so only the URL is rewritten;
  #  - the "grep /topics/" pre-filter is gone: it matched anywhere on the
  #    line (referrers included), while a filter on the path alone would
  #    make the .png rule unreachable;
  #  - one sed -E call replaces three sed processes; "+" consumes the whole
  #    id / file name, and the dot is escaped.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's@/replies/[0-9]+@/replies/int@g' \
        -e 's@/topics/[0-9]+@/topics/int@g' \
        -e 's/[a-zA-Z0-9-]+\.png/id.png/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

# url_summary [log_file]
#
# Print the 20 most frequent request-path patterns of an access log, most
# frequent first, in `uniq -c` format.
#
# Arguments: $1 - log file to read (default: /tmp/nginx.log)
# Outputs:   up to 20 "count pattern" lines on stdout
url_summary() {
  local log_file=${1:-/tmp/nginx.log}

  # Every "/<digits>" becomes "/int"; image file names become "id.<ext>".
  # The dot is escaped so that only a real ".jpg/.png/.gif" suffix matches.
  # awk already splits on whitespace by default, so no -F is needed.
  awk '{print $7}' "$log_file" \
    | sed -E \
        -e 's#/[0-9]+#/int#g' \
        -e 's#[a-z0-9-]+\.(jpg|png|gif)#id.\1#g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

image