三剑客实战Nginx日志分析

课后作业

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic ids, reply ids of .../edit URLs and png/jpg file names are replaced
# by placeholders and the query string is dropped, so requests for the same
# kind of page are counted together.
url_summary() {
  local log_file=${1:-nginx.log}
  # "+" (at least one digit) instead of "*": "/topics/[0-9]*" also matched
  # "/topics/" followed by no digit and turned /topics/node into
  # /topics/intnode. Dots are escaped so they only match a literal ".".
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@' \
      -e 's@/replies/[0-9]+/edit@/replies/int/edit@' \
      -e 's@[0-9a-z-]+\.png@id.png@' \
      -e 's@[0-9a-z-]+\.jpg@id.jpg@' \
    | awk -F '?' '{print $1}' \
    | sort | uniq -c | sort -nr | head -20
}

image

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: /tmp/nginx.log); the URL is taken
#         from whitespace-separated field 7.
# Topic/reply ids and jpg file names become placeholders; the values of the
# "?locale" and "?order_by" query strings are cut off.
url_summary() {
  local log_file=${1:-/tmp/nginx.log}
  # "[0-9]+" is required: with "[0-9]*" the plain /topics rule matched the
  # already rewritten "/topics/int/replies/..." with zero digits and produced
  # "/topic/intint/replies/...". The label is "/topics/int" in both rules.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
      -e 's@/[a-zA-Z0-9]+\.jpg@/id.jpg@' \
      -e 's@/topics/[0-9]+@/topics/int@' \
      -e 's@\?locale.*@?locale@' \
      -e 's@\?order_by.*@?order_by@' \
    | sort | uniq -c | sort -nr | head -20
}

image

# url_summary_lili
# Count the values of field 7 of ./nginx.log after collapsing a list of
# known URL families to one representative each, and print the ten most
# frequent as "count url" lines (highest first).
# The substitutions are applied to every line in the order listed; later
# rules see the output of earlier ones.
url_summary_lili() {
  awk '{print $7}' nginx.log \
    | sed \
      -e 's#/uploads/user/avatar.*#/uploads/user/avatar#' \
      -e 's#/uploads/photo/2018/.*large#/uploads/photo/2018/large#' \
      -e 's#/uploads/photo/2017/.*large#/uploads/photo/2017/large#' \
      -e 's#/topics/[0-9].*/replies/[0-9].*/edit#/topics/int/replies/int/edit#' \
      -e 's#/topics/[0-9].*/replies/[0-9].*/reply_suggest#/topics/int/replies/int/reply_suggest#' \
      -e 's#/api/v3/topics.*#/api/v3/topics#' \
      -e 's#/account/auth/github.*#/account/auth/github#' \
      -e 's#/topics/[0-9]/replies/[0-9].*/reply_to#/topics/int/replies/int/reply_to#' \
      -e 's#/topics/[0-9].*/show_wechat#/topics/num/show_wechat#' \
      -e 's#/haluuand/topics.*#/haluuand/topics#' \
      -e 's#/photo/.*#/photo/#' \
      -e 's#/syyair/following.*#/syyair/following#' \
      -e 's#/topics/int/replies/int/reply_suggest.*#/topics/int/replies/int/reply_suggest#' \
      -e 's#/topics/[0-9].*/replies/[0-9].*/reply_to#/topics/int/replies/int/reply_to#' \
      -e 's#/topics/[0-9][0-9].*#/topics/num#' \
      -e 's#/topics/node.*#/topics/node#' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head
}

课间作业

find_error_log

# find_error_log [LOG]
# Print every access-log line whose response status is 404 or 500.
#   LOG - access log to read (default: nginx.log)
find_error_log() {
  local log_file=${1:-nginx.log}
  # The status code directly follows the closing quote of the request line.
  # Anchoring on that quote (and the space after the code) keeps other
  # numbers that merely start with 404/500, e.g. a byte count of 5003,
  # from being reported as errors.
  grep -E '" (404|500) ' -- "$log_file"
}

find_before

# find_before [LOG]
# Print every access-log line with response status 500 together with the
# two lines that precede it.
#   LOG - access log to read (default: nginx.log)
find_before() {
  local log_file=${1:-nginx.log}
  # Match the status field only (it follows the closing quote of the request
  # line); a bare " 500" would also hit byte counts such as 5003.
  grep -B2 '" 500 ' -- "$log_file"
}

find_top_3

# find_top_3
# Count how often each value of field 1 occurs in ./nginx.log and print the
# three most frequent as "count value" lines, highest count first.
find_top_3() {
  awk '{print $1}' nginx.log \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -3
}

课后作业 url_summary

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic ids, reply ids of .../edit URLs and the file name of
# "/<number>/<name>.jpg|png!large" images are replaced by placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # Rule 1 used to be '^/[[:alpha:]]+/[0-9]+[^?,/]' -> 'topics/int', which
  # dropped the leading slash, renamed any first path segment to "topics"
  # and needed at least two digits; it now targets /topics/<id> only.
  # Rule 3 uses a plain character class (no commas) and an escaped dot.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's#^/topics/[0-9]+#/topics/int#' \
      -e 's#/replies/[0-9]+/edit#/replies/int/edit#g' \
      -e 's#(/[0-9]+)/[0-9a-zA-Z-]+\.(jpg|png)(!large)#\1/id.\2\3#g' \
    | sort | uniq -c | sort -nr | head -20
}

image

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# The query string is removed, every all-digit path segment start becomes
# "/int" and jpg/png/jpeg/gif file names become "id.<ext>".
url_summary() {
  local log_file=${1:-nginx.log}
  # Plain ASCII quotes are required: typographic quotes are ordinary
  # characters to the shell. The dot before the extension is escaped.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's#\?.*##' \
      -e 's#/[0-9]+#/int#g' \
      -e 's#/[a-zA-Z0-9-]+\.(jpg|png|jpeg|gif)#/id.\1#g' \
    | sort | uniq -c | sort -nr | head -20
}

课间作业
# 1: find_error_log [LOG]
# Print every access-log line whose response status is 404 or 500.
#   LOG - access log to read (default: nginx.log)
# The status is recognised as the number between the closing quote of the
# request line and the following space.
find_error_log() {
  local log_file=${1:-nginx.log}
  grep -E '" (404|500) ' -- "$log_file"
}
# 2: find_before [LOG]
# Print every access-log line with response status 500 together with one
# line of context before and after it.
#   LOG - access log to read (default: nginx.log)
find_before() {
  local log_file=${1:-nginx.log}
  # Options are placed before the pattern; ASCII quotes replace the
  # typographic ones, which the shell does not treat as quoting.
  grep -C 1 '" 500 ' -- "$log_file"
}
# 3: find_top_10 [LOG]
# Count how often each value of field 1 occurs in the access log and print
# the ten most frequent as "count value" lines, highest count first.
#   LOG - access log to read (default: nginx.log)
find_top_10() {
  local log_file=${1:-nginx.log}
  awk '{print $1}' "$log_file" | sort | uniq -c | sort -nr | head -10
}
# 4: url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: /tmp/nginx.log); the URL is taken
#         from whitespace-separated field 7.
# Topic/reply ids and jpg file names are replaced by placeholders.
url_summary() {
  local log_file=${1:-/tmp/nginx.log}
  # The quantifiers after [0-9] and [a-zA-Z0-9] are restored as "+": a bare
  # [0-9] matches exactly one digit, so multi-digit ids were not normalised.
  # Both topic rules emit the same "/topics/int" label.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
      -e 's@/[a-zA-Z0-9]+\.jpg@/id.jpg@' \
      -e 's@/topics/[0-9]+@/topics/int@' \
    | sort | uniq -c | sort -nr | head -20
}
课后作业
1:
# Print the single most requested URL (field 7) of /tmp/nginx.log after
# replacing topic/reply ids and jpg file names by placeholders.
# ASCII quotes and the "+" quantifiers are required for the sed script to
# parse and to match ids of more than one digit.
awk '{print $7}' /tmp/nginx.log \
  | sed -E \
    -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
    -e 's@/[a-zA-Z0-9]+\.jpg@/id.jpg@' \
    -e 's@/topics/[0-9]+@/topics/int@' \
  | sort | uniq -c | sort -nr | head -1


2:
# Three worked examples: select the matching log lines with grep, then
# rewrite the variable part of the URL with sed.

# Reply-edit URL: both numeric ids become "int".
grep '/topics/16689/replies/124751/edit' nginx.log \
  | sed -E 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@'

# Uploaded image: the file name becomes "id" (the dot is escaped so it only
# matches a literal ".").
grep '/_img/uploads/photo/2018' nginx.log \
  | sed 's@/_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd\.png!large@/_img/uploads/photo/2018/id.png!large@'

# Topic page: "[0-9]+" consumes the whole id; a single [0-9] would turn
# /topics/9497 into /topics/int497.
grep '/topics/9497' nginx.log \
  | sed -E 's@/topics/[0-9]+@/topics/int@'
3:
# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: /tmp/nginx.log); the URL is taken
#         from whitespace-separated field 7.
# Topic/reply ids and jpg file names are replaced by placeholders.
url_summary() {
  local log_file=${1:-/tmp/nginx.log}
  # The sed script is kept on one logical line per rule; a line break inside
  # an s@...@...@ command is a sed syntax error. "[0-9]+" (one or more
  # digits) keeps the plain /topics rule from re-matching the already
  # rewritten "/topics/int/replies/..." with zero digits.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
      -e 's@/[a-zA-Z0-9]+\.jpg@/id.jpg@' \
      -e 's@/topics/[0-9]+@/topics/int@' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: /tmp/nginx.log); the URL is taken
#         from whitespace-separated field 7.
# Every "/<digits>" becomes "/int" and png file names become "id.png".
url_summary() {
  local log_file=${1:-/tmp/nginx.log}
  # uniq only needs equal lines to be adjacent, so a plain sort is used
  # before it (URLs are not numbers; "sort -n" added nothing).
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/[0-9]+@/int@g' \
      -e 's/[a-zA-Z0-9-]+\.png/id.png/g' \
    | sort | uniq -c | sort -nr | head -20
}

sed 的 s/// 命令:当匹配内容中出现 / 的时候,可以改用 @ 或者 # 作为分隔符,例如 s@@@ 或 s###。

# url_summary: print the 20 most frequent values of field 7 of nginx.log
# after sed normalisation, as "count url" lines (highest count first).
# NOTE: the first expression, s#/[0-9]*#/int#, has no g flag, so sed replaces
# only the first match on each line; and because [0-9]* also matches zero
# digits, that first match is the first "/" of the line itself
# (e.g. /topics/123 -> /inttopics/123).
url_summary() { awk '{print $7}' nginx.log | sed -e 's#/[0-9]*#/int#;s#[a-zA-Z0-9]*.jpg#id.jpg#;s@[a-zA-Z0-9-]*.png@id.png@;s/[a-zA-Z0-9-]*.gif/id.gif/' | sort | uniq -c | sort -nr | head -20; }

s#/[0-9]*#/int# 的后面没有加 g

# url_summary
# Take field 7 of every line of ./nginx.log, replace each "/<digits>" by
# "/int" and each jpg/png/gif file name by "id.<ext>" (all occurrences per
# line), then print the 20 most frequent results as "count url" lines,
# highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#/[0-9]+#/int#g' \
      -e 's#[a-zA-Z0-9]*.jpg#id.jpg#g' \
      -e 's@[a-zA-Z0-9-]*.png@id.png@g' \
      -e 's/[a-zA-Z0-9-]*.gif/id.gif/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

这个是正确的

# url_summary: print the 20 most frequent values of field 7 of nginx.log
# after sed normalisation, as "count url" lines (highest count first).
# NOTE: the first expression uses [0-9]* (zero or more digits) together with
# the g flag, so every "/" is replaced by "/int" whether or not digits
# follow it (e.g. /topics/123 -> /inttopics/int).
url_summary() { awk '{print $7}' nginx.log | sed -E 's#/[0-9]*#/int#g;s#[a-zA-Z0-9]*.jpg#id.jpg#g;s@[a-zA-Z0-9-]*.png@id.png@g;s/[a-zA-Z0-9-]*.gif/id.gif/g' | sort | uniq -c | sort -nr | head -20; }

这个是 s#/[0-9]*#/int#g 中 [0-9] 后面用的是 * 而不是 +,


总结:* 是匹配零次或多次,+ 是匹配一次或多次,g 是替换一行中的全部匹配;这几个之前没用好。


作业内容:
# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Every run of digits becomes "int"; png/jpg/gif file names become
# "id.<ext>".
url_summary() {
  local log_file=${1:-nginx.log}
  # The file-name class includes "-" so hyphenated (UUID-style) names are
  # replaced as a whole, the gif rule has its "+" quantifier like the other
  # two, and the dots are escaped to match a literal "." only.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's/[0-9]+/int/g' \
      -e 's/[0-9a-z-]+\.png/id.png/g' \
      -e 's/[0-9a-z-]+\.jpg/id.jpg/g' \
      -e 's/[0-9a-z-]+\.gif/id.gif/g' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# png file names become "id.png", every "/<digits>" becomes "/int" and the
# query string is removed.
url_summary() {
  local log_file=${1:-nginx.log}
  # Each continued line ends with a backslash: a line that starts with "|"
  # without a preceding continuation is a shell syntax error.
  # The file-name pattern is a single bracket expression (letters, digits,
  # "_" and "-") instead of the nested \w*(-*\w*)+ form.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's#/[[:alnum:]_-]+\.png#/id.png#g' \
      -e 's#/[0-9]+#/int#g' \
      -e 's#\?.*##' \
    | sort | uniq -c | sort -nr | head -20
}

image

# url_summary
# Take field 7 of every line of ./nginx.log, replace each "/<digits>" by
# "/int" and each jpg/png/gif file name by "id.<ext>" (all occurrences per
# line), then print the 20 most frequent results as "count url" lines,
# highest count first.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's@/[0-9]+@/int@g' \
      -e 's/[a-zA-Z0-9-]+.jpg/id.jpg/g' \
      -e 's/[a-zA-Z0-9-]+.png/id.png/g' \
      -e 's/[a-zA-Z0-9-]+.gif/id.gif/g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -20
}

1607840875(1)

胡玉琦的课后作业

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Reply-edit URLs, 2018 photo png names and URLs ending in /topics/<id> are
# replaced by placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # The URL is extracted first so the sed rules see only the URL: on a whole
  # log line the greedy ".*" of the photo rule could run into later fields.
  # "$" replaces the trailing space that marked the end of the URL.
  # (A "{" must be followed by whitespace; "{cat" is not a group opener.)
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
      -e 's@/_img/uploads/photo/2018/[^/]*\.png!large@/_img/uploads/photo/2018/id.png!large@' \
      -e 's@/topics/[0-9]+$@/topics/int@' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic ids, reply ids, the numeric segment after /photo/ and png/jpg file
# names are replaced by placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # "[0-9]+" instead of "[0-9]*": the latter also matches no digits and
  # turned /topics/node into /topics/intnode. The reply label is spelled
  # "replies", and the photo segment becomes "int" rather than relabelling
  # every year as 2018.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@' \
      -e 's@/replies/[0-9]+@/replies/int@' \
      -e 's@/photo/[0-9]+@/photo/int@' \
      -e 's@/[0-9a-z-]+\.png@/id.png@' \
      -e 's@/[0-9a-z-]+\.jpg@/id.jpg@' \
    | sort | uniq -c | sort -nr | head -20
}

访问量最高的页面地址 使用sed进行统计分析
# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic ids, reply ids, 2018 photo png names and jpg file names are replaced
# by placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # ASCII quotes replace the typographic ones; "[0-9]+" consumes the whole
  # id (a single [0-9] leaves the remaining digits in place); reply ids are
  # labelled /replies/int instead of being turned into a second /topics/int.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@' \
      -e 's@/replies/[0-9]+@/replies/int@' \
      -e 's@/_img/uploads/photo/2018/[0-9a-z-]+\.png@/_img/uploads/photo/2018/id.png@' \
      -e 's@/[0-9a-z-]+\.jpg@/id.jpg@' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic ids, reply ids below a topic and png/gif/jpg file names are replaced
# by placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # "[0-9]+" instead of "[0-9]*": with "*" the pattern "/topics/" matched
  # without any digit, so /topics/node became /topics/intnode.
  # The second rule relies on the first having produced "/topics/int".
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@' \
      -e 's@/topics/int/replies/[0-9]+@/topics/int/replies/int@' \
      -e 's@/[0-9a-z-]+\.png@/id.png@' \
      -e 's@/[0-9a-z-]+\.gif@/id.gif@' \
      -e 's@/[0-9a-z-]+\.jpg@/id.jpg@' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic, reply and avatar ids and jpg/png file names are replaced by
# placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # ASCII quotes replace the typographic ones and every id/name pattern has
  # a "+" quantifier. The png pattern starts with "/" like its replacement;
  # without it the rule inserted an extra slash (/a/b.png -> /a//id.png).
  # "-" is part of the file-name class so hyphenated names match as a whole.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@g' \
      -e 's@/replies/[0-9]+@/replies/int@g' \
      -e 's@/avatar/[0-9]+@/avatar/int@g' \
      -e 's@/[0-9a-z-]+\.jpg@/id.jpg@g' \
      -e 's@/[0-9a-z-]+\.png@/id.png@g' \
    | sort | uniq -c | sort -nr | head -20
}

# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic, reply and avatar ids and jpg/png file names are replaced by
# placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # One -e per rule keeps every s@...@...@ command on a single line (a line
  # break inside the command is a sed error). Ids and names use "+" so the
  # whole id/name is consumed, and reply ids are labelled /replies/int
  # rather than /topics/int.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@g' \
      -e 's@/replies/[0-9]+@/replies/int@g' \
      -e 's@/avatar/[0-9]+@/avatar/int@g' \
      -e 's@/[a-zA-Z0-9-]+\.jpg@/id.jpg@g' \
      -e 's@/[a-zA-Z0-9-]+\.png@/id.png@g' \
    | sort | uniq -c | sort -nr | head -20
}

# Print the 20 most requested URLs (field 7) of ./nginx.log as "count url"
# lines, most frequent first, after replacing topic/reply ids and 2018 photo
# png names by placeholders.
# -E is needed for "+" to act as a quantifier, and the photo pattern starts
# with "/_img" so that it matches the same prefix its replacement writes.
awk '{print $7}' ./nginx.log \
  | sed -E \
    -e 's@/topics/[0-9]+/replies/[0-9]+/edit@/topics/int/replies/int/edit@' \
    -e 's@/_img/uploads/photo/2018/[0-9a-z-]+\.png!large@/_img/uploads/photo/2018/id.png!large@' \
    -e 's@/topics/[0-9]+@/topics/int@' \
  | sort | uniq -c | sort -nr | head -20

代码:
# url_summary [LOG]
# Print the 20 most requested URLs of an nginx access log as "count url"
# lines, most frequent first.
#   LOG - access log to read (default: nginx.log); the URL is taken from
#         whitespace-separated field 7.
# Topic, reply and avatar ids and jpg/png file names are replaced by
# placeholders.
url_summary() {
  local log_file=${1:-nginx.log}
  # ASCII quotes replace the typographic ones and every id/name pattern has
  # a "+" quantifier. The png pattern starts with "/" like its replacement;
  # without it the rule inserted an extra slash (/a/b.png -> /a//id.png).
  # "-" is part of the file-name class so hyphenated names match as a whole.
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e 's@/topics/[0-9]+@/topics/int@g' \
      -e 's@/replies/[0-9]+@/replies/int@g' \
      -e 's@/avatar/[0-9]+@/avatar/int@g' \
      -e 's@/[0-9a-z-]+\.jpg@/id.jpg@g' \
      -e 's@/[0-9a-z-]+\.png@/id.png@g' \
    | sort | uniq -c | sort -nr | head -20
}
图片: