【直播】三剑客实战Nginx日志分析

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Takes the request path (7th whitespace-separated field) of every log line,
# replaces numeric topic/reply ids with "int" and PNG file names with "id.png",
# then prints "count pattern" lines ordered by descending count.
#
# Notes on the patterns:
#   * ids require at least one digit, so "/topics/new" is left untouched
#     instead of becoming "/topics/intnew";
#   * the PNG rule only spans one path segment ([^/]*) and matches a literal
#     ".png", so the directory part of the URL is preserved.
# The file is read by awk directly; no pager is needed for a pipeline.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#/topics/[0-9][0-9]*#/topics/int#g' \
      -e 's#/replies/[0-9][0-9]*#/replies/int#g' \
      -e 's#/[a-z0-9][^/]*\.png#/id.png#g' \
    | sort | uniq -c | sort -nr | head -10
}

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Normalises two URL shapes before counting:
#   /topics/<n>/replies/<n>/...            -> /topics/int/replies/int/...
#   /_img/uploads/photo/2018/<x>.png!large -> /_img/uploads/photo/2018/id.png!large
#
# Fixes relative to the pasted version: plain ASCII quotes (typographic quotes
# are not shell quotes), ids may have more than one digit, the dot before
# "png" is literal, and sorting happens AFTER the rewrite so that "uniq -c"
# sees identical patterns on adjacent lines.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#/topics/[0-9][0-9]*/replies/[0-9][0-9]*/#/topics/int/replies/int/#g' \
      -e 's#/_img/uploads/photo/2018/.*\.png!large#/_img/uploads/photo/2018/id.png!large#g' \
    | sort | uniq -c | sort -nr | head -10
}

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7), in order:
#   1. /topics/<n>/<rest>      -> /topics/int/<rest>
#   2. /topics/<n>             -> /topics/int
#   3. /topics/<n>?<query>     -> /topics/int   (query string dropped)
#   4. /replies/<n>/<rest>     -> /replies/int/<rest>
#   5. /_img.../photo/20YY/<name>.png<suffix> -> .../photo/20YY/id.png<suffix>
#
# Rule 3 uses a plain "?" (literal in a basic regex); the earlier "(\?)" form
# demanded a literal ")" and therefore never matched a query string. It also
# no longer carries the "p" flag, which printed matching lines twice and
# inflated their counts.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#^/topics/[0-9][0-9]*\(/.*\)$#/topics/int\1#' \
      -e 's#^/topics/[0-9][0-9]*$#/topics/int#' \
      -e 's#^/topics/[0-9][0-9]*?.*$#/topics/int#' \
      -e 's#\(/replies/\)[0-9][0-9]*\(/.*\)$#\1int\2#' \
      -e 's#^\(/_img.*/photo/20[1-2][0-9]/\).*\(\.png.*\)$#\1id\2#' \
    | sort | uniq -c | sort -nr | head -10
}
# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   ...s/<digits>   -> ...s/int   (e.g. /topics/12 -> /topics/int)
#   /<name>.png     -> /id.png
#   /<name>.jpg     -> /id.jpg
#
# At least one digit is required after "s/": with "[0-9]*" the rule also
# fired on zero digits and turned "/uploads/photo" into "/uploads/intphoto".
# File names may contain "-" (UUID-style names) and the dot is matched
# literally.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#s/[0-9][0-9]*#s/int#g' \
      -e 's#/[a-z0-9-]*\.png#/id.png#g' \
      -e 's#/[0-9a-z-]*\.jpg#/id.jpg#g' \
    | sort | uniq -c | sort -nr | head -10
}


课后作业:

 # url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
 #
 # The request path (field 7) is normalised with extended-regex rules, applied
 # in the order listed below, before identical patterns are counted:
 #   - a "/" followed by digits becomes "/int";
 #   - .jpg/.jpeg, .png and .gif file names become "id" (the jpe? rule rewrites
 #     up to ".jp"/".jpe" and leaves the trailing "g", so both end as "id.jpg");
 #   - everything after "!" or "=" is replaced by "xxx".
 url_summary(){
     local -a normalize=(
         -e 's#/[0-9]+#/int#g'
         -e 's#/[a-z0-9-]*\.jpe?#/id.jp#g'
         -e 's#/[0-9a-z-]*\.png#/id.png#g'
         -e 's#/[0-9a-z-]*\.gif#/id.gif#g'
         -e 's#\!.*#!xxx#g'
         -e 's#=.*#=xxx#g'
     )

     less nginx.log \
         | awk '{print $7}' \
         | sed -E "${normalize[@]}" \
         | sort \
         | uniq -c \
         | sort -nr \
         | head -10
 }
# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Normalisation of the request path (field 7), applied in this order:
#   1. every run of digits becomes "id";
#   2. any path starting with "/_img/" collapses to "/_img";
#   3. any path starting with "/uploads" and ending in "large" collapses to
#      "/uploads";
#   4. any remaining path starting with "/uploads" collapses to "/uploads".
# The result is grouped with sort/uniq and the ten largest groups are printed.
url_summary(){
  local -a rewrite_rules=(
    -e 's/([0-9]+)/id/g'
    -e 's/^\/_img\/.*$/\/_img/g'
    -e 's/^\/uploads.*large$/\/uploads/g'
    -e 's/^\/uploads.*/\/uploads/g'
  )

  less nginx.log \
    | awk '{print $7}' \
    | sed -r "${rewrite_rules[@]}" \
    | sort -n \
    | uniq -c \
    | sort -nr \
    | head -n 10
}

课后作业

# Top-10 URL patterns in ./nginx.log.
#
# Takes the request path (field 7), replaces topic/reply ids with "int" and
# the file name of 2017/2018 photo PNGs with "id", then counts identical
# patterns and keeps the ten most frequent ones.
#
# Both photo years now produce ".../id.png"; the 2018 rule previously had the
# replacement "id\id\.png", which emitted "idid.png".
awk '{print $7}' nginx.log \
  | sed -E \
    -e 's#topics/[0-9]+#topics/int#g' \
    -e 's#replies/[0-9]+#replies/int#g' \
    -e 's#/_img/uploads/photo/2018/.*\.png#/_img/uploads/photo/2018/id.png#g' \
    -e 's#/_img/uploads/photo/2017/.*\.png#/_img/uploads/photo/2017/id.png#g' \
  | sort | uniq -c | sort -nr | awk 'NR<11'

课后作业:

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   ...s/<digits>                 -> ...s/int   (e.g. /topics/12 -> /topics/int)
#   <digits>/<anything>.<ext>     -> <digits>/id.<ext>  for jpg, png, gif, jpeg
#
# The id rule needs at least one digit (with "[0-9]*" it also rewrote
# "uploads/photo" to "uploads/intphoto"), and the extension dot is escaped so
# it only matches a literal ".".
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#s/[0-9][0-9]*#s/int#g' \
      -e 's#\([0-9][0-9]*/\).*\(\.jpg\)#\1id\2#g' \
      -e 's#\([0-9][0-9]*/\).*\(\.png\)#\1id\2#g' \
      -e 's#\([0-9][0-9]*/\).*\(\.gif\)#\1id\2#g' \
      -e 's#\([0-9][0-9]*/\).*\(\.jpeg\)#\1id\2#g' \
    | sort | uniq -c | sort -nr | head -10
}

image

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   topics/<n>                          -> topics/int
#   _img/uploads/photo/2018/<x>.png     -> _img/uploads/photo/2018/id.png
#   topic/<n>                           -> topic/int
#
# The "topic/" rule now uses the back-reference \1 in the replacement; the
# earlier text "(topic\/)int" wrote literal parentheses into the URL. Ids need
# at least one digit and the ".png" dot is matched literally.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#topics/[0-9]+#topics/int#g' \
      -e 's#(_img/uploads/photo/2018/).*\.png#\1id.png#g' \
      -e 's#(topic/)[0-9]+#\1int#g' \
    | sort | uniq -c | sort -nr | head -10
}

作业0702

# Top-10 URL patterns in ./nginx.log.
#
# Request paths (field 7) are normalised in this order, then counted:
#   1. every run of digits becomes "int";
#   2. "photo/int/<rest>"  becomes "photo/int/_img";
#   3. "avatar/int/<rest>" becomes "avatar/int/_img";
#   4. "avatar/<rest>"     becomes "avatar/_img".
# NOTE(review): rule 4 also matches the output of rule 3, so avatar URLs
# always end up as ".../avatar/_img".
less nginx.log \
  | awk '{print $7}' \
  | sed -E \
    -e 's#([0-9]+)#int#g' \
    -e 's#(photo/int/[0-9a-z].*)#photo/int/_img#g' \
    -e 's#(avatar/int/[0-9a-z].*)#avatar/int/_img#g' \
    -e 's#(avatar/[0-9a-z].*)#avatar/_img#g' \
  | sort \
  | uniq -c \
  | sort -rn \
  | head -10

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   topics/<n>                 -> topics/int
#   replies/<n>                -> replies/int
#   photo/<YYYY>/<x>.png       -> photo/<YYYY>/id.png
#
# A single year-agnostic photo rule replaces the per-year rules (2014, 2015,
# 2017, 2018), which skipped other years such as 2016 and repeated the same
# substitution with different prefixes. The stray "+" after "png" is dropped.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#topics/[0-9]+#topics/int#g' \
      -e 's#replies/[0-9]+#replies/int#g' \
      -e 's#(photo/[0-9]{4}/).*\.png#\1id.png#g' \
    | sort | uniq -c | sort -nr | head -10
}

图片

课后作业 url_summary

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7), in order:
#   1. /topics/<n>/<rest>      -> /topics/int/<rest>
#   2. /topics/<n>             -> /topics/int
#   3. /topics/<n>?<query>     -> /topics/int   (query string dropped)
#   4. /replies/<n>/<rest>     -> /replies/int/<rest>
#   5. /_img.../photo/20YY/<name>.png<suffix> -> .../photo/20YY/id.png<suffix>
#
# In a basic regex "?" is a literal character, which is what rule 3 needs;
# the previous "(\?)" spelling required a literal ")" and never matched.
# The "p" flag was removed from rule 3 because, without "sed -n", it printed
# matching lines twice and doubled their counts.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -e 's#^/topics/[0-9][0-9]*\(/.*\)$#/topics/int\1#' \
      -e 's#^/topics/[0-9][0-9]*$#/topics/int#' \
      -e 's#^/topics/[0-9][0-9]*?.*$#/topics/int#' \
      -e 's#\(/replies/\)[0-9][0-9]*\(/.*\)$#\1int\2#' \
      -e 's#^\(/_img.*/photo/20[1-2][0-9]/\).*\(\.png.*\)$#\1id\2#' \
    | sort | uniq -c | sort -nr | head -10
}

课后作业:
‘’‘
# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   1. every run of digits becomes "int";
#   2. "int/<anything>.png" becomes "id.png" (so a photo URL such as
#      .../photo/2018/<name>.png!large ends up as .../photo/id.png!large).
#
# The dot in rule 2 is escaped: unescaped it matched any character, so a path
# like "/topics/int/apng" was rewritten as if it were a PNG file. Counts are
# integers, so the final ordering uses "sort -nr".
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#([0-9]+)#int#g' \
      -e 's#(int/.*\.png)#id.png#g' \
    | sort | uniq -c | sort -nr | head -10
}
’‘’
image

# url_symmary: print the 10 most frequently requested URL patterns in ./nginx.log.
# (The function name is kept exactly as originally posted.)
#
# Rewrites applied to the request path (field 7):
#   /topics/<n>/replies/<n>/edit          -> /topics/int/replies/int/edit
#   /_img/uploads/photo/<YYYY>/<name>.png -> /_img/uploads/photo/<YYYY>/id.png
#   /topics/<n>                           -> /topics/int
#
# Changes from the pasted draft: ASCII quotes instead of typographic ones,
# "#" as the sed delimiter because the patterns contain "/", regular
# expressions instead of three hard-coded sample URLs, a valid awk program,
# and a single aggregated result (the draft's first line dumped the whole
# rewritten log without counting).
url_symmary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#/topics/[0-9]+/replies/[0-9]+/edit#/topics/int/replies/int/edit#g' \
      -e 's#(/_img/uploads/photo/[0-9]{4}/)[^/]*\.png#\1id.png#g' \
      -e 's#/topics/[0-9]+#/topics/int#g' \
    | sort | uniq -c | sort -rn | head -10
}
手机上传,请忽略格式。另外感觉题目不太清晰:第一次看题以为是直接替换,第二次看题才看到有要求。手机编辑太难了,就不重新上传了。awk 里面替换前加上正则表达式即可。

# Top-10 URL patterns in ./nginx.log.
#
# Request paths (field 7) are normalised in this order, then counted:
#   1. topics/<n> and replies/<n>                 -> topics/int, replies/int
#   2. <photo-or-avatar-dir>/<2014..2018>/<x>.<ext> -> <dir>/<year>/id.<ext>
#      for ext in png, jpg, gif
#   3. /avatar/<n>                                -> /avatar/int
#   4. int/<anything>.<ext>                       -> int/id.<ext>
less nginx.log \
  | awk '{print $7}' \
  | sed -r \
    -e 's#(topics|replies)\/[0-9]+#\1/int#g' \
    -e 's#(_img\/uploads\/photo|uploads\/photo|photo|uploads\/user\/avatar)\/(2017|2018|2016|2015|2014)\/.*\.(png|jpg|gif)+#\1\/\2\/id\.\3#g' \
    -e 's#/(avatar)/[0-9]+#/\1/int#g' \
    -e 's#int\/.*\.(png|jpg|gif)+#int\/id\.\1#g' \
  | sort \
  | uniq -c \
  | sort -nr \
  | head -10

# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# The request path (field 7) is extracted first and then normalised:
#   - a "/" followed by digits becomes "/int";
#   - .jpg/.jpeg, .png and .gif file names become "id" (the jpe? rule leaves
#     the trailing "g" in place, so both spellings end as "id.jpg");
#   - everything after "!" or "=" is replaced by "xxx".
#
# Changes from the pasted version: "sed -E" is required for "+" and "?" to act
# as quantifiers; the rules run on the URL only (on whole log lines the "=.*"
# and "!.*" rules truncated unrelated fields); awk uses ASCII quotes and
# prints field 7 rather than fields 1 and 2; and the data is sorted before
# "uniq -c", which only merges adjacent duplicates.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#/[0-9]+#/int#g' \
      -e 's#/[a-z0-9-]*\.jpe?#/id.jp#g' \
      -e 's#/[0-9a-z-]*\.png#/id.png#g' \
      -e 's#/[0-9a-z-]*\.gif#/id.gif#g' \
      -e 's#!.*#!xxx#g' \
      -e 's#=.*#=xxx#g' \
    | sort | uniq -c | sort -nr | head -n 10
}
#!/usr/bin/bash
# Print the 10 most frequently requested URL patterns found in the nginx.log
# file that sits in the same directory as this script.

script_path=$(cd "$(dirname "$0")" && pwd)
log_file=$script_path/nginx.log
# Extended regex: /topics/16689/replies/124751/edit -> /topics/int/replies/int/edit
reg_01='/topics/[0-9]+/replies/[0-9]+/edit'
# Extended regex: /_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large
# -> /_img/uploads/photo/2018/id.png!large. Group 1 keeps "/photo/<year>/";
# only the file name is replaced, and any suffix after ".png" is preserved.
reg_02='(/photo/[0-9]{4}/)[^/]*\.png'
# Extended regex: /topics/9497 -> /topics/int
reg_03='/topics/[0-9]+'


# url_summary: read "$log_file", normalise the request path (field 7) with the
# three patterns above and print "count pattern" for the ten most frequent.
# Globals: log_file, reg_01, reg_02, reg_03 (all read).
# Returns: 1 (with a message on stderr) when the log file cannot be read.
url_summary() {
  if [[ ! -r "$log_file" ]]; then
    printf 'url_summary: cannot read %s\n' "$log_file" >&2
    return 1
  fi
  # The sed programs are double-quoted so the reg_* variables are expanded;
  # inside single quotes sed received the literal text "$reg_01".
  awk '{print $7}' "$log_file" \
    | sed -E \
      -e "s#${reg_01}#/topics/int/replies/int/edit#g" \
      -e "s#${reg_02}#\1id.png#g" \
      -e "s#${reg_03}#/topics/int#g" \
    | sort | uniq -c | sort -nr | head -n 10
}

echo 'top 10请求量的url如下:'
url_summary
2 个赞

因为shell中支持的正则表达式和python是不一样的,python扩展了很多强大的功能~

‘’’
# url_summary: print the 10 most frequently requested URL patterns in ./nginx.log.
#
# Rewrites applied to the request path (field 7):
#   /topics/<n>/replies/<n>/edit  -> /topics/int/replies/int/edit
#   /photo/<YYYY>/<name>.png      -> /photo/<YYYY>/id.png  (prefix, year and
#                                    any suffix such as "!large" are kept)
#   /topics/<n>                   -> /topics/int
#
# The photo rule replaces only the PNG file name. The earlier rule rewrote
# every URL containing "/photo/<YYYY>/" to the fixed string
# "/_img/uploads/photo/2018/id.png!large", merging different years, prefixes
# and file types into one bucket.
url_summary() {
  awk '{print $7}' nginx.log \
    | sed -E \
      -e 's#/topics/[0-9]+/replies/[0-9]+/edit#/topics/int/replies/int/edit#g' \
      -e 's#(/photo/[0-9]{4}/)[^/]*\.png#\1id.png#g' \
      -e 's#/topics/[0-9]+#/topics/int#g' \
    | sort | uniq -c | sort -nr | head -10
}
‘’’
image