Linux与Shell实战(一)

学习资料

  • 高级Bash脚本编程指南
  • LINUX与UNIX SHELL编程指南
  • 鸟哥的Linux私房菜
  • IBM DeveloperWorks
  • 阮一峰的《bash脚本教程》
  • Google

编辑器与代码格式化

  • vs code
  • shell format插件
# Formatting sample for the shell-format plugin.
# Bash rejects a function whose body is empty ("syntax error near unexpected
# token `}'"), so the placeholder carries a no-op until a real body is added.
url_summary() {
  :
}

演练数据

把/tmp/nginx.log文件复制到自己的主目录下,用来演练

实战内容

  • 日志数据检索
    • 找出log中的404 500的报错 考察严谨性,某次训练没有一人做对
    • 找出500错误时候的上下文 考察grep高级用法
  • 日志数据统计
    • 找出访问量最高的ip 统计分析
  • 数据文件修改
    • 找出访问量最高的页面地址 借助于sed的统计分析

find_error_log

编写一个函数 find_error_log()
找出log中的404 500的报错 考察严谨性,某次训练没有一人做对

回复的时候按照这个格式

#######################################
# Print every entry of nginx.log whose HTTP status is exactly 404 or 500.
# Only the status column ($9) is tested, with an anchored pattern, so the
# digits "404"/"500" inside an IP, URL, byte count or timing are ignored.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   the matching log lines, unchanged, on stdout
# Returns:   awk's exit status
#######################################
find_error_log() {
  # Per-status counts instead of full lines:
  #   awk '$9 ~ /^(404|500)$/ { print $9 }' nginx.log | sort | uniq -c
  awk '$9 ~ /^(404|500)$/' nginx.log
}

find_before

找出500错误时候的上下文,找出500错误的前两行 考察grep高级用法

#######################################
# Show every line of nginx.log that contains " 500 " together with the two
# lines that precede it (grep "before" context).
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   context and matching lines on stdout
# Returns:   grep's exit status (1 when nothing matches)
#######################################
find_before() {
  grep -B 2 -e ' 500 ' nginx.log
}

find_top_10

找出访问量最高的ip, 统计分析,取出top10


#######################################
# List the ten client IPs (first column of nginx.log) with the most requests.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   up to ten "<count> <ip>" lines, highest count first
#######################################
find_top_10() {
  awk '{ print $1 }' nginx.log \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 10
}

url_summary

找出访问量最高的页面地址 借助于sed的统计分析

  • /topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
  • /_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成 /_img/uploads/photo/2018/id.png!large
  • /topics/9497 改成 /topics/int
  • 其他规则参考如上

输出

  • url pattern对应的请求数量
  • 取出top 20请求量的url pattern

类似
nnn urlxxx
mmm urlxxx

#######################################
# Count requests per URL pattern and print the 20 most requested patterns.
# The request path ($7) is normalised before counting:
#   /topics/<digits>            -> /topics/int
#   /replies/<digits>           -> /replies/int
#   /photo/<yyyy>/<id>.<suffix> -> /photo/<yyyy>/id.<suffix>
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   up to twenty "<count> <pattern>" lines, highest count first
#######################################
url_summary() {
  # "+" (not "*") so that a path such as /topics/feed is left alone.
  awk '{ print $7 }' nginx.log \
    | sed -E \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/replies/[0-9]+#/replies/int#g' \
        -e 's#(/photo/[0-9]{4})/[A-Za-z0-9-]+#\1/id#g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 20
}

url_avg_time

统计访问首页路径 / 的平均响应时间

#######################################
# Print the mean response time of requests for the home page "/".
# The time is taken from the third field from the end of each log line.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   the average on stdout; a diagnostic on stderr when no request
#            for "/" exists
# Returns:   0 on success, 1 when there is nothing to average
#######################################
url_avg_time() {
  awk '
    $7 == "/" { total += $(NF - 2); hits++ }
    END {
      # Guard the division: an empty log or one without "/" has no average.
      if (hits == 0) {
        print "url_avg_time: no requests for / found" > "/dev/stderr"
        exit 1
      }
      print total / hits
    }
  ' nginx.log
}

性能统计

打印阿里云盾进程的cpu、mem利用率与平均利用率。

[root@shell.testing-studio.com ~]$ top -b -d 1 -n 10 | grep -i yundun$ --line-buffered | awk '{c+=$9;m+=$10;print $9,$10}END{print "";print c/NR,m/NR}'
0.0 2.0
4.9 2.0
1.0 2.0
2.0 2.0
4.0 2.0
3.0 2.0
2.9 2.0
3.0 2.0
3.0 2.0
3.0 2.0

2.68 2

#######################################
# Print the access-log entries whose HTTP status is 404 or 500.
# The pattern is anchored on the quoted request line, so "404"/"500" that
# occur in an IP, URL, byte count or timing no longer produce false hits.
# Arguments: $1 - log file to scan (optional, default /tmp/nginx.log)
# Outputs:   matching log lines on stdout
# Returns:   grep's exit status (1 when nothing matches)
#######################################
find_error_log() {
  local log_file=${1:-/tmp/nginx.log}
  # <prefix>"<request>" <status> ... : the status is the first field after
  # the closing quote of the request.
  grep -E '^[^"]*"[^"]*" (404|500) ' -- "$log_file"
}

cat nginx.log| grep -E ' 404|500'|awk '{print $0}'

#######################################
# List the ten client IPs with the most requests in nginx.log.
# Requests are tallied per IP in an awk array, then ordered by count.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   up to ten "<ip> <count>" lines, highest count first
#######################################
find_top_10() {
  awk '{ hits[$1]++ } END { for (ip in hits) print ip, hits[ip] }' nginx.log \
    | sort -k2,2nr \
    | head -n 10
}

awk '{print $1}' nginx.log | sort | uniq -c | sort -n -k 1 -r | head -n 10

awk -F" " '{print $1}' nginx.log |sort|uniq -c|sort -nrk 1 -t' '|awk -F" " '{print $2}'|head -10

cat nginx.log|awk -F" " '{print $1}'|sort|uniq -c|sort -nrk 1 -t' '|awk -F" " '{print $1}'|head -10

[root@shell.testing-studio.com ~]$ find_top_10
    282 216.244.66.241
    130 136.243.151.90
    110 127.0.0.1
     74 144.76.81.72
     69 115.236.50.18
     52 40.77.167.22
     45 40.77.167.60
     45 40.77.167.1
     42 141.8.142.131
     42 139.180.132.174

[root@shell.testing-studio.com ~]$ less nginx.log | awk '{print $1}' | sort | uniq -c | sort -rn | head -10
    282 216.244.66.241
    130 136.243.151.90
    110 127.0.0.1
     74 144.76.81.72
     69 115.236.50.18
     52 40.77.167.22
     45 40.77.167.60
     45 40.77.167.1
     42 141.8.142.131
     42 139.180.132.174


[root@shell.testing-studio.com ~]$ type find_top_10
find_top_10 是函数

find_top_10 ()
{
    less nginx.log | awk '{print $1}' | sort | uniq -c | sort -rn | awk 'NR<=10'
}

只写了cpu的

top -b -d 1 -n 10 | grep -i YunDun$ | awk '{print $9}' | awk '{t+=$1}END{print t,t/NR}'

1 个赞

top -b -d 1 -n 10 |grep -i yundun | awk '{cpu+=$9;mem+=$10;}END{print cpu/NR,mem/NR}'

#######################################
# Print the entries of nginx.log whose HTTP status is 404 or 500.
# The status is taken as the first word after the quoted request line, the
# same field the original cut -d '"' -f 3 | cut -d ' ' -f 2 was aiming at.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   matching log lines, unchanged, on stdout
# Returns:   awk's exit status
#######################################
find_error_log() {
  # One awk pass replaces the per-line read/echo/cut loop: no process is
  # spawned per log line and the line is printed without word-splitting.
  awk -F'"' '{
    # $3 is the text between the request and the referer: " <status> <bytes> "
    split($3, after_request, " ")
    if (after_request[1] == "404" || after_request[1] == "500") print
  }' nginx.log
}

# Print each line of nginx.log that contains " 500 ", preceded by the two
# lines that come right before it in the file.
find_before() {
  local -r needle=' 500 '
  local -r lines_before=2
  grep -B "$lines_before" -e "$needle" nginx.log
}
# Rank client IPs (first column of nginx.log) by request count and print the
# ten busiest as "<count> <ip>", highest first.
find_top_10() {
  awk '{ print $1 }' nginx.log \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -n 10
}
# Print each line of nginx.log that contains " 500 " plus the two lines
# before it. grep reads the file itself: the pager `less` is not a file
# reader and may be missing on minimal systems.
find_before() {
  grep --color=auto -B 2 -e ' 500 ' nginx.log
}

[21146916@shell.testing-studio.com ~]$ find_before 
123.127.112.18 - - [05/Dec/2018:00:09:18 +0000] "GET /cable HTTP/1.1" 101 1017 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 70.577 70.577 .
139.180.132.174 - - [05/Dec/2018:00:09:20 +0000] "GET /bbs.zip HTTP/1.1" 404 1264 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.011 0.011 .
139.180.132.174 - - [05/Dec/2018:00:09:12 +0000] "GET /__zep__/js.zip HTTP/1.1" 500 2183 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.018 0.018 .

#######################################
# Count requests per URL pattern and print the 20 most requested patterns.
# Normalisation applied to the request path ($7):
#   /topics/<digits>         -> /topics/int
#   /replies/<digits>        -> /replies/int
#   /photo/<yyyy>/<id>.<ext> -> /photo/<yyyy>/id.<ext>
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   up to twenty "<count> <pattern>" lines, highest count first
#######################################
url_summary() {
  # - The replacement keeps the leading "/" of /topics.
  # - "+" instead of "*" requires at least one digit, so /topics/feed is no
  #   longer rewritten to /topics/intfeed.
  # - The year directory is captured and kept, for any four-digit year.
  awk '{ print $7 }' nginx.log \
    | sed -E \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/replies/[0-9]+#/replies/int#g' \
        -e 's#(/photo/[0-9]{4})/[A-Za-z0-9-]+#\1/id#g' \
    | sort \
    | uniq -c \
    | sort -nr \
    | head -n 20
}
[21146916@shell.testing-studio.com ~]$ url_summary 
    584 /cable
    237 topics/int
    147 topics/int/replies/int/edit
     94 /
     42 topics/int/replies/int/reply_suggest
     41 topics/int/show_wechat
     37 topics/int?locale=zh-CN
     33 topics/int?locale=en
     28 topics/int?locale=zh-TW
     24 topics/int/replies/int/reply_to
     18 /uploads/photo/id.png!large
     15 /_img/uploads/photo/id.png!large
     13 /uploads/photo/id.gif!large
     13 /_img/uploads/photo/id.gif!large
     12 topics/int?order_by=created_at&
      9 topics/int?order_by=created_at
      7 topics/int?order_by=like&
      7 /account/sign_in
      6 topics/int?order_by=like
      6 topics/intfeed
#######################################
# Print the mean response time of requests for the home page "/".
# The time is read from the third field from the end of each log line.
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   the average on stdout; a diagnostic on stderr when the log
#            holds no request for "/"
# Returns:   0 on success, 1 when there is nothing to average
#######################################
url_avg_time() {
  # A single awk pass filters and accumulates; counting the hits explicitly
  # lets the END block avoid a division by zero.
  awk '
    $7 == "/" { total += $(NF - 2); hits++ }
    END {
      if (hits == 0) {
        print "url_avg_time: no requests for / found" > "/dev/stderr"
        exit 1
      }
      print total / hits
    }
  ' nginx.log
}

[21146916@shell.testing-studio.com ~]$ url_avg_time 
0.091266
[21146916@shell.testing-studio.com ~]$ top -b -d 1 -n 10  | grep -i -n  'yundun$' | awk 'BEGIN{c=0;m=0}{print $9,$10;c+=$9;m+=$10}END{print ""  ;print c/NR,m/NR}'
0.0 2.8
3.0 2.8
3.0 2.8
3.0 2.8
1.0 2.8
4.0 2.8
3.0 2.8
3.0 2.8
3.0 2.8
2.0 2.8

2.5 2.8
[21146916@shell.testing-studio.com ~]$

大佬666
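[a-zA-Z] 在部分 locale(例如 en_US.UTF-8)下可以写成 [a-Z],但这种写法依赖 locale 的字符排序规则:在 C/POSIX locale 下 'a' 的编码大于 'Z',GNU grep/sed 会报 "Invalid range end"。要保证可移植,建议仍写 [a-zA-Z] 或 [[:alpha:]]。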

好的,谢谢大佬,我试试

1.find_error_log(){ less nginx.log | awk '$9~/404|500/ {print $9}' | sort | uniq -c | sort -nr }
输出结果
266 404
1 500

2.find_before(){ less nginx.log | grep ' 500 ' -B 2 -n }
输出结果
1454-123.127.112.18 - - [05/Dec/2018:00:09:18 +0000] "GET /cable HTTP/1.1" 101 1017 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 70.577 70.577 .
1455-139.180.132.174 - - [05/Dec/2018:00:09:20 +0000] "GET /bbs.zip HTTP/1.1" 404 1264 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.011 0.011 .
1456:139.180.132.174 - - [05/Dec/2018:00:09:12 +0000] "GET /__zep__/js.zip HTTP/1.1" 500 2183 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36" 0.018 0.018 .

3.find_top_10(){ less nginx.log | awk '{print $1}' | sort | uniq -c | sort -rn | head }
输出结果
282 216.244.66.241
130 136.243.151.90
110 127.0.0.1
74 144.76.81.72
69 115.236.50.18
52 40.77.167.22
45 40.77.167.60
45 40.77.167.1
42 141.8.142.131
42 139.180.132.174

4.less nginx.log | awk '{print $7}' | sed '{s#2018/[a-Z0-9-]*#2018/id#g; s#39358/[a-Z0-9]*#39358/id#g; s#topics/[0-9]*#topics/int#g; s#replies/[0-9]*#replies/int#g}' | sort | uniq -c | sort -nr | head -n 20
输出结果
584 /cable
237 /topics/int
147 /topics/int/replies/int/edit
94 /
42 /topics/int/replies/int/reply_suggest
41 /topics/int/show_wechat
37 /topics/int?locale=zh-CN
33 /topics/int?locale=en
28 /topics/int?locale=zh-TW
24 /topics/int/replies/int/reply_to
18 /uploads/photo/2018/id.png!large
15 /_img/uploads/photo/2018/id.png!large
13 /uploads/photo/2018/id.gif!large
13 /_img/uploads/photo/2018/id.gif!large
12 /topics/int?order_by=created_at&
9 /topics/int?order_by=created_at
7 /topics/int?order_by=like&
7 /account/sign_in
6 /topics/int?order_by=like
6 /topics/intfeed
5.url_avg_time () { less nginx.log | awk '$7=="/"{print $(NF-2)}' | awk '{t+=$1}END{print t/NR}' }
输出结果
0.091266

# Demonstrate grep's three context modes around lines containing " 500 " in
# nginx.log, printed one after another:
#   -C 2  two lines before and after each match
#   -B 2  two lines before each match
#   -A 2  two lines after each match
find_before() {
  local context_flag
  for context_flag in -C -B -A; do
    grep "$context_flag" 2 -e " 500 " nginx.log
  done
}

# Print the ten most frequent client IPs (first column of nginx.log) as
# "<count> <ip>" lines, busiest first.
find_ip_top_10() {
  awk '{ print $1 }' nginx.log \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 10
}

# sed
# 曲线救国吗?哈哈哈哈
$ echo "/topics/16689/replies/124751/edit" | sed 's/[0-9]/i/g' | sed 's/\bi*i\b/int/g'

url_summary

找出访问量最高的页面地址 借助于sed的统计分析

  • /topics/16689/replies/124751/edit 把数字替换为 /topics/int/replies/int/edit
  • /_img/uploads/photo/2018/c54755ee-6bfd-489a-8a39-81a1d7551cbd.png!large 变成 /_img/uploads/photo/2018/id.png!large
  • /topics/9497 改成 /topics/int
  • 其他规则参考如上

输出

  • url pattern对应的请求数量
  • 取出top 20请求量的url pattern
#######################################
# Count requests per URL pattern and print the 20 most requested patterns.
# Normalisation applied to the request path ($7):
#   /replies/<digits>    -> /replies/int
#   /topics/<digits>     -> /topics/int
#   /<id>.<ext>!large    -> /id.<ext>!large   (png, gif, jpg, jpeg)
# Arguments: none (reads nginx.log in the current directory)
# Outputs:   up to twenty "<count> <pattern>" lines, highest count first
#######################################
url_summary() {
  # "+" requires at least one digit, so /topics/feed is not turned into
  # /topics/intfeed; the dot before the image suffix is escaped so it only
  # matches a literal ".".
  awk '{ print $7 }' nginx.log \
    | sed -E \
        -e 's#/replies/[0-9]+#/replies/int#g' \
        -e 's#/topics/[0-9]+#/topics/int#g' \
        -e 's#/[A-Za-z0-9-]+\.(png|gif|jpe?g)!large#/id.\1!large#g' \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 20
}

    584 /cable                                                                                │
    237 /topics/int                                                                           │
    147 /topics/int/replies/int/edit                                                          │
     94 /                                                                                     │
     42 /topics/int/replies/int/reply_suggest                                                 │
     41 /topics/int/show_wechat                                                               │
     37 /topics/int?locale=zh-CN                                                              │
     33 /topics/int?locale=en                                                                 │
     28 /topics/int?locale=zh-TW                                                              │
     26 /uploads/photo/2017/id.png!large                                                      │
     25 /_img/uploads/photo/2017/id.png!large                                                 │
     24 /topics/int/replies/int/reply_to                                                      │
     18 /uploads/photo/2018/id.png!large                                                      │
     15 /_img/uploads/photo/2018/id.png!large                                                 │
     12 /topics/int?order_by=created_at&                                                      │
      9 /topics/int?order_by=created_at                                                       │
      7 /topics/int?order_by=like&                                                            │
      7 /account/sign_in                                                                      │
      6 /topics/int?order_by=like                                                             │
      6 /topics/intfeed

# Print the ten most frequent client IPs (first column of nginx.log) as
# "<count> <ip>" lines, busiest first. The awk program is wrapped in plain
# ASCII single quotes; typographic quotes are not shell quoting characters.
find_top_10() {
  awk '{ print $1 }' nginx.log \
    | sort \
    | uniq -c \
    | sort -rn \
    | head -n 10
}

# Count the 404 and 500 responses in nginx.log, one "<count> <status>" line
# per status. The status column ($9) is matched with an anchored pattern so
# that only the exact codes 404 and 500 are counted.
find_error_log_1() {
  awk '$9 ~ /^(404|500)$/ { print $9 }' nginx.log \
    | sort \
    | uniq -c
}
# Count the 404 and 500 responses in nginx.log, one "<count> <status>" line
# per status. The status is located by its position right after the quoted
# request line (... HTTP/x.y" <status> ), independent of the column number.
find_error_log_2() {
  # grep -o emits e.g.: HTTP/1.1" 404   -> awk sees the status as $2.
  # The class [0-9] replaces the original [1|0], which also accepted "|";
  # the trailing blank ensures the status is exactly three digits long.
  grep -ioE 'HTTP/[0-9](\.[0-9])?"[[:blank:]][0-9]{3}[[:blank:]]' nginx.log \
    | awk '$2 ~ /^(404|500)$/ { print $2 }' \
    | sort \
    | uniq -c
}
# Print the distribution of all HTTP status codes in nginx.log, one
# "<count> <status>" line per code, ordered by status. The status is the
# three-digit field right after the quoted request line.
find_error_log_3() {
  # The original five per-class rules (1xx..5xx) all printed $2; together
  # they amount to one range test over the valid status codes.
  grep -ioE 'HTTP/[0-9](\.[0-9])?"[[:blank:]][0-9]{3}[[:blank:]]' nginx.log \
    | awk '$2 >= 100 && $2 <= 599 { print $2 }' \
    | sort \
    | uniq -c
}
433 101
814 200
304 301
152 302
5 304
3 401
266 404
2 422
2 499
1 500

```bash
$ find_top_10(){ awk ' {print $1}' nginx.log | sort | uniq -c | sort -rn | head -10; }
```

[75547300@shell.testing-studio.com ~]$ find_top_10
282 216.244.66.241
130 136.243.151.90
110 127.0.0.1
74 144.76.81.72
69 115.236.50.18
52 40.77.167.22
45 40.77.167.60
45 40.77.167.1
42 141.8.142.131
42 139.180.132.174