Linux 中输出指定列元素重复出现次数的编号

发布时间: 2023-07-12 23:40:52  作者: 小鲨鱼2018

 

001、借助 awk 数组实现

[root@PC1 test01]# ls
a.txt
[root@PC1 test01]# cat a.txt       ## 测试数据
ERR219543       Africa
ERR219546       kkkk
ERR229467       qqqq
ERR229468       qqqq
ERR229471       mmnn
ERR219547       kkkk
ERR2214955      qqqq
ERR219544       Africa
ERR219545       Africa
ERR2214969      qqqq
[root@PC1 test01]# awk '{OFS = "\t"; ay[$2]++; print $0, ay[$2]}' a.txt   ## 借助于awk中数组来实现
ERR219543       Africa  1
ERR219546       kkkk    1
ERR229467       qqqq    1
ERR229468       qqqq    2
ERR229471       mmnn    1
ERR219547       kkkk    2
ERR2214955      qqqq    3
ERR219544       Africa  2
ERR219545       Africa  3
ERR2214969      qqqq    4

 

002、借助 cut、sort、uniq、grep 与 while 循环实现

[root@PC1 test01]# ls
a.txt
[root@PC1 test01]# cat a.txt
ERR219543       Africa
ERR219546       kkkk
ERR229467       qqqq
ERR229468       qqqq
ERR229471       mmnn
ERR219547       kkkk
ERR2214955      qqqq
ERR219544       Africa
ERR219545       Africa
ERR2214969      qqqq                                ## 下一条命令: 按第2列分组, 输出组内编号
[root@PC1 test01]# cut -f 2 a.txt | sort | uniq | while read i; do grep "[[:space:]]$i" a.txt | awk '{OFS = "\t"; print $0, NR}' >> xxx; done
[root@PC1 test01]# cut -f 1 a.txt | while read i; do grep "$i[[:space:]]" xxx >> yyy; done
[root@PC1 test01]# cat yyy                          ## 原序输出
ERR219543       Africa  1
ERR219546       kkkk    1
ERR229467       qqqq    1
ERR229468       qqqq    2
ERR229471       mmnn    1
ERR219547       kkkk    2
ERR2214955      qqqq    3
ERR219544       Africa  2
ERR219545       Africa  3
ERR2214969      qqqq    4