001、shell实现
root@PC1:/home/test2# ls
test.txt
root@PC1:/home/test2# cat test.txt    ## 测试数据, 删除第一次匹配xpehh以外的所有匹配xpehh的行
01 ee ff
02 ee de
03 dd ee
04 jj xpehh
05 jj kk
06 ee de
07 mm xpehh
08 ff ww
09 jj kk
10 mm xpehh
11 dd ee
root@PC1:/home/test2# awk 'BEGIN{idx = 0} {if($0 ~ /xpehh/ && idx == 0 || $0 !~ /xpehh/) {print $0}; if($0 ~ /xpehh/) {idx++}}' test.txt
01 ee ff
02 ee de
03 dd ee
04 jj xpehh
05 jj kk
06 ee de
08 ff ww
09 jj kk
11 dd ee
002、python实现
root@PC1:/home/test2# ls
test.py  test.txt
root@PC1:/home/test2# cat test.txt
01 ee ff
02 ee de
03 dd ee
04 jj xpehh
05 jj kk
06 ee de
07 mm xpehh
08 ff ww
09 jj kk
10 mm xpehh
11 dd ee
root@PC1:/home/test2# cat test.py
#!/usr/bin/python

in_file = open("test.txt", "r")
out_file = open("result.txt", "w")

lines = in_file.readlines()
idx = 0

for i in lines:
    if idx == 0 and "xpehh" in i or "xpehh" not in i:
        out_file.write(i)
    if "xpehh" in i:
        idx = idx + 1

in_file.close()
out_file.close()
root@PC1:/home/test2# python test.py
root@PC1:/home/test2# ls
result.txt  test.py  test.txt
root@PC1:/home/test2# cat result.txt    ## 结果文件
01 ee ff
02 ee de
03 dd ee
04 jj xpehh
05 jj kk
06 ee de
08 ff ww
09 jj kk
11 dd ee
003、R实现
# Keep every row of test.txt except repeated matches of "xpehh": the first
# row containing the pattern is retained, all later matching rows are dropped.

# List the working directory so the presence of the input file can be checked.
dir()

# stringsAsFactors = FALSE: on R < 4.0 text columns are otherwise read as
# factors, and coercing a whole data-frame row to character then yields the
# integer level codes instead of the text, so the pattern is never found.
dat <- read.table("test.txt", stringsAsFactors = FALSE)

pattern <- "xpehh"

# One logical vector per column, TRUE where the cell contains the pattern.
hits_by_column <- lapply(
  dat,
  function(column) grepl(pattern, as.character(column))
)

# A row matches when any of its columns does. The all-FALSE starting value
# keeps the result a logical vector of length nrow(dat) in every case.
row_matches <- Reduce(`|`, hits_by_column, rep(FALSE, nrow(dat)))

# Keep all non-matching rows, plus the single matching row at which the
# running count of matches equals one (i.e. the first match).
keep <- !row_matches | cumsum(row_matches) == 1L

# Row indices that survive the filter.
result <- which(keep)

# drop = FALSE keeps a data frame even if the input has a single column.
final <- dat[result, , drop = FALSE]
final