Python: checking file and outputting to a new file -
I have a sample file named `filename`:
chr7 149601 merged_del_2_39754 t . 141.35 . ac=0;af=0.00;an=2;dp=37;mq=37.00;mq0=0;1000galt=<del>;af1000g=0.09;afr_af=0.01;amr_af=0.03;asn_af=0.27;eur_af=0.04;ts=hpgom;tsseq=t,t,g,t,t;canc=t;ganc=t;oanc=t;msc=0.000;grp=-2.16;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:37:99:0,111,1458:0,0:0,0:0,0:18,18:0 chr7 149616 rs190051229 c . 108.65 . ac=0;af=0.00;an=2;dp=35;mq=37.00;mq0=0;1000galt=t;af1000g=0.00;asn_af=0.01;cpg;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.000;grp=-2.15;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:35:78.65:0,79,1305:0,0:17,17:0,0:0,0:0 chr7 149628 rs3814456 . 129.31 . ac=0;af=0.00;an=2;dp=37;mq=37.00;mq0=0;1000galt=g;af1000g=0.14;afr_af=0.16;amr_af=0.07;asn_af=0.27;eur_af=0.06;ts=hpgom;tsseq=a,a,a,a,a;canc=a;ganc=a;oanc=a;msc=0.000;grp=-2.23;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:37:99:0,99,1290:14,22:0,0:0,0:0,0:0 chr7 149634 rs146001818 g t 1375.63 . ac=2;af=1.00;an=2;baseqranksum=0.456;dp=39;dels=0.00;fs=0.000;hrun=0;haplotypescore=0.9997;mq=37.00;mq0=0;mqranksum=1.641;qd=35.27;readposranksum=1.459;1000galt=t;af1000g=0.01;afr_af=0.01;amr_af=0.01;eur_af=0.03;ts=hpgom;tsseq=g,g,g,g,g;canc=g;ganc=g;oanc=g;msc=0.001;grp=0.0686;map20=1;ann=t|upstream_gene_variant|modifier|loc100507642|loc100507642|transcript|nr_108064.1|noncoding||n.-1g>t|||||84|,t|upstream_gene_variant|modifier|loc100507642|loc100507642|transcript|nr_108065.1|noncoding||n.-1g>t|||||84|,t|intergenic_region|modifier|loc100507642|loc100507642|intergenic_region|loc100507642||||||||| gt:dp:gq:pl:a:c:g:t:ir 1/1:39:74.36:1409,74,0:0,0:0,0:0,1:15,22:0 chr7 149645 rs112562180 c . 165.42 . ac=0;af=0.00;an=2;dp=46;mq=37.00;mq0=0;1000galt=a;af1000g=0.02;afr_af=0.03;amr_af=0.03;eur_af=0.02;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.000;grp=-1.93;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:46:99:0,135,1758:0,0:22,22:0,0:1,0:0 chr7 149659 rs79606188 t . 195.53 . 
ac=0;af=0.00;an=2;dp=55;mq=37.00;mq0=0;1000galt=a;af1000g=0.02;afr_af=0.07;amr_af=0.01;ts=hpgom;tsseq=t,t,t,t,g;canc=t;ganc=t;oanc=t;msc=0.005;grp=0.0203;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:55:99:0,166,2189:0,0:0,0:0,0:26,28:0 chr7 149724 rs193238495 c . 216.56 . ac=0;af=0.00;an=2;dp=62;mq=37.18;mq0=0;1000galt=t;af1000g=0.00;afr_af=0.01;cpg;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.000;grp=-0.139;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:62:99:0,187,2385:0,0:37,24:0,0:0,0:0 chr7 149765 rs3814455 c . 198.52 . ac=0;af=0.00;an=2;dp=56;mq=37.73;mq0=0;1000galt=t;af1000g=0.54;afr_af=0.35;amr_af=0.60;asn_af=0.31;eur_af=0.79;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.000;grp=-0.494;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:56:99:0,169,2174:0,0:22,32:0,0:0,0:0 chr7 149785 rs185668085 c . 192.52 . ac=0;af=0.00;an=2;dp=54;mq=37.76;mq0=0;1000galt=g;af1000g=0.01;asn_af=0.04;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.002;grp=-0.216;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:54:99:0,163,2135:0,0:19,33:0,0:0,0:0 chr7 149825 rs189449059 c . 156.38 . ac=0;af=0.00;an=2;dp=42;mq=37.71;mq0=0;1000galt=t;af1000g=0.00;ts=hpgom;tsseq=c,c,c,c,-;canc=c;ganc=c;oanc=c;msc=0.000;grp=0.693;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:42:99:0,126,1609:0,0:17,24:0,0:0,0:0 chr7 149863 . g 407.49 . ac=1;af=0.50;an=2;baseqranksum=-1.315;dp=37;dels=0.00;fs=1.341;hrun=1;haplotypescore=1.9995;mq=37.00;mq0=0;mqranksum=-0.201;qd=11.01;readposranksum=1.469;ts=hpgom;tsseq=g,g,g,g,g;canc=g;ganc=g;oanc=g;msc=0.000;grp=-1.5;map20=1;ann=a|non_coding_exon_variant|modifier|loc100507642|loc100507642|transcript|nr_108064.1|noncoding|1/3|n.146g>a||||||,a|non_coding_exon_variant|modifier|loc100507642|loc100507642|transcript|nr_108065.1|noncoding|1/2|n.146g>a|||||| gt:dp:gq:pl:a:c:g:t:ir 0/1:37:99:437,0,759:6,8:0,0:13,9:0,0:0 chr7 149880 rs115127983 c . 108.24 . 
ac=0;af=0.00;an=2;dp=26;mq=37.00;mq0=0;1000galt=g;af1000g=0.04;afr_af=0.15;amr_af=0.02;cpg;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.003;grp=-1.24;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:26:78.25:0,78,1029:0,0:15,10:0,0:0,0:0 chr7 150067 rs181041230 g . 138.34 . ac=0;af=0.00;an=2;dp=36;mq=38.37;mq0=0;1000galt=a;af1000g=0.00;afr_af=0.01;ts=hpgom;tsseq=g,g,g,g,g;canc=g;ganc=g;oanc=g;msc=0.005;grp=0.119;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:36:99:0,108,1425:0,0:0,0:16,19:0,0:0 chr7 150253 rs28397846 . 159.4 . ac=0;af=0.00;an=2;dp=43;mq=39.05;mq0=0;1000galt=g;af1000g=0.03;afr_af=0.14;amr_af=0.02;ts=hpgom;tsseq=a,a,a,-,g;canc=a;ganc=a;oanc=a;msc=0.000;grp=-2.18;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:43:99:0,129,1687:24,19:0,0:0,0:0,0:0 chr7 150280 rs139905037 . 159.4 . ac=0;af=0.00;an=2;dp=45;mq=38.96;mq0=0;1000galt=g;af1000g=0.00;asn_af=0.01;ts=hpgom;tsseq=a,a,a,-,-;canc=a;ganc=a;oanc=a;msc=0.000;grp=-0.168;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:45:99:0,129,1682:19,25:0,1:0,0:0,0:0 chr7 150353 rs75914010 . 162.42 . ac=0;af=0.00;an=2;dp=45;mq=37.00;mq0=0;1000galt=t;af1000g=0.02;afr_af=0.03;amr_af=0.02;eur_af=0.02;ts=hpgom;tsseq=a,a,a,a,a;canc=a;ganc=a;oanc=a;msc=0.000;grp=-0.647;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:45:99:0,132,1739:21,24:0,0:0,0:0,0:0 chr7 150356 rs185358707 c . 113.39 . ac=0;af=0.00;an=2;dp=43;mq=37.00;mq0=0;1000galt=t;af1000g=0.00;cpg;ts=hpgom;tsseq=c,c,c,c,c;canc=c;ganc=c;oanc=c;msc=0.000;grp=-1.59;map20=1 gt:dp:gq:pl:a:c:g:t:ir 0/0:43:83.39:0,83,1538:0,1:19,21:0,0:2,0:0
My goal is to save the lines that fall within a specified range. This is what I have so far:
#!/usr/bin/env python
# Reads 'filename' line by line and prints matching lines into 'mega1.txt'.
import sys

file = open('filename')
# Everything printed from here on goes into mega1.txt instead of the terminal.
sys.stdout = open('mega1.txt', 'w')
for line in file:
    fields = line.strip().split()
    chrm = fields[0]
    pos = int(fields[1])
    id1 = fields[2]
    # NOTE: id1 is a str produced by split(), while range() yields ints, so
    # this membership test never matches; pos is the integer column.
    if id1 in range(149601, 1149601):
        print(line)
I am not sure why it continues to run.
For the sample file, the lines saved to the new mega1.txt file should be those whose values in column 2 fit the range.
You might be checking the wrong data in the `if`.
You are checking `id1`, which is a `str`, against `range(149601, 1149601)`.
You have to check `pos` against the range instead.
Please try this; it works.
#!/usr/bin/env python
"""Copy lines whose second column (position) lies in a given range."""

RANGE_START = 149601   # first position that is kept (inclusive)
RANGE_STOP = 1149601   # first position that is no longer kept (exclusive)


def filter_by_position(in_path, out_path, start=RANGE_START, stop=RANGE_STOP):
    """Write the lines of in_path whose position is in [start, stop) to out_path.

    Each line is split on whitespace and the second field is parsed as an
    integer position. Blank lines, lines with fewer than two fields and
    lines starting with '#' are skipped.

    Args:
        in_path: path of the whitespace-delimited input file.
        out_path: path of the file to create (overwritten if it exists).
        start: lowest position kept, inclusive.
        stop: upper bound of the range, exclusive (same as range(start, stop)).

    Returns:
        The number of lines written to out_path.

    Raises:
        ValueError: if the second field of a data line is not an integer.
    """
    kept = 0
    # Context managers close both files even if parsing fails part-way.
    with open(in_path) as src, open(out_path, 'w') as dst:
        for line in src:
            fields = line.split()
            if len(fields) < 2 or fields[0].startswith('#'):
                continue
            pos = int(fields[1])
            # A chained comparison is O(1); "pos in range(...)" rebuilds and
            # scans a million-element list per line on Python 2.
            if start <= pos < stop:
                # line already ends with its newline, so write it unchanged.
                dst.write(line)
                kept += 1
    return kept


if __name__ == '__main__':
    filter_by_position('filename', 'mega1.txt')
Comments
Post a Comment