本文整理汇总了Python中extern.run函数的典型用法代码示例。如果您正苦于以下问题:Python run函数的具体用法?Python run怎么用?Python run使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了run函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_query_with_otu_table_two_samples_same_sequence
def test_query_with_otu_table_two_samples_same_sequence(self):
    """Query a database with the same OTU table it was built from.

    The 'maximal' and 'minimal' samples carry the identical sequence, so
    each query row is expected to hit both samples with divergence 0.
    """
    gene = 'ribosomal_protein_L11_rplK_gpkg'
    sequence = 'CGTCGTTGGAACCCAAAAATGAAATAATATATCTTCACTGAGAGAAATGGTATTTATATA'
    # (sample name, taxonomy) pairs, in the order they appear in the table.
    sample_taxonomies = [
        ('maximal', 'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'),
        ('minimal', 'Root; k__Bacteria; p__Firmicutes; c__Bacilli'),
    ]

    otu_rows = [self.headers]
    for sample, taxonomy in sample_taxonomies:
        otu_rows.append([gene, sample, sequence, '7', '4.95', taxonomy])
    otu_text = "\n".join("\t".join(row) for row in otu_rows)

    # Each query sample is expected to hit each subject sample exactly once.
    expected = [['query_name','query_sequence','divergence','num_hits','sample','marker','hit_sequence','taxonomy']]
    for query_sample, _ in sample_taxonomies:
        for hit_sample, hit_taxonomy in sample_taxonomies:
            expected.append([
                query_sample + ';' + gene, sequence, '0', '7',
                hit_sample, gene, sequence, hit_taxonomy])

    with tempfile.NamedTemporaryFile() as otu_file:
        otu_file.write(otu_text)
        otu_file.flush()

        with tempdir.TempDir() as workdir:
            makedb_cmd = "{} makedb --db {}/sdb --otu_table {}".format(
                path_to_script, workdir, otu_file.name)
            extern.run(makedb_cmd)

            query_cmd = "{} query --query_otu_table {} --db {}/sdb".format(
                path_to_script, otu_file.name, workdir)
            observed = subprocess.check_output(query_cmd, shell=True)
            self.assertEqualOtuTable(expected, observed)
开发者ID:wwood,项目名称:singlem,代码行数:29,代码来源:test_makedb_and_query.py
示例2: _align_sequences
def _align_sequences(self, input_sequences_path, output_alignment_path,
                     threads):
    '''Align the sequences in a FASTA file with mafft.

    The sequences are read with SequenceIO, stripped of '*' characters and
    piped to mafft on stdin; mafft's stdout is redirected by the shell into
    the output file.

    Parameters
    ----------
    input_sequences_path: str
        path to input sequences in fasta format
    output_alignment_path: str
        path the alignment is written to
    threads: str
        number of threads to use

    Returns
    -------
    Nothing
    '''
    logging.debug("Aligning sequences using mafft")

    fasta_lines = []
    with open(input_sequences_path) as fasta_in:
        for name, sequence, _ in SequenceIO().each(fasta_in):
            # '*' characters must not reach the HMM, otherwise tree
            # insertion fails.
            fasta_lines.extend(('>%s' % name, sequence.replace('*', '')))

    mafft_cmd = "mafft --anysymbol --thread %s --auto /dev/stdin > %s" % (
        threads, output_alignment_path)
    extern.run(mafft_cmd, stdin="\n".join(fasta_lines))
开发者ID:geronimp,项目名称:graftM,代码行数:28,代码来源:create.py
示例3: test_no_clustering
def test_no_clustering(self):
    """makedb with --clustering_divergence 0 must keep near-identical OTUs separate.

    The database is built from two L11 sequences that differ only in their
    last base; querying the resulting smafadb directly is expected to
    report both of them as separate hits.
    """
    rows = [
        self.headers,
        ['ribosomal_protein_L11_rplK_gpkg','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','7','4.95','Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        # last base only is different to first sequence
        ['ribosomal_protein_L11_rplK_gpkg','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA','6','4.95','Root; k__Bacteria; p__Firmicutes; c__Bacilli'],
        ['ribosomal_protein_S17_gpkg','minimal','GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT','9','4.95','Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
    ]
    otu_table = "\n".join("\t".join(row) for row in rows)

    expected_hits = (
        'seq1\tAGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC\tGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA\t2\t60\n'
        'seq1\tAGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC\tGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC\t1\t60\n')

    with tempfile.NamedTemporaryFile() as otu_file:
        otu_file.write(otu_table)
        otu_file.flush()

        with tempdir.TempDir() as workdir:
            makedb_cmd = "{} makedb --db_path {}/db --otu_table {} --clustering_divergence 0".format(
                path_to_script, workdir, otu_file.name)
            extern.run(makedb_cmd)

            with tempfile.NamedTemporaryFile() as query_fasta:
                # The query differs from the first OTU sequence in its
                # leading base.
                query_fasta.write(">seq1\n")
                query_fasta.write("AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC\n")
                query_fasta.flush()

                # Querying the smafadb directly should show no clustering
                smafadb = os.path.join(
                    workdir, 'db', 'ribosomal_protein_L11_rplK_gpkg.smafadb')
                smafa_cmd = "smafa query {} {}".format(smafadb, query_fasta.name)
                observed = extern.run(smafa_cmd)
                self.assertEqual(observed, expected_hits)
开发者ID:wwood,项目名称:singlem,代码行数:29,代码来源:test_makedb_and_query.py
示例4: run
def run(self, input_sequence_file, input_sequence_type, daa_file_basename=None):
    '''Search the input sequences with diamond (blastp or blastx) against
    the database held in self._database.

    Parameters
    ----------
    input_sequence_file: str
        path to query sequences
    input_sequence_type: either 'nucleotide' or 'protein'
        the input_sequences are this kind of sequence
    daa_file_basename: str or None
        basename handed to diamond's -a option. When None, a temporary
        name is used and the resulting .daa file is deleted after it has
        been imported.

    Returns
    -------
    DiamondSearchResult
    '''
    if input_sequence_type == UnpackRawReads.PROTEIN_SEQUENCE_TYPE:
        search_mode = 'blastp'
    elif input_sequence_type == UnpackRawReads.NUCLEOTIDE_SEQUENCE_TYPE:
        search_mode = 'blastx'
    else:
        raise Exception("Programming error")

    using_temporary_name = daa_file_basename is None
    if using_temporary_name:
        with tempfile.NamedTemporaryFile(prefix='graftm_diamond') as t:
            # Only the unique name is wanted; the file itself is discarded
            # when the context exits.
            basename = t.name
    else:
        basename = daa_file_basename

    cmd_list = [
        "diamond",
        search_mode,
        '-k 1',
        "-d", self._database,
        "-q", "%s" % input_sequence_file,
        "-a", basename,
    ]
    if self._threads:
        cmd_list.extend(["--threads", str(self._threads)])
    if self._evalue:
        cmd_list.extend(["--evalue", str(self._evalue)])

    extern.run(' '.join(cmd_list))

    daa_name = "%s.daa" % basename
    result = DiamondSearchResult.import_from_daa_file(daa_name)

    if using_temporary_name:
        # Diamond makes an extra file, need to remove this
        os.remove(daa_name)
    return result
开发者ID:geronimp,项目名称:graftM,代码行数:56,代码来源:diamond.py
示例5: test_biom_hello_world
def test_biom_hello_world(self):
    """summarise --biom_prefix should write one biom file that converts back to the expected TSV."""
    insert_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK','insert','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG','1','2.44','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.12.ribosomal_protein_L11_rplK','insert','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTtttCAAGCAGGTGTG','2','2.94','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    # A trailing empty element gives the file a final newline.
    table_text = "\n".join(["\t".join(row) for row in insert_otu_table] + [''])

    biom_name = 'mybiom.4.12.ribosomal_protein_L11_rplK.biom'
    expected_tsv = (
        '# Constructed from biom file\n'
        '#OTU ID\tinsert\ttaxonomy\n'
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t1.0\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n'
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTtttCAAGCAGGTGTG\t2.0\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales')

    with tempdir.TempDir() as tmp:
        with tempfile.NamedTemporaryFile(suffix='.otu_table.csv') as otu_file:
            otu_file.write(table_text)
            otu_file.flush()

            summarise_cmd = "%s summarise --biom_prefix '%s' --input_otu_tables '%s'" % (
                path_to_script, os.path.join(tmp, "mybiom"), otu_file.name)
            extern.run(summarise_cmd)

            self.assertEqual([biom_name], os.listdir(tmp))

            convert_cmd = "biom convert -i '%s' -o /dev/stdout --to-tsv --header-key taxonomy" % os.path.join(tmp, biom_name)
            self.assertEqual(expected_tsv, extern.run(convert_cmd))
开发者ID:wwood,项目名称:singlem,代码行数:14,代码来源:test_summariser.py
示例6: test_jplace_output
def test_jplace_output(self):
    """pipe --output_jplace should write a jplace file with the expected placements.

    The 'tree' and 'metadata' entries of the loaded jplace are overwritten
    with fixed placeholders before comparison, so only the remaining keys
    are actually checked. The file is then fed to `guppy sing` to confirm
    that it can be read.
    """
    expected_jplace = {u'fields': [u'classification',
                                   u'distal_length',
                                   u'edge_num',
                                   u'like_weight_ratio',
                                   u'likelihood',
                                   u'pendant_length'],
                       u'metadata': 'the_metadata',
                       u'placements':
                       [{
                           u'nm': [[u'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
                                    2]],
                           u'p': [[u'o__Bacillales',
                                   0.0874346630859,
                                   13,
                                   0.333351177694,
                                   -631.301684875,
                                   0.150831104822],
                                  [u'o__Bacillales',
                                   0.0643521435547,
                                   14,
                                   0.333326655502,
                                   -631.301758441,
                                   0.15083915761],
                                  [u'p__Firmicutes',
                                   5.97534179688e-06,
                                   15,
                                   0.333322166804,
                                   -631.301771907,
                                   0.150839131805]]}],
                       u'tree': 'tree_thanks',
                       u'version': 3}

    with tempdir.TempDir() as d:
        cmd = "%s pipe --sequences %s --otu_table /dev/null --output_jplace %s"\
              " --singlem_packages %s" % (
                  path_to_script,
                  os.path.join(path_to_data,'1_pipe','jplace_test.fna'),
                  os.path.join(d, "my_jplace"),
                  os.path.join(path_to_data,'4.12.22seqs.spkg'))
        extern.run(cmd)

        jplace_path = os.path.join(d, 'my_jplace_jplace_test_4.12.22seqs.jplace')
        # Read inside a context manager so the handle is closed before the
        # temporary directory is removed.
        with open(jplace_path) as jplace_file:
            j = json.load(jplace_file)
        # Replace the entries that are not compared with fixed placeholders.
        j['tree'] = 'tree_thanks'
        j['metadata'] = 'the_metadata'
        self.assertEqual(expected_jplace, j)

        # Make sure the guppy sing does not croak
        extern.run("guppy sing -o /dev/null '%s'" % jplace_path)
开发者ID:wwood,项目名称:singlem,代码行数:49,代码来源:test_pipe.py
示例7: _create_dmnd_database
def _create_dmnd_database(self, unaligned_sequences_path, daa_output):
    '''Create a diamond database by running `diamond makedb`.

    Parameters
    ----------
    unaligned_sequences_path: str
        path to a FASTA file containing unaligned sequences
    daa_output: str
        Name of output database.
    '''
    logging.debug("Building diamond database")
    makedb_args = (unaligned_sequences_path, daa_output)
    extern.run("diamond makedb --in '%s' -d '%s'" % makedb_args)
开发者ID:geronimp,项目名称:graftM,代码行数:15,代码来源:create.py
示例8: global_search
def global_search(self, query_otu_table_collection,
                  subject_otu_table_collection, cluster_identity):
    '''Search a query OTU table against a subject OTU table with
    `vsearch --usearch_global`, yielding one UCEntry per line of the
    resulting UC file.

    Each yielded entry has its query (and target, when there is a hit)
    replaced by the corresponding OTU object instead of the string name
    written to the FASTA files. The target is left as None if there are
    no hits, since --output_no_hits is used.

    Parameters
    ----------
    query_otu_table_collection: OtuTableCollection
    subject_otu_table_collection: OtuTableCollection
    cluster_identity: float or str
        reject hits if have lower identity than this (implemented with
        vsearch --id).

    Yields
    ------
    UCEntry
    '''
    logging.info("Caching query OTUs")
    query_otus = list(query_otu_table_collection)
    logging.info("Caching target OTUs")
    subject_otus = list(subject_otu_table_collection)

    def name_to_index(name):
        # FASTA names are written below as "<index>;size=<count>".
        return int(name.split(';')[0])

    def write_fasta(handle, otus):
        # Number each sequence by its position in the cached list so that
        # hits can be mapped back to the OTU objects afterwards.
        for i, u in enumerate(otus):
            handle.write(">%i;size=%i\n" % (i, u.count))
            handle.write(u.sequence.replace('-','')+"\n")
        handle.flush()

    with tempfile.NamedTemporaryFile(prefix='singlem_q_for_vsearch') as query_f:
        write_fasta(query_f, query_otus)

        with tempfile.NamedTemporaryFile(prefix='singlem_db_for_vsearch') as db_f:
            # Write from the cached list, not the collection itself: the
            # indices must agree with subject_otus, and the collection may
            # not be safely iterable a second time.
            write_fasta(db_f, subject_otus)

            with tempfile.NamedTemporaryFile(prefix='singlem_uc') as uc:
                command = "vsearch --usearch_global %s --db %s --uc %s --id %s --output_no_hits" % (
                    query_f.name,
                    db_f.name,
                    uc.name,
                    str(cluster_identity))
                logging.info("Running search")
                extern.run(command)
                logging.info("Finished running search")

                with open(uc.name) as uc_read:
                    for uc_entry in UCFile(uc_read):
                        uc_entry.query = query_otus[name_to_index(uc_entry.query)]
                        if uc_entry.target is not None:
                            uc_entry.target = subject_otus[name_to_index(uc_entry.target)]
                        yield uc_entry
开发者ID:wwood,项目名称:singlem,代码行数:48,代码来源:sequence_searcher.py
示例9: test_cluster_across_samples_via_script
def test_cluster_across_samples_via_script(self):
    """summarise --cluster should merge the two near-identical OTUs across samples."""
    e = [['gene','sample','sequence','num_hits','coverage','taxonomy'],
         ['4.11.ribosomal_protein_L10','minimal','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT','2','4.88','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
         ['4.12.ribosomal_protein_L11_rplK','ma','TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales']
         ]
    exp = "\n".join(["\t".join(x) for x in e]+[''])

    expected_clusters = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.12.ribosomal_protein_L11_rplK',
         'ma',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
         '4',
         '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.12.ribosomal_protein_L11_rplK',
         'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
         '2',
         '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        # The output ends with a newline, so splitting leaves a final
        # empty row.
        ['']]

    with tempfile.NamedTemporaryFile(prefix='singlem_cluster') as otu_file:
        cmd = "%s summarise --cluster --cluster_id %f --input_otu_tables %s --output_otu_table /dev/stdout" % (
            path_to_script, 58.5/60, otu_file.name)
        for row in e:
            otu_file.write("\t".join(row) + "\n")
        otu_file.flush()

        output = extern.run(cmd)
        out_clusters = [line.split("\t") for line in output.split("\n")]
        self.assertEqual(expected_clusters, out_clusters)
开发者ID:wwood,项目名称:singlem,代码行数:31,代码来源:test_clusterer.py
示例10: test_seqs_dna
def test_seqs_dna(self):
    """`seqs` on a DNA alignment should report the best window as starting at 14.

    stderr is redirected by the shell into a temporary file so that the
    debug log can be inspected; stdout is expected to be empty.
    """
    aln = '''>s1
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATGTG
>s2 asdas
ca---------GAGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGA----
>s3
ga-------------TATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGGGCTGATGTG-
>d4
-g----------AGATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGtaACTGACGCTGATG--
'''
    with tempfile.NamedTemporaryFile() as a:
        a.write(aln)
        a.flush()
        with tempfile.NamedTemporaryFile() as stderr:
            cmd = "%s --debug seqs --alignment %s --alignment_type dna"\
                  " --window_size 20 2>%s" % (
                      path_to_script, a.name, stderr.name)
            self.assertEqual('', extern.run(cmd))
            # Re-open by name to read what the shell redirect wrote, and
            # close the handle promptly instead of leaking it.
            with open(stderr.name) as stderr_read:
                logged = stderr_read.read()
            # This includes ignored columns at the front, which were messing things up.
            self.assertIn(
                'Found best section of the alignment starting from 14\n',
                logged)
开发者ID:wwood,项目名称:singlem,代码行数:26,代码来源:test_seqs.py
示例11: test_paired_reads_one_read_each_diamond_example
def test_paired_reads_one_read_each_diamond_example(self):
    """Paired reads with one informative read in each file, assigned with diamond_example.

    Reads should be merged into a single OTU row.
    """
    expected = [
        "\t".join(self.headers_with_extras),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 2 4.88 2524614704 HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 seq2 60 60 False',
        '']
    expected_rows = [line.split("\t") for line in expected]

    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>seq2
AAAAAAAAAAAAAAAAA
'''
    # reverse complement of the forward, so should collapse.
    inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
AAAAAAAAAAAAAAAAA
>seq2
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
'''
    with tempfile.NamedTemporaryFile(suffix='.fa') as forward_fa:
        forward_fa.write(inseqs)
        forward_fa.flush()

        with tempfile.NamedTemporaryFile(suffix='.fa') as reverse_fa:
            reverse_fa.write(inseqs_reverse)
            reverse_fa.flush()

            cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {} --output_extras --assignment_method diamond_example".format(
                path_to_script,
                forward_fa.name,
                os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                reverse_fa.name)

            # Blank out every occurrence of the forward file's basename
            # (minus '.fa') in the output before comparing.
            sample_name = os.path.basename(forward_fa.name).replace('.fa', '')
            observed = extern.run(cmd).replace(sample_name, '')
            self.assertEqualOtuTable(expected_rows, observed)
开发者ID:wwood,项目名称:singlem,代码行数:31,代码来源:test_pipe.py
示例12: test_paired_reads_hello_world
def test_paired_reads_hello_world(self):
    """A forward read and its reverse complement should be merged into one OTU."""
    expected = [
        "\t".join(self.headers),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 1 2.44 Root; d__Bacteria; p__Firmicutes',
        '']
    expected_rows = [line.split("\t") for line in expected]

    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
    # reverse complement of the forward, so should collapse.
    inseqs_reverse = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
TTCAGCTGCACGACGTACCATAGTGTTTTTGTATACTTTATACTCAACACCAGCTTCACGTAATTGTGAACGTAAGTCAGTAACTTCAGCTACTGTTAAT
'''
    with tempfile.NamedTemporaryFile(suffix='.fa') as forward_fa:
        forward_fa.write(inseqs)
        forward_fa.flush()

        with tempfile.NamedTemporaryFile(suffix='.fa') as reverse_fa:
            reverse_fa.write(inseqs_reverse)
            reverse_fa.flush()

            cmd = "{} pipe --sequences {} --otu_table /dev/stdout --singlem_packages {} --reverse {}".format(
                path_to_script,
                forward_fa.name,
                os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                reverse_fa.name)

            # Blank out every occurrence of the forward file's basename
            # (minus '.fa') in the output before comparing.
            sample_name = os.path.basename(forward_fa.name).replace('.fa', '')
            observed = extern.run(cmd).replace(sample_name, '')
            self.assertEqualOtuTable(expected_rows, observed)
开发者ID:wwood,项目名称:singlem,代码行数:27,代码来源:test_pipe.py
示例13: test_known_sequence_taxonomy
def test_known_sequence_taxonomy(self):
    """pipe with --known_sequence_taxonomy and --no_assign_taxonomy, checked against a fixed OTU table."""
    expected = [
        "\t".join(self.headers),
        '4.11.22seqs TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA 2 4.88 mytax; yeh',
        '']
    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482 1:N:0:TAAGGCGACTAAGCCT
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
>another
ATTAACAGTAGCTGAAGTTACTGACTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTACGTCGTGCAGCTGAA
'''
    known_taxonomy_lines = (
        "HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482\tmytax; yeh\n",
        "another\tmytax; yeh; 2\n",
    )

    with tempfile.NamedTemporaryFile(suffix='.fa') as reads_fa:
        reads_fa.write(inseqs)
        reads_fa.flush()

        with tempfile.NamedTemporaryFile() as taxf:
            for taxonomy_line in known_taxonomy_lines:
                taxf.write(taxonomy_line)
            taxf.flush()

            cmd = ("%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s "
                   "--no_assign_taxonomy --known_sequence_taxonomy %s") % (
                       path_to_script,
                       reads_fa.name,
                       os.path.join(path_to_data,'4.11.22seqs.gpkg.spkg'),
                       taxf.name)

            # Blank out every occurrence of the reads file's basename
            # (minus '.fa') in the output before comparing.
            sample_name = os.path.basename(reads_fa.name).replace('.fa', '')
            observed_lines = extern.run(cmd).replace(sample_name, '').split("\n")
            self.assertEqual(expected, observed_lines)
开发者ID:wwood,项目名称:singlem,代码行数:26,代码来源:test_pipe.py
示例14: test_two_nucleotide_packages
def test_two_nucleotide_packages(self):
    """pipe run with two singlem packages should report OTUs from both."""
    expected = [
        "\t".join(self.headers),
        '61_otus.v3 GGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGACTGACGCTGATGTGCGAAAGCG 2 5.13 Root; k__Bacteria; p__Proteobacteria',
        '61_otus.second.v3 TTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCA 1 1.66 Root; k__Archaea; p__Euryarchaeota',
        '']
    expected_rows = [line.split("\t") for line in expected]

    inseqs = '''>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483 1:N:0:AAGAGGCAAAGGAGTA
GATATGGAGGAACACCAGTGGCGAAGGCGACTTTCTGGTCTGTAACTGACGCTGATGTGCGAAAGCGTGGGGATCAAACAGGATTAGATACCCTGGTAGT
>HWI-ST1243:156:D1K83ACXX:7:1105:6981:63483_revcom
ACTACCAGGGTATCTAATCCTGTTTGATCCCCACGCTTTCGCACATCAGCGTCAGTTACAGACCAGAAAGTCGCCTTCGCCACTGGTGTTCCTCCATATC
>NS500333:10:H0V2GAGXX:2:13211:8623:16289 1:N:0:GATCAG
ATTAGGTAGTTGCTGGGGTAACGTCCCAACAAGCCGATAATCGGTACGGGTTGTGAGAGCAAGAGCCCGGAGATGGATTCTGAGACACGAATCCAGGTCCTACGGGGCGCAGCAGGCGCGAAAACTTTACACTGCGCGAAAGCGCGATA
'''
    packages = (
        os.path.join(path_to_data,'61_otus.v3.gpkg.spkg'),
        os.path.join(path_to_data,'second_packge.spkg'),
    )

    with tempfile.NamedTemporaryFile(suffix='.fa') as reads_fa:
        reads_fa.write(inseqs)
        reads_fa.flush()

        cmd = "%s pipe --sequences %s --otu_table /dev/stdout --singlem_packages %s %s" % (
            path_to_script,
            reads_fa.name,
            packages[0],
            packages[1])

        # Blank out every occurrence of the reads file's basename (minus
        # '.fa') in the output before comparing.
        sample_name = os.path.basename(reads_fa.name).replace('.fa', '')
        observed = extern.run(cmd).replace(sample_name, '')
        self.assertEqualOtuTable(expected_rows, observed)
开发者ID:wwood,项目名称:singlem,代码行数:25,代码来源:test_pipe.py
示例15: summarise
def summarise(**kwargs):
    '''Summarise an OTU table as a krona plot by running ktImportText.

    One temporary text file is written per (sample, gene) combination that
    has data, each line holding a coverage followed by the taxonomy levels
    (a leading 'Root' is dropped when further levels follow). All of the
    files are then passed to a single ktImportText invocation.

    Keyword arguments
    -----------------
    krona_output: str
        path handed to ktImportText -o
    table_collection:
        OTU tables to summarise; passed as-is to
        Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count

    Raises
    ------
    Exception
        if any other keyword argument is given
    '''
    krona_output_file = kwargs.pop('krona_output')
    table_collection = kwargs.pop('table_collection')
    if kwargs:
        raise Exception("Unexpected arguments detected: %s" % kwargs)

    # prep the array
    gene_to_sample_to_taxonomy_to_count = Summariser._collapse_otu_table_into_gene_to_sample_to_taxonomy_to_count(table_collection)

    # Re-key the counts by sample first, so output can be ordered by
    # sample and then by gene.
    sample_to_gene_to_taxonomy_to_count = {}
    all_gene_names = set()
    for gene, sample_to_taxonomy_to_count in gene_to_sample_to_taxonomy_to_count.items():
        all_gene_names.add(gene)
        for sample, taxonomy_to_count in sample_to_taxonomy_to_count.items():
            sample_to_gene_to_taxonomy_to_count.setdefault(sample, {})[gene] = taxonomy_to_count
    is_more_than_one_sample = len(sample_to_gene_to_taxonomy_to_count) > 1

    # write the output krona files
    logging.info("Writing krona %s" % krona_output_file)
    cmd = 'ktImportText -o %s' % krona_output_file
    sample_tempfiles = []
    try:
        for sample in sorted(sample_to_gene_to_taxonomy_to_count):
            gene_to_taxonomy_to_count = sample_to_gene_to_taxonomy_to_count[sample]
            for gene in sorted(all_gene_names):
                if gene not in gene_to_taxonomy_to_count:
                    continue
                f = tempfile.NamedTemporaryFile(prefix='singlem_for_krona')
                # Registered immediately so the file is closed even if
                # writing it fails.
                sample_tempfiles.append(f)

                for taxonomy, coverage in gene_to_taxonomy_to_count[gene].items():
                    tax_split = taxonomy.split('; ')
                    if tax_split[0] == 'Root' and len(tax_split) > 1:
                        tax_split = tax_split[1:]
                    f.write('\t'.join([str(coverage)]+tax_split))
                    f.write('\n')
                f.flush()

                if is_more_than_one_sample:
                    display_name = '%s: %s' % (sample, gene)
                else:
                    display_name = gene
                cmd += " %s,'%s'" % (f.name, display_name)
        extern.run(cmd)
    finally:
        # The files must stay open until ktImportText has read them;
        # close them whether or not the run succeeded.
        for f in sample_tempfiles:
            f.close()
开发者ID:wwood,项目名称:singlem,代码行数:47,代码来源:summariser.py
示例16: test_print_insert
def test_print_insert(self):
    """pipe --include_inserts should print insert bases in the OTU sequence (lowercase 'ttt' in the second row)."""
    expected = [
        self.headers,
        ['S1.5.ribosomal_protein_L11_rplK','insert','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG','1','2.44','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['S1.5.ribosomal_protein_L11_rplK','insert','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTtttCAAGCAGGTGTG','1','2.51','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    # The '' entry matches the empty string left after the final newline
    # when the output is split.
    expected_lines = sorted(["\t".join(row) for row in expected] + [''])

    cmd = "%s --debug pipe --sequences %s/1_pipe/insert.fna --otu_table /dev/stdout --threads 4 --include_inserts" % (
        path_to_script, path_to_data)
    observed_lines = sorted(extern.run(cmd).split("\n"))
    # Both sides are sorted, so line order is not compared.
    self.assertEqual(expected_lines, observed_lines)
开发者ID:wwood,项目名称:singlem,代码行数:8,代码来源:test_pipe.py
示例17: _build_tree
def _build_tree(self, alignment, base, ptype, fasttree):
    '''Build a tree from an alignment by running the given fasttree executable.

    Parameters
    ----------
    alignment: str
        path to the alignment passed to fasttree
    base: str
        prefix of the output files; ".tre" and ".tre.log" are appended
    ptype:
        package type; Create._NUCLEOTIDE_PACKAGE_TYPE adds "-gtr -nt",
        anything else is run as an amino acid alignment
    fasttree: str
        fasttree executable to run

    Returns
    -------
    (log_file, tre_file) paths. Both are also appended to self.the_trash.
    '''
    tre_file = base + ".tre"
    log_file = base + ".tre.log"

    if ptype == Create._NUCLEOTIDE_PACKAGE_TYPE:
        # Nucleotide alignment
        template = "%s -quiet -gtr -nt -log %s -out %s %s"
    else:
        # Amino acid alignment
        template = "%s -quiet -log %s -out %s %s"
    extern.run(template % (fasttree, log_file, tre_file, alignment))

    self.the_trash += [log_file, tre_file]
    return log_file, tre_file
开发者ID:geronimp,项目名称:graftM,代码行数:18,代码来源:create.py
示例18: test_get_tree_default
def test_get_tree_default(self):
    """get_tree with no arguments should list marker/tree_file pairs whose files exist."""
    observed = extern.run("{} get_tree".format(path_to_script))
    lines = observed.split('\n')

    self.assertEqual('marker\ttree_file', lines[0])
    self.assertEqual('.tre', lines[1][-4:])
    self.assertGreater(len(lines), 10)

    # Skip the header and the final element left by the split.
    for row in lines[1:-1]:
        tree_path = row.split('\t')[1]
        self.assertTrue(os.path.exists(tree_path))
开发者ID:wwood,项目名称:singlem,代码行数:9,代码来源:test_summariser.py
示例19: _generate_tree_log_file
def _generate_tree_log_file(self, tree, alignment, output_tree_file_path,
                            output_log_file_path, residue_type, fasttree):
    '''Generate the FastTree log file given a tree and the alignment that
    made that tree, by running fasttree with -intree on the existing tree.

    Parameters
    ----------
    tree: str
        path to the existing tree (passed to -intree)
    alignment: str
        path to the alignment the tree was made from
    output_tree_file_path: str
        path passed to -out
    output_log_file_path: str
        path passed to -log
    residue_type:
        Create._NUCLEOTIDE_PACKAGE_TYPE or Create._PROTEIN_PACKAGE_TYPE
    fasttree: str
        fasttree executable to run

    Returns
    -------
    Nothing. The log file as parameter is written as the log file.

    Raises
    ------
    Exception
        if residue_type is neither of the two known package types
    '''
    if residue_type == Create._NUCLEOTIDE_PACKAGE_TYPE:
        cmd = "%s -quiet -gtr -nt -nome -mllen -intree '%s' -log %s -out %s %s" %\
            (fasttree, tree, output_log_file_path,
             output_tree_file_path, alignment)
    elif residue_type == Create._PROTEIN_PACKAGE_TYPE:
        cmd = "%s -quiet -nome -mllen -intree '%s' -log %s -out %s %s" %\
            (fasttree, tree, output_log_file_path,
             output_tree_file_path, alignment)
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `cmd` below.
        raise Exception(
            "Programming error: unexpected residue type %r" % (residue_type,))
    extern.run(cmd)
开发者ID:geronimp,项目名称:graftM,代码行数:18,代码来源:create.py
示例20: test_dump
def test_dump(self):
    """query --dump on the a.sdb test database should print its OTU table."""
    expected = """gene sample sequence num_hits coverage taxonomy
ribosomal_protein_L11_rplK_gpkg minimal GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC 7 15.1 Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales
ribosomal_protein_S2_rpsB_gpkg minimal CGTCGTTGGAACCCAAAAATGAAAAAATATATCTTCACTGAGAGAAATGGTATTTATATC 6 12.4 Root; k__Bacteria; p__Firmicutes; c__Bacilli
ribosomal_protein_S17_gpkg minimal GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT 9 19.5 Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus"""
    expected_rows = [line.split("\t") for line in expected.split("\n")]

    dump_cmd = "{} query --db {}/a.sdb --dump".format(
        path_to_script, path_to_data)
    observed = extern.run(dump_cmd)
    self.assertEqualOtuTable(expected_rows, observed)
开发者ID:wwood,项目名称:singlem,代码行数:10,代码来源:test_makedb_and_query.py
注:本文中的extern.run函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论