使用 kraken2 软件做宏基因组或扩增子项目的物种注释时,如果后续不使用 bracken,可能会遇到一个问题:生成的 biom 文件中的 taxonomy 并不是最终结果,其中会保留没有具体名称、只有级别前缀的分类级别(即以“__”结尾的条目),比如下面这样:
可以用下面的 Python 脚本来处理:它会读取 biom 文件,删除 taxonomy 中未具体命名的分类级别(以“__”结尾的条目),并输出新的 biom 文件。
import argparse

import numpy as np
from biom import Table, load_table


def _strip_unnamed_ranks(taxonomy):
    """Return *taxonomy* without placeholder ranks (entries ending in ``__``).

    A list/tuple of rank strings is filtered into a new list.  A single
    string is treated as a ``;``-separated lineage and returned as a
    ``'; '``-joined string.  Empty or missing values are returned as-is.
    """
    if not taxonomy:
        # None or an empty lineage: nothing to filter.
        return taxonomy
    if isinstance(taxonomy, str):
        # Iterating a str yields single characters, which would turn the
        # lineage into a list of letters; split it into ranks first.
        ranks = (rank.strip() for rank in taxonomy.split(';'))
        return '; '.join(
            rank for rank in ranks if rank and not rank.endswith('__')
        )
    return [level for level in taxonomy if not level.endswith('__')]


def _filter_observation_metadata(metadata):
    """Return a copy of the observation metadata with filtered taxonomy.

    *metadata* is the value returned by ``table.metadata(axis='observation')``;
    ``None`` (no observation metadata) is passed through unchanged.
    Entries are copied before modification so the loaded table is not mutated.
    """
    if metadata is None:
        return None

    filtered = []
    for meta in metadata:
        if meta is not None and 'taxonomy' in meta:
            meta = dict(meta)
            meta['taxonomy'] = _strip_unnamed_ranks(meta['taxonomy'])
        # Every observation keeps an entry so the metadata stays aligned
        # with the observation IDs.
        filtered.append(meta)
    return filtered


def main():
    """Rewrite a BIOM file with unnamed taxonomic ranks removed.

    Reads the table given by ``-i/--input``, drops every taxonomy entry that
    ends in ``__`` from the observation metadata, and writes the resulting
    table as JSON to the path given by ``-o/--output``.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='Input biom file', required=True)
    parser.add_argument('-o', '--output', help='Output biom file', required=True)
    args = parser.parse_args()

    # Load the BIOM table
    table = load_table(args.input)

    # Observation IDs and the count matrix are carried over unchanged
    otu_ids = table.ids(axis='observation')
    data = table.matrix_data

    # Filter the taxonomy in the observation metadata
    new_metadata = _filter_observation_metadata(
        table.metadata(axis='observation')
    )

    # Create a new BIOM table with the new metadata; ids()/metadata() without
    # an axis argument refer to the sample axis.
    new_table = Table(data, otu_ids, table.ids(), new_metadata, table.metadata())

    # Write the new BIOM table to a file
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(new_table.to_json("Filtered Taxonomy"))


if __name__ == "__main__":
    main()
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!