diff --git a/tools/kneaddata/.shed.yml b/tools/kneaddata/.shed.yml new file mode 100644 index 00000000000..69d3fc604a0 --- /dev/null +++ b/tools/kneaddata/.shed.yml @@ -0,0 +1,15 @@ +name: kneaddata +owner: iuc +type: unrestricted +description: Quality control and contaminant removal for metagenomic data +long_description: | + KneadData is a tool designed to perform quality control on + metagenomic and metatranscriptomic sequencing data, especially + data from microbiome experiments. It performs adapter trimming, + quality filtering, and removal of host contamination using + Bowtie2, TRIMMOMATIC and TRF. +homepage_url: https://github.com/biobakery/kneaddata +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/kneaddata +categories: + - Metagenomics + - Sequence Analysis diff --git a/tools/kneaddata/kneaddata.xml b/tools/kneaddata/kneaddata.xml new file mode 100644 index 00000000000..8cc8bcead33 --- /dev/null +++ b/tools/kneaddata/kneaddata.xml @@ -0,0 +1,505 @@ + + Quality control and contaminant removal for metagenomic data + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + + + + + + + +
+ + + + + + + + + + + + + + + + + +
+
+ +
+
+ + + + + + + + + + + + + + + + + + +
+ + + + read_type["select_read_type"] == "single" and tandem["trf_step"]["trf_bool"] == "include" and not cat_final_output + + + read_type["select_read_type"] == "single" and tandem["trf_step"]["trf_bool"] == "skip" and not cat_final_output + + + + + read_type["select_read_type"] == "single" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bowtie2" + + + read_type["select_read_type"] == "single" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bmtagger" + + + read_type["select_read_type"] == "single" and alignment["alignment_tool"]["tool_choice"] == "bowtie2" + + + read_type["select_read_type"] == "single" and alignment["alignment_tool"]["tool_choice"] == "bmtagger" + + + + + read_type["select_read_type"] == "paired" and tandem["trf_step"]["trf_bool"] == "include" and not cat_final_output + + + read_type["select_read_type"] == "paired" and tandem["trf_step"]["trf_bool"] == "skip" and not cat_final_output + + + read_type["select_read_type"] == "paired" and tandem["trf_step"]["trf_bool"] == "include" and not cat_final_output + + + read_type["select_read_type"] == "paired" and tandem["trf_step"]["trf_bool"] == "skip" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and not cat_final_output + + + read_type["select_read_type"] == "paired" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and store_temp and 
alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bowtie2" and not cat_final_output + + + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and store_temp and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + read_type["select_read_type"] == "paired" and alignment["alignment_tool"]["tool_choice"] == "bmtagger" and not cat_final_output + + + + + + + read_type["select_read_type"] == "paired" and cat_final_output + + + + read_type["select_read_type"] == "single" and cat_final_output + + + + + + + + + + + + + +
+ + + + + + + + + + +
+ +
+ + + +
+ +
+ + + +
+ +
+ + + + +
+ + + + + +
+
+ + + + + @software{kneaddata, + title = {KneadData}, + author = {Harvard School of Public Health}, + year = {2015}, + url = {https://github.com/biobakery/kneaddata}, + license = {MIT}, + note = {Quality control and contaminant removal tool for metagenomic sequencing data} + } + +
diff --git a/tools/kneaddata/macros.xml b/tools/kneaddata/macros.xml new file mode 100644 index 00000000000..dc04f4824e0 --- /dev/null +++ b/tools/kneaddata/macros.xml @@ -0,0 +1,14 @@ + + + 0.12.1 + 0 + 21.05 + + + + kneaddata + trf + bmtagger + + + diff --git a/tools/kneaddata/test-data/bowtie2_indices.loc b/tools/kneaddata/test-data/bowtie2_indices.loc new file mode 100644 index 00000000000..4d8f1c77d1e --- /dev/null +++ b/tools/kneaddata/test-data/bowtie2_indices.loc @@ -0,0 +1,38 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie2_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include a one-line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns separated by TABS. 
+# +# +# +# So, for example, if you had hg18 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing hg19 genome and hg19.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/mm10/bowtie2/dm3 +# +# +test_value test_dbkey test_name ${__HERE__}/bowtie2-ref \ No newline at end of file diff --git a/tools/kneaddata/test-data/kneaddata.log b/tools/kneaddata/test-data/kneaddata.log new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/kneaddata/test-data/test_paired_1.fastq b/tools/kneaddata/test-data/test_paired_1.fastq new file mode 100644 index 00000000000..3370d2b3700 --- /dev/null +++ b/tools/kneaddata/test-data/test_paired_1.fastq @@ -0,0 +1,4 @@ +@test1 +ACGTACGT ++ +IIIIIIII diff --git a/tools/kneaddata/test-data/test_paired_2.fastq b/tools/kneaddata/test-data/test_paired_2.fastq new file mode 100644 index 00000000000..61bf5ac7124 --- /dev/null +++ b/tools/kneaddata/test-data/test_paired_2.fastq @@ -0,0 +1,4 @@ +@test1 +TGCTAGCT ++ +IIIIIIII diff --git a/tools/kneaddata/test-data/test_single.fastq b/tools/kneaddata/test-data/test_single.fastq new file mode 100644 index 00000000000..aca0066135d --- /dev/null +++ b/tools/kneaddata/test-data/test_single.fastq @@ -0,0 +1,8 @@ +@test1 +ACGTACGT ++ +IIIIIIII +@test2 +TGCTAGCT ++ +IIIIIIII diff --git 
a/tools/kneaddata/test-data/tiny.fa b/tools/kneaddata/test-data/tiny.fa new file mode 100644 index 00000000000..b948f02f97f --- /dev/null +++ b/tools/kneaddata/test-data/tiny.fa @@ -0,0 +1,2 @@ +>chr1 +ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT diff --git a/tools/kneaddata/test-data/tiny_test.fastq b/tools/kneaddata/test-data/tiny_test.fastq new file mode 100644 index 00000000000..fdd2055ed5b --- /dev/null +++ b/tools/kneaddata/test-data/tiny_test.fastq @@ -0,0 +1,12 @@ +@test_read_1 +ACGTACGTACGTACGT ++ +FFFFFFFFFFFFFFFF +@test_read_2 +TGCAATGCATGCATGC ++ +FFFFFFFFFFFFFFFF +@test_read_3 +CGATCGATCGATCGAT ++ +FFFFFFFFFFFFFFFF diff --git a/tools/kneaddata/tool-data/bowtie2_indices.loc.sample b/tools/kneaddata/tool-data/bowtie2_indices.loc.sample new file mode 100644 index 00000000000..9ad57953fcb --- /dev/null +++ b/tools/kneaddata/tool-data/bowtie2_indices.loc.sample @@ -0,0 +1,35 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie2_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include a one-line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns separated by TABS. 
+# +# +# +# So, for example, if you had hg19 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing hg19 genome and hg19.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/dm3/bowtie2/dm3 diff --git a/tools/kneaddata/tool_data_table_conf.xml.sample b/tools/kneaddata/tool_data_table_conf.xml.sample new file mode 100644 index 00000000000..7a775c577f5 --- /dev/null +++ b/tools/kneaddata/tool_data_table_conf.xml.sample @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, path + + 
+
\ No newline at end of file diff --git a/tools/kneaddata/tool_data_table_conf.xml.test b/tools/kneaddata/tool_data_table_conf.xml.test new file mode 100644 index 00000000000..a7d6738c943 --- /dev/null +++ b/tools/kneaddata/tool_data_table_conf.xml.test @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, path + +
+
\ No newline at end of file