diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index e33684752cbbc526116586c8c4cdf9db7e20c286..0000000000000000000000000000000000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,40 +0,0 @@ -# recipe for building singularity image and deploy it on the registery for template -image: - name: nextflow/nextflow - entrypoint: [""] - -stages: - - build - - deploy - - test - -push: - stage: test - script: - - nextflow run ./main.nf - - nextflow run ./main.nf --help - -# Build Singularity container bwa_v0.7.17.sif -singularity-image: - image: quay.io/singularity/singularity:v3.4.0 - stage: build - script: - - singularity build template.sif Singularityfile - artifacts: - paths: - - template.sif - only: - changes: - - Singularityfile - - environment.yml - -# Push the image template.sif on the registry -deploy: - image: quay.io/singularity/singularity:v3.4.0 - stage: deploy - script: - - singularity push --docker-username "${CI_REGISTRY_USER}" --docker-password "${CI_REGISTRY_PASSWORD}" template.sif oras://"$CI_REGISTRY_IMAGE"/"$CI_PROJECT_NAME":"$CI_COMMIT_TAG" - only: - changes: - - Singularityfile - - environment.yml diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 167ec9d148022db6d782c4e7309534200375145f..0000000000000000000000000000000000000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM nfcore/base:1.7 -LABEL authors="Céline Noirot" \ - description="Docker image containing all requirements for get/template pipeline" - -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/GeT-template-1.0dev/bin:$PATH diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index b5f8ae5fe9499d60c0fe97ba40523ebfbf464d8b..75a0989baa999e00600a60420d1c27afc1c065bf 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -11,14 +11,14 @@ report_comment: > show_analysis_paths: False show_analysis_time: False - +disable_version_detection: true ## Number 
formatting thousandsSep_format: " " ## General Statistics table table_columns_visible: - Duplicats: False - ContaminationSearch - RNA: True + Duplicats: True + ContaminationSearch - rRNA: True samtools: False ReadsStats: percent_duplicates: False @@ -43,7 +43,7 @@ extra_fn_clean_exts: - "_screen" ## Plot config -export_plots: true +export_plots: false plots_force_interactive: true ## Module config @@ -88,7 +88,7 @@ module_order: # Pattern sp: - fastqc: + fastqc/zip: fn: "*.zip" fastq_screen: fn: '*_screen.txt' diff --git a/conf/base.config b/conf/base.config index 465f3617a3bf15ea5058a6ed4ebc10460dd0bde8..965385ff158a61077e2873867b84146b142782c4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -69,6 +69,7 @@ process { saveAs: { filename -> "${name}.fastq.gz" }*/ ] + ext.analyse_type = params.read_stats_label module = toolsModuleHash['ILLUMINA_FILTER'] cpus = { checkMax( 3 * task.attempt, 'cpus' ) } time = { checkMax( 4.h * task.attempt, 'time' ) } @@ -82,7 +83,7 @@ process { ] ext.args = "--reads_to_process ${params.fastp_n_reads}" - + ext.analyse_type = params.duplicats_label module = toolsModuleHash['FASTP'] time = { checkMax( 5.h * task.attempt, 'time' ) } memory = { checkMax( 3.GB * task.attempt, 'memory' ) } @@ -103,6 +104,7 @@ process { saveAs: { filename -> "${name}.html" } ] + ext.analyse_type = params.read_stats_label module = toolsModuleHash['FASTQC'] maxRetries = 4 cpus = { checkMax( 2 * task.attempt, 'cpus' ) } @@ -112,6 +114,7 @@ process { withName: FASTQSCREEN { time = { checkMax( 1.h * task.attempt, 'time' ) } module = toolsModuleHash['FASTQSCREEN'] + ext.analyse_type = params.contamination_search_label publishDir = [ path: "${params.outdir}/ContaminationSearch/FastQ-Screen", @@ -125,6 +128,8 @@ process { cpus = { checkMax( 6 * task.attempt, 'cpus' ) } memory = { checkMax( 16.GB * task.attempt, 'memory' ) } time = { checkMax( 3.d * task.attempt, 'time' ) } + + ext.analyse_type = params.alignment_stats_label publishDir = [ path: 
"${params.outdir}/alignment/bwa", @@ -137,7 +142,8 @@ process { module = toolsModuleHash['SALMON'] time = { checkMax( 1.h * task.attempt, 'time' ) } memory = { checkMax( 3.GB * task.attempt, 'memory' ) } - cpus = 8 + cpus = 8 + ext.analyse_type = params.alignment_stats_label } withName: SALMON_QUANT { @@ -145,6 +151,7 @@ process { time = { checkMax( 1.h * task.attempt, 'time' ) } memory = { checkMax( 10.GB * task.attempt, 'memory' ) } cpus = 8 + ext.analyse_type = params.alignment_stats_label publishDir = [ path: "${params.outdir}/alignmentStats", @@ -157,6 +164,7 @@ process { module = toolsModuleHash['STAR'] memory = { checkMax( 50.GB * task.attempt, 'memory' ) } cpus = 8 + ext.analyse_type = params.alignment_stats_label } withName: STAR_ALIGN { @@ -164,6 +172,7 @@ process { memory = { checkMax( 20.GB * task.attempt, 'memory' ) } cpus = 2 time = { checkMax( 1.d * task.attempt, 'memory' ) } + ext.analyse_type = params.alignment_stats_label publishDir = [ path: "${params.outdir}/alignmentStats", @@ -184,6 +193,7 @@ process { "-m ${params.min_overlap}", "-M ${params.max_overlap}" ].join(' ') + ext.analyse_type = params.join_pairs_label publishDir = [ path: "${params.outdir}/joinPair", @@ -197,6 +207,7 @@ process { time = { checkMax( 5.h * task.attempt, 'time' ) } memory = { checkMax( 2.GB * task.attempt, 'memory' ) } cpus = 4 + ext.analyse_type = params.join_pairs_label ext.args = [ "-max_target_seqs ${params.blast_max_target}", @@ -324,6 +335,10 @@ process { module = toolsModuleHash['SEQTK'] } + withName: ADD_MULTIQC { + errorStrategy = 'ignore' + } + withName: MULTIQC { ext.args = [ "--config ${baseDir}/assets/multiqc_config.yaml", @@ -344,9 +359,11 @@ process { withName: SORTMERNA { module = toolsModuleHash['SORTMERNA'] - memory = { checkMax( 10.GB * task.attempt * params.resource_factor, 'memory' ) } - time = { checkMax( 10.h * task.attempt, 'time' ) } - cpus = { checkMax( 1 * task.attempt, 'cpus' ) } + memory = { checkMax( 30.GB * task.attempt * 
params.resource_factor, 'memory' ) } + time = { checkMax( 3.h * task.attempt, 'time' ) } + cpus = { checkMax( 28 * task.attempt, 'cpus' ) } + + ext.analyse_type = params.contamination_search_label publishDir = [ path: "${params.outdir}/rRNA", @@ -370,6 +387,8 @@ process { memory = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) } time = { checkMax( 3.h * task.attempt, 'time' ) } + ext.analyse_type = params.alignment_stats_label + publishDir = [ path: "${params.outdir}/alignmentStats/qualimap", mode: 'copy', diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config index b715b7a99798c50ea7d6d49776c65603a14b6ffd..6719d125df26fb45a759676be8da66ee4e1e633b 100644 --- a/conf/dependencies_genobioinfo.config +++ b/conf/dependencies_genobioinfo.config @@ -25,7 +25,7 @@ toolsModuleHash['BLAST'] = ['bioinfo/NCBI_Blast+/2.10.0+'] // SHARED MODULES //========================================= toolsModuleHash['SEQTK'] = ['bioinfo/Seqtk/1.3'] -toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.24.1'] toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6'] // version upgraded face to genologin toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20'] toolsModuleHash['KRONA'] = ['bioinfo/Krona/2.8.1'] // version upgraded face to genologin diff --git a/conf/functions.config b/conf/functions.config index e4ff01764a59115c40dbd79988e86225b7bda087..7c8b7222e99e700a0bece3f1760e62532a2dbc6d 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -1,3 +1,5 @@ +import org.yaml.snakeyaml.Yaml + def helpMessage() { log.info""" @@ -208,4 +210,54 @@ def sendFinalMail(formatted_date, summary) { output_tf.withWriter { w -> w << email_txt } return mail_sent +} + +// +// [nf-core] Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + return version_string +} + +// +// inspired from [nf-core] Get software versions for pipeline +// Entries whose value is null are dropped before the map is re-dumped as YAML. +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + + def versions = yaml.load(yaml_file) + .findAll { k, v -> v != null } + .collectEntries { k, v -> + [k.tokenize(':')[0], v] + } + + return yaml.dumpAsMap(versions).trim() +} + +// +// [nf-core] Get workflow version for pipeline +// +def workflowVersionToYAML() { + // Workflow: + return """ + Workflow - $workflow.manifest.name: ${getWorkflowVersion()} + Workflow - Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// [nf-core] Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) } \ No newline at end of file diff --git a/conf/prod.config b/conf/prod.config index 4f9b19ca1801ba4bb66a63f1bc27af11b3765bf0..eb1bd1aed87b1333340d1df8fb7fdd16c7f15f14 100644 --- a/conf/prod.config +++ b/conf/prod.config @@ -7,7 +7,7 @@ process { publishDir = [ path: "${params.outdir}/ngl", mode: 'copy', - pattern: "*.{log,created}" + pattern: "*.{log,created,existing}" ] } } \ No newline at end of file diff --git a/conf/report.config b/conf/report.config index 8a5bb71f4e7b677905e88fffd3497e831e1ae46c..5ee89825f8c3a15fbf28d5fbd7d6bddded4b2819 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.19.0' + version = '1.23.0' } \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 4294ca1ed794e72803f424da75cf4683a35e2424..c2de5cd82a96a9727cf3be70bf9cb5b8332f4b2f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -14,7 +14,7 @@ process { publishDir = [ path: 
"${params.outdir}/ngl", mode: 'copy', - pattern: "*.{log,created}" + pattern: "*.{log,created,existing}" ] } diff --git a/docs/usage.md b/docs/usage.md index 6bf72b35237374a0a7ffb10ef770a2fa05467739..7831e2c9b7178cd4dea96ad7ad69e0f02275f0ad 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -64,7 +64,7 @@ _Default_ : null - **`--host`** [str] The name of the server on which the pipeline is launched. This value is used to select slurm modules to load. -_Default_ : genologin +_Default_ : genobioinfo - **`--shared_modules`** [str] Path to the shared_modules sources. This is nextflow modules shared between several pipelines. @@ -195,7 +195,7 @@ _Default_ : 0.1 - **`--assignation_databank`** [str] Path to the databank for taxonomic assignment. -_Default_ : null +_Default_ : /save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial - **`--blast_outfmt`** [int] BLAST output format. diff --git a/main.nf b/main.nf index 639c4f5a01690b62fcbcdcef3b0c13d71d5edc14..403cbe455b1413815fb5266f9fbeb5bd4fa88dbb 100644 --- a/main.nf +++ b/main.nf @@ -32,7 +32,7 @@ params.summary.collect{k,v -> println "$k : $v"} NAMED WORKFLOW FOR PIPELINE ======================================================================================== */ -include { SHORT_READS_QC } from "$baseDir/workflow/illumina_qc.nf" +include { SHORT_READS_QC } from "$baseDir/workflow/short_reads_qc.nf" workflow PLAGE { SHORT_READS_QC() diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index c32681ffaf350e4c8e0584cd462683b517e1f859..995ebf53d245dd517f6de6b3dfd0d88a26c4a219 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -26,6 +26,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA { input: val nglCode path csvFile + val lane output: path("*.log") @@ -34,7 +35,7 @@ process TREATMENT_DEMUXSTAT_ILLUMINA { script: def args = task.ext.args ?: '' forceOption = workflow.resume ? "--force" : '' - def lane = params.lane ?: '0' + def level = lane ? 
"run_${lane}" : 'readsets' """ perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\ --code $nglCode \\ diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 43e812821ece5b64290b9b5899eb1b3106015220..be422b7272cc4b22fc6dea606a59338878b4e0f6 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -46,11 +46,17 @@ process FASTQC { output: tuple val(name), path("*_fastqc.html") , emit: html tuple val(name), path("*_fastqc.zip") , emit: zip + path("versions.yml") , emit: versions // path log files script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ fastqc -t $task.cpus --nogroup --noextract --outdir ./ ${read} + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) + END_VERSIONS """ } @@ -64,12 +70,18 @@ process ILLUMINA_FILTER { output: tuple val("$name"), path("*.fastq.gz"), emit: reads path("*.output"), emit: log + path("versions.yml") , emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ zcat $read | fastq_illumina_filter --keep N -v 2> ${name}.output | gzip -c -f > ${name}_filtered.fastq.gz + + cat <<-END_VERSIONS > versions.yml + '${analyse_type} - fastq_illumina_filter': \$( fastq_illumina_filter -h | head -1 | sed -n 's/.*version \\([0-9.]*\\).*/\\1/p' ) + END_VERSIONS """ - + // } process FASTQSCREEN { @@ -80,17 +92,23 @@ process FASTQSCREEN { output: tuple val(sample), path("*.txt"), emit: report + path("versions.yml") , emit: versions script: def args = task.ext.args ?: '' def defaultConf = "${baseDir}/assets/fastq_screen.conf_example" def inputConf = "${params.inputdir}/fastq_screen.conf" def confFile = file(inputConf).exists() ? 
inputConf : defaultConf + def analyse_type = task.ext.analyse_type ?: params.default_label """ fastq_screen \\ $reads \\ --conf ${confFile} \\ $args + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - fastq_screen: \$( fastq_screen --version | sed '/FastQ Screen v/!d; s/.*v//' ) + END_VERSIONS """ } @@ -102,12 +120,14 @@ process DUPLICATED_READS { output: tuple val(sample), path("*.json"), emit: json - tuple val(sample), path("*.log") + tuple val(sample), path("*.log"), emit: log + path("versions.yml") , emit: versions shell: R1_name=file(fastq[0]).simpleName R2_name=file(fastq[1]).simpleName - def args = task.ext.args ?: '' + args = task.ext.args ?: '' + analyse_type = task.ext.analyse_type ?: params.default_label ''' fastp \ -i !{fastq[0]} \ @@ -120,6 +140,10 @@ process DUPLICATED_READS { --json !{R1_name}_fastp.json \ !{args} \ 2> !{R1_name}.log + + cat <<-END_VERSIONS > versions.yml + !{analyse_type} - fastp: $(fastp --version 2>&1 | sed -e 's/fastp //g') + END_VERSIONS ''' } diff --git a/modules/local/module_diversity.nf b/modules/local/module_diversity.nf index b2a27ab98002fd1994de1517813613bf914da50a..787a49da7424128b851b41b0bf5aa6723e9dc27e 100644 --- a/modules/local/module_diversity.nf +++ b/modules/local/module_diversity.nf @@ -13,9 +13,11 @@ process JOIN_PAIR { tuple val(sample), path("*.notCombined_*.fastq.gz"), emit: notCombined tuple val(sample), path("*.log"), emit: logs tuple val(sample), path("*.hist"), emit: histogram + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ flash \\ $reads \\ @@ -26,6 +28,10 @@ process JOIN_PAIR { > ${sample}_flash.log mv ${sample}.hist ${sample}_flash.hist + +cat <<-END_VERSIONS > versions.yml +${analyse_type} - flash: \$( flash --version | sed \'/^FLASH v/!d; s/.*v//' ) +END_VERSIONS """ } @@ -39,9 +45,11 @@ process BLAST_N { output: tuple val(sample), path("*.blastn"), emit: results + path("versions.yml"), emit: 
versions script: def args = task.ext.args ?: '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ db_dir=\$(dirname $db) [[ `find -L \$db_dir -name "*.00.idx"` ]] && isIndexed='true' || isIndexed='false' @@ -53,6 +61,10 @@ process BLAST_N { -use_index \$isIndexed \\ $args \\ -out ${sample}.blastn + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - blastn: \$(blastn -version 2>&1 | sed '/^.*blastn: /!d; s/.*: //') + END_VERSIONS """ } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 02836b5ec3916fddb2e5ba31c211a8d6dcc94c3f..66fc4e2a751cd39e5b4da3fc0dcad9b805b74619 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -13,12 +13,18 @@ process BWA_ALIGNMENT { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.sam"), emit: sam + path("versions.yml"), emit: versions script: def reference = params.reference_genome ?: params.reference_transcriptome def referenceName=file(reference).toString().split('/')[6] + def analyse_type = task.ext.analyse_type ?: params.default_label """ bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - bwa: \$( bwa 2>&1 | sed '/^Version/!d; s/.*: //' ) + END_VERSIONS """ } @@ -32,10 +38,16 @@ process SAMTOOLS_VIEW { output: tuple val(sample), path("*.bam"), emit: bam + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } @@ -51,11 +63,17 @@ process SAMTOOLS_SORT { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.bam"), emit: bam + path("versions.yml"), emit: versions //path("*.bam"), emit: bam script: // Pourquoi unmerged ??? 
https://forgemia.inra.fr/genotoul-bioinfo/ng6/-/blob/master/workflows/components/bwa.py#L97 + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools sort ${bam} -o ${sample}_unmerged.bam 2>> ${sample}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } @@ -71,10 +89,16 @@ process SAMTOOLS_FLAGSTATS { output: tuple val(sample), path("*.log"), emit: log tuple val(sample), path("*.txt"), emit: txt + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ samtools flagstat ${bam} > ${sample}_flagstat.txt 2>> ${sample}.log + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - samtools: \$(samtools --version 2>&1 | sed '/^.*samtools/!d; s/.* //') + END_VERSIONS """ } diff --git a/modules/local/module_rna.nf b/modules/local/module_rna.nf index bd1a28825ce05d0ecd025788291f305eb3e4261e..5201617b37b9cf2574f79ce8b66c1a553ca1f613 100644 --- a/modules/local/module_rna.nf +++ b/modules/local/module_rna.nf @@ -7,13 +7,19 @@ process SALMON_INDEX { output: path("index/"), emit: index + path("versions.yml"), emit: versions script: + def analyse_type = task.ext.analyse_type ?: params.default_label """ salmon index \ -t ${params.reference_transcriptome} \ -i ./index \ --threads ${task.cpus} + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - salmon: \$(salmon --version | sed 's/salmon //') + END_VERSIONS """ } @@ -28,12 +34,13 @@ process SALMON_QUANT { output: tuple val(sample), path("$sample/"), emit: results - path("versions.yml"), emit: version + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' def R1 = reads.find { it =~ /.*_R1_.*/} def R2 = reads.find { it =~ /.*_R2_.*/} + def analyse_type = task.ext.analyse_type ?: params.default_label """ salmon quant \\ --libType ${lib_type} \\ @@ -45,10 +52,9 @@ process SALMON_QUANT { $args \\ 2> /dev/null - cat <<-END_VERSIONS 
> versions.yml - "${task.process}": - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") - END_VERSIONS + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") + END_VERSIONS """ } @@ -62,11 +68,13 @@ process STAR_INDEX { output: path("index/"), emit: index + path("versions.yml"), emit: versions script: // renamme en .fa ?? utile ?? def args = task.ext.args ?: '' def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fai}` @@ -78,7 +86,10 @@ process STAR_INDEX { --runThreadN $task.cpus \\ --genomeSAindexNbases \$NUM_BASES \\ $args - + + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - star: \$(STAR --version) + END_VERSIONS """ } @@ -94,10 +105,12 @@ process STAR_ALIGN { tuple val(sample), path("${sample}_Log.final.out"), emit: results tuple val(sample), path("${sample}_Log.out"), emit: log tuple val(sample), path("${sample}_Aligned.out.sam"), emit: sam + path("versions.yml"), emit: versions script: def args = task.ext.args ?: '' def read_files_cmd = reads[0].endsWith('.gz') ? 
'--readFilesCommand zcat' : '' + def analyse_type = task.ext.analyse_type ?: params.default_label """ STAR \\ --outFileNamePrefix ${sample}_ \\ @@ -107,5 +120,8 @@ process STAR_ALIGN { --readFilesIn $reads \\ $read_files_cmd + cat <<-END_VERSIONS > versions.yml + ${analyse_type} - star: \$(STAR --version) + END_VERSIONS """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 8bff24659cf454295661e0f5446f4ab29552e542..1a5d1b7f23483b4278ace9f9358a76f11f0a0d08 100644 --- a/nextflow.config +++ b/nextflow.config @@ -46,7 +46,7 @@ params { min_overlap = 20 max_overlap = 55 max_mismatch_density = 0.1 - assignation_databank = '' + assignation_databank = '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial' blast_outfmt = 7 blast_max_target = 10 @@ -72,6 +72,14 @@ params { max_time = "90.d" max_cpus = "48" + // Labels to display tool versions in MultiQC report + default_label = 'Pipeline' + read_stats_label = 'ReadStats' + duplicats_label = 'Duplicats' + contamination_search_label = 'ContaminationSearch' + join_pairs_label = 'JoinPairs' + alignment_stats_label = 'AlignmentStats' + // OTHERS cluster_options = '' is_dev_mode = false diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index 61f8885492641057b3afa8866ea45d8f2645013c..bb1c476fa10f507dbaf7e2f9d276c0f6b26868ff 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -39,6 +39,8 @@ workflow CORE_ILLUMINA { readsetsFile main: + ch_versions = Channel.empty() + // ----------- DemultiplexStat PREP_DEMUXSTAT(sampleSheet) DEMUX_STATS(demuxStatXML, PREP_DEMUXSTAT.out, demuxSummary) @@ -50,16 +52,18 @@ workflow CORE_ILLUMINA { } else { // Si MiSeq ou Nova + noIndex ILLUMINA_FILTER(fastq) fastq_good = ILLUMINA_FILTER.out.reads + ch_versions = ch_versions.mix(ILLUMINA_FILTER.out.versions) } if (params.insert_to_ngl){ // Add demultiplexStat treatments - TREATMENT_DEMUX_RUN(nglBiRunCode, 
DEMUX_STATS.out.demultiplexStatsTSV) - TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV) + TREATMENT_DEMUX_RUN(nglBiRunCode, DEMUX_STATS.out.demultiplexStatsTSV, params.lane) + TREATMENT_DEMUX_READSETS(readsetsFile, DEMUX_STATS.out.demultiplexStatsTSV, '') } emit: fastq = fastq_good demuxStat = DEMUX_STATS.out.demultiplexStatsTSV + versions = ch_versions } diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index 20d45dc38f3f04a1eb6f5833ecffc4bb2823b808..f19e856563495b9c0dc026da579ab6b193a74da3 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -30,11 +30,15 @@ workflow CORE { ch_read main: + ch_versions = Channel.empty() + // ----------- FASTQC FASTQC(ch_read) + ch_versions = ch_versions.mix(FASTQC.out.versions) // ----------- ContaminationSearch FASTQSCREEN(ch_read) + ch_versions = ch_versions.mix(FASTQSCREEN.out.versions) // ----------- Recherche Duplicats GUNZIP(ch_read) @@ -62,10 +66,12 @@ workflow CORE { .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] } .groupTuple() ) // need fastq paired !!! 
+ ch_versions = ch_versions.mix(DUPLICATED_READS.out.versions) emit: fastqc_report = FASTQC.out.zip ?: Channel.empty() fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty() fastp_report = DUPLICATED_READS.out.json subset_fastq = unzipped_fastq + versions = ch_versions } diff --git a/sub-workflows/local/diversity_qc.nf b/sub-workflows/local/diversity_qc.nf index 06c59d0656cebdc690f4765b85889fbc78df1eea..a46177c72fd1e891205043c213ac52064c3b807e 100644 --- a/sub-workflows/local/diversity_qc.nf +++ b/sub-workflows/local/diversity_qc.nf @@ -24,8 +24,11 @@ workflow DIVERSITY_QC { fastq main: + ch_versions = Channel.empty() + // Pairs merging JOIN_PAIR(fastq) + ch_versions = ch_versions.mix(JOIN_PAIR.out.versions) // SubsetAssignation if (params.assignation_databank != '') { @@ -37,6 +40,7 @@ workflow DIVERSITY_QC { // -- Taxonomic assignation BLAST_N(FQ_TO_FA.out.fasta, params.assignation_databank) + ch_versions = ch_versions.mix(BLAST_N.out.versions) KRONA_BLAST(BLAST_N.out.results) krona_html = KRONA_BLAST.out.html @@ -49,4 +53,5 @@ workflow DIVERSITY_QC { histogram = JOIN_PAIR.out.histogram logs = JOIN_PAIR.out.logs krona = krona_html + versions = ch_versions } \ No newline at end of file diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf index 7f39268a802d5d9f348c616cb8d96684ab5203b9..b7d5e6a871b500f66bcd30625ca6272bd7f2f1a4 100644 --- a/sub-workflows/local/dna_qc.nf +++ b/sub-workflows/local/dna_qc.nf @@ -25,6 +25,8 @@ workflow DNA_QC { fastq main: + ch_versions = Channel.empty() + if ( "$params.reference_genome" != '' || "$params.reference_transcriptome" != '') { BWA_ALIGNMENT(fastq) SAMTOOLS_VIEW(BWA_ALIGNMENT.out.sam) @@ -35,6 +37,12 @@ workflow DNA_QC { qualimap_report_emitted = QUALIMAP.out.report flagstats_output_emitted = SAMTOOLS_FLAGSTATS.out.txt bam_output_emitted = SAMTOOLS_SORT.out.bam + ch_versions = ch_versions.mix( + BWA_ALIGNMENT.out.versions, + SAMTOOLS_VIEW.out.versions, + SAMTOOLS_SORT.out.versions, + 
SAMTOOLS_FLAGSTATS.out.versions + ) } else { System.out.println "Pas de référence genomique ou transcriptomique renseignée, on ne peut pas faire d'alignement" @@ -48,4 +56,5 @@ workflow DNA_QC { qualimap_report = qualimap_report_emitted flagstats_output = flagstats_output_emitted bam = bam_output_emitted + versions = ch_versions } \ No newline at end of file diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf index 61d663850537efe8a7f004fb54b1f77981de0197..bfac0d84b663ecaccc7745fba210e6c3c501b79c 100644 --- a/sub-workflows/local/rna_qc.nf +++ b/sub-workflows/local/rna_qc.nf @@ -39,9 +39,11 @@ workflow RNA_QC { sortmerna_db main: - fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple() + ch_versions = Channel.empty() align_results = Channel.empty() + fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple() + if ( "$params.reference_genome" != '' ) { // if indexFiles does not exist if ( ! 
file(file(params.reference_genome).getParent() + '/SAindex').exists() || params.make_star_index) { @@ -49,6 +51,7 @@ workflow RNA_QC { reference_genome = Channel.from(params.reference_genome) genome_index = SAMTOOLS_FAIDX(reference_genome).index star_index = STAR_INDEX(reference_genome, genome_index).index + ch_versions = ch_versions.mix(STAR_INDEX.out.versions) } else { star_index = Channel.from(file(params.reference_genome).getParent()) } @@ -58,6 +61,12 @@ workflow RNA_QC { SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam) SAMTOOLS_FLAGSTATS(SAMTOOLS_VIEW.out.bam) qualimap_report_emitted = QUALIMAP(SAMTOOLS_SORT.out.bam).report + ch_versions = ch_versions.mix( + STAR_ALIGN.out.versions, + SAMTOOLS_VIEW.out.versions, + SAMTOOLS_SORT.out.versions, + SAMTOOLS_FLAGSTATS.out.versions + ) } else if ("$params.reference_transcriptome" != '') { // 10X + transcriptome > use BWA @@ -70,12 +79,14 @@ workflow RNA_QC { ) align_results = BWA.out.flagstats_output qualimap_report_emitted = BWA.out.qualimap_report + ch_versions = ch_versions.mix(BWA.out.versions) } else { // if indexFiles does not exist if ( ! file(file(params.reference_transcriptome).getParent() + '/seq.bin').exists()) { println "SALMON index files does not exists -> Let's start transcriptome indexing..." 
salmon_index = SALMON_INDEX().index + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) } else { salmon_index = Channel.from(file(params.reference_transcriptome).getParent()) } @@ -98,6 +109,7 @@ workflow RNA_QC { ch_lib_type ).results qualimap_report_emitted= Channel.empty() + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) } } else { @@ -114,4 +126,5 @@ workflow RNA_QC { sortmerna_log = SORTMERNA.out.log qualimap_report = qualimap_report_emitted //flagstats_output = flagstats_output_emitted + versions = ch_versions } \ No newline at end of file diff --git a/workflow/illumina_qc.nf b/workflow/short_reads_qc.nf similarity index 94% rename from workflow/illumina_qc.nf rename to workflow/short_reads_qc.nf index d2d87cae0b5bd3ee9b3fbced6b7b3c7b0c4cbcff..f393c3db80d4b08f89643f38c60e3268bc912782 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/short_reads_qc.nf @@ -6,7 +6,8 @@ nextflow.enable.dsl = 2 include { helpMessage; createSummary; sendBeginMail; - sendFinalMail + sendFinalMail; + softwareVersionsToYAML } from "$baseDir/conf/functions.config" // Show help message @@ -124,6 +125,8 @@ sendBeginMail(format.format(new Date())) // ------------------------------------------------- workflow SHORT_READS_QC { ch_mqc = Channel.empty() + ch_versions = Channel.empty() + WORKFLOW_SUMMARY() if (params.insert_to_ngl){ @@ -140,6 +143,7 @@ workflow SHORT_READS_QC { if (! 
params.skip_core_illumina && params.sequencer =~ "NovaSeq|MiSeq" ) { CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read, nglBiRunCode, readsets_created) fastq = CORE_ILLUMINA.out.fastq + ch_versions = ch_versions.mix(CORE_ILLUMINA.out.versions) } else { fastq = ch_read } @@ -148,6 +152,7 @@ workflow SHORT_READS_QC { } CORE(fastq) + ch_versions = ch_versions.mix(CORE.out.versions) if (params.data_nature =~ 'DNA|GENOMIC') { DNA_QC(CORE.out.subset_fastq @@ -160,6 +165,7 @@ workflow SHORT_READS_QC { DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix(DNA_QC.out.versions) // DTM process if (params.DTM_mode) { @@ -174,6 +180,7 @@ workflow SHORT_READS_QC { RNA_QC.out.sortmerna_log.collect{it[1]}.ifEmpty([]), RNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), ) + ch_versions = ch_versions.mix(RNA_QC.out.versions) } else if (params.data_nature =~ "16S|Amplicon|METAGENOMIC|METATRANSCRIPTOMIC") { DIVERSITY_QC(fastq @@ -187,18 +194,28 @@ workflow SHORT_READS_QC { DIVERSITY_QC.out.histogram.collect{it[1]}.ifEmpty([]), DIVERSITY_QC.out.logs.collect{it[1]}.ifEmpty([]) ) + ch_versions = ch_versions.mix(DIVERSITY_QC.out.versions) } else { System.out.println "Le QC des données ${params.data_nature} n'a pas de sub-workflow spécifique pour le moment." ch_mqc = ch_mqc.mix( Channel.empty() ) } + + version_yaml = softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_mqc_versions.yml', + sort: true, + newLine: true + ) MULTIQC(WORKFLOW_SUMMARY.out.ifEmpty([]) .mix( CORE.out.fastqc_report.collect{it[1]}.ifEmpty([]), CORE.out.fastqscreen_report.collect{it[1]}.ifEmpty([]), CORE.out.fastp_report.collect{it[1]}.ifEmpty([]), - ch_mqc.collect().ifEmpty([]) + ch_mqc.collect().ifEmpty([]), + version_yaml ).collect() )