From e8027638c051b2d3a47badb9e5dbd1a2d96eff4e Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Tue, 12 Sep 2023 11:28:15 +0200 Subject: [PATCH] New step to merge lane fastq files Ref: #79 --- conf/base.config | 6 ++++++ docs/usage.md | 4 ++++ modules/local/module_core.nf | 26 ++++++++++++++++++++++++++ nextflow.config | 1 + sub-workflows/local/core_pipeline.nf | 21 +++++++++++++++++++-- 5 files changed, 56 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 22bb404..e918941 100644 --- a/conf/base.config +++ b/conf/base.config @@ -55,6 +55,12 @@ process { module = toolsModuleHash['R'] } + withName: MERGE_LANES { + cpus = 1 + memory = { 1.GB * task.attempt } + time = { 2.h * task.attempt } + } + withName: ILLUMINA_FILTER { publishDir = [ path: "${params.outdir}/IlluminaFilter", diff --git a/docs/usage.md b/docs/usage.md index 9908d51..10c2427 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -120,6 +120,10 @@ _Default_ : null The nG6 like description of the analysis. _Default_ : null +- **`--merge_lanes`** [bool] +Merge fastq over the two lanes in CORE pipeline. +_Default_ : false + ### Skipping parameters There are some availlable flags can be set to not run some parts of the pipeline. - **`--no_subset`** [bool] diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index cf7e9f6..35bd868 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -117,6 +117,32 @@ process DUPLICATED_READS { ''' } +process MERGE_LANES { + tag "$sample" + + input: + tuple val(sample), path(reads) + + output: + tuple val(sample), path("*.fastq.gz"), emit: fastq + + script: + def args = task.ext.args ?: '' + """ + #!/bin/bash + + R1_files=\$(ls *R1*) + R2_files=\$(ls *R2*) + + for file in \$R1_files; do + zcat \$file >> ${sample}_R1_001.fastq + done + + for file in \$R2_files; do + zcat \$file >> ${sample}_R2_001.fastq + done + """ +} /* -------------------------------------------------------------------- * OLD PROCESS diff --git a/nextflow.config b/nextflow.config index d621dab..242b327 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,6 +12,7 @@ params { fc_id = "" fc_type = "" lane = "" + merge_lanes = false data_nature = "" species = "" diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index 272b1f2..d837b87 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -15,8 +15,9 @@ include { FASTQC; FASTQSCREEN; DUPLICATED_READS; + MERGE_LANES; } from "$baseDir/modules/local/module_core.nf" -include { GUNZIP } from "${params.shared_modules}/gzip.nf" +include { GUNZIP; GZIP } from "${params.shared_modules}/gzip.nf" include { SEQTK_SAMPLE } from "${params.shared_modules}/seqtk.nf" include { md5sum as MD5SUM } from "${params.shared_modules}/md5sum.nf" //------------------------------------------------- @@ -28,9 +29,25 @@ isResume=workflow.resume //------------------------------------------------- workflow CORE { take: - ch_read + ch_fastq main: + // ----------- Lane merging fastq + if (params.merge_lanes) { + MERGE_LANES(ch_fastq + .collect{it[1]} + .flatten() + .map { $it -> [ ($it.simpleName =~ /(.*)_S\d+_.*/)[0][1] , $it ] } + .groupTuple() + ) + + GZIP(MERGE_LANES.out.fastq) + + ch_read = GZIP.out + } else { + ch_read = ch_fastq + } + // ----------- md5sum MD5SUM(ch_read.collect{it[1]}.flatten().collect(), params.run_name+'_fastq') -- GitLab