diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 47abb58bbbe6d5a9b89d43f00d6bc833eb32c1d0..593ca971441b8ba1e1e076dde1378c14d9f3d21b 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -35,6 +35,7 @@ table_columns_visible: ## Sample name formatting extra_fn_clean_exts: + - "_qualimap_results" - "_filtered" - "_unmerged" - "_flagstat" diff --git a/bin/createNGLBiReadSets.pl b/bin/createNGLBiReadSets.pl deleted file mode 100755 index e5cdf2e378a6637bf0c5ef4fd97470eeefe2fcca..0000000000000000000000000000000000000000 --- a/bin/createNGLBiReadSets.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/perl -w -binmode STDIN, ':encoding(UTF-8)'; -binmode STDOUT, ':encoding(UTF-8)'; -binmode STDERR, ':encoding(UTF-8)'; - -=head1 NAME - - createNGLBiReadSets.pl - -=head1 DESCRIPTION - - Performe readSets creation on NGL-Bi - -=head1 SYNOPSIS - - createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV> - -=head1 OPTIONS - - --infoFile=s : path to the info file - --env_ngl_bi=s : environment varible of ngl-bi - -=head1 EXEMPLES - - perl createNGLBiReadSets.pl --infoFile <path> --env_ngl_bi <ENV> - -=head1 AUTHOR - - Jules Sabban pour Plateforme genomique Toulouse (get-plage.bioinfo@genotoul.fr) - -=cut - -################################################################### -# -# LIBRAIRIES -# -################################################################### -use strict; -use Getopt::Long; -use Log::Log4perl qw(:easy);; - -################################################################## -# -# INITIALISATION -# -################################################################## -Log::Log4perl -> easy_init( { level => $TRACE, - utf8 => 1, - layout => '[%d][%p>createNGLBiReadSets.pl:L%L] %m%n' } ); - -my $logger = Log::Log4perl -> get_logger(); - -my $infoFile=""; -my $env_ngl_bi = ""; - -GetOptions ('infoFile=s' => \$infoFile, - "env_ngl_bi=s" => \$env_ngl_bi, # environnement path of NGL-Bi -); - -if ($env_ngl_bi eq "" || 
$infoFile eq "" ) { - $logger -> logdie("USAGE : createNGLBiReadSets.pl --infoFile <File> --env_ngl_bi <ENV>\n"); -} - -my $experimentName=""; -my $runName=""; -my $laneNumber=""; -my $script_path="/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/GeT/perl"; # Répertoire des scripts de l'API NGL - -################################################################## -# -# NGL-Bi ENVIRONMENT -# -################################################################## - -$ENV{APIPERL}=$env_ngl_bi; -$ENV{CONFFILE}=$env_ngl_bi."conf/prod_illumina_qc.conf"; -$logger = Log::Log4perl -> get_logger('loadConfFile'); -unless ($ENV{CONFFILE}) { - $logger -> logdie("$0 : Database configuration file not defined ! Initialize 'CONFFILE' with configuration file path in your environment"); -} -my $dbconf_file = $ENV{CONFFILE}; -unless (-f $dbconf_file) { - $logger -> logdie("$0 : Database configuration file does not exist : $dbconf_file. It's necessary for continue."); -} -open my $handle, '<', $dbconf_file; -chomp ( my @lines = <$handle> ); -close $handle; -foreach my $line (@lines) { - $line =~ s/#.*//o; - unless ($line) {next;} - if ($line =~ /(.*)=(.*)/o) { - my $key = $1; - my $value = $2; - $key =~ s/^\s*//o; - $key =~ s/\s*$//o; - $value =~ s/^\s*//o; - $value =~ s/^\s*//o; - $ENV{$key} = $value; - } else { - $logger -> logdie("$0 : Can't load variable to dababase configration file $dbconf_file in line : '$_'"); - } -} - -unshift @INC, $env_ngl_bi."Common_tools/src/perl/lib"; -unshift @INC, $env_ngl_bi."DB_tools/src/perl/lib"; - -require illumina; -require json; -$logger -> info("\tVariables d'environnement pour NGL-Bi charées."); - -################################################################## -# -# INFO FILE READING -# -################################################################## -$experimentName=`grep "ExperimentName" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep ExperimentName impossible : $!"); -$runName=`grep "NGLBiRunName" $infoFile | cut -d';' -f2` 
or $logger -> logdie("[Erreur] grep NGLBiRunName impossible : $!"); -$laneNumber=`grep "LaneNumber" $infoFile | cut -d';' -f2` or $logger -> logdie("[Erreur] grep LaneNumber impossible : $!"); - -chomp($experimentName); -chomp($runName); -chomp($laneNumber); - - -my $commandNGLBiReadSets = "perl $script_path/createNGL-BiReadSets.pl --NGLBiRunCode $runName --NGLSqExperimentCode $experimentName --laneNumberToWorkOn $laneNumber"; -$logger -> info("\tCreation des readSets dans NGL-Bi : ".$commandNGLBiReadSets); -my $result_commandNGLBiReadSets = `$commandNGLBiReadSets 2>&1`; $? and $logger -> logdie("[Erreur]Lancement de createNGL-BiReadSets.pl\n".$result_commandNGLBiReadSets); \ No newline at end of file diff --git a/bin/demuxStatsFromXML.R b/bin/demuxStatsFromXML.R index 78a40fced5c49221146ed7e903f187a8e159895e..247e49873ef46f4db8e17cf8b6f5dc7c1f80f509 100755 --- a/bin/demuxStatsFromXML.R +++ b/bin/demuxStatsFromXML.R @@ -110,7 +110,7 @@ cat("Rassemblement des statistiques par échantillons.\n") for (line in 1:dim(indexNumber)[1]){ mySample<-indexNumber[line, "Sample"] mySampleNumber<-indexNumber[line, "NumberOfIndex"] - cat("\nEtude de l'échantillon : " , mySample, "\n") + cat("\nEtude de l'échantillon : " , mySample, "(" , mySampleNumber, "index )\n") # Single Index Case if (mySampleNumber == 1) { df.singleLine<-df[which(df$Sample == mySample),] @@ -126,9 +126,10 @@ for (line in 1:dim(indexNumber)[1]){ #print(sub.df) if (nrow(sub.df) == 0) { cat("Aucun échantillon trouvé !\n") - cat("La recherche de l'échantillon",mySample, "dans le data.table suivant à échouée :\n") + cat("La recherche de l'échantillon", paste0(mySample, sampleName.suffixe), "dans le data.table suivant à échouée :\n") print(df) } else { + countBarcodesDone = 1 # Parcours du sous-data.frame for (l in 1:dim(sub.df)[1]) { sub.df.project<-sub.df[l, "Project"] @@ -138,17 +139,20 @@ for (line in 1:dim(indexNumber)[1]){ sub.df.oneMismatch<-as.numeric(sub.df[l, "bcOneMismatch"]) # bcOneMismatch # Première 
iteration - if (l == 1 ) { - sub.df.project.toAdd<-sub.df.project - sub.df.barcode.toAdd<-sub.df.barcode - sub.df.bcCount.toAdd<-sub.df.bcCount - sub.df.bcPerfect.toAdd<-sub.df.bcPerfect - sub.df.oneMismatch.toAdd<-sub.df.oneMismatch - } else { - sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode) - sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount - sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect - sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch + countBarcodesDone = countBarcodesDone + str_count(sub.df.barcode, "\\+") + if (countBarcodesDone <= mySampleNumber) { + if (l == 1 ) { + sub.df.project.toAdd<-sub.df.project + sub.df.barcode.toAdd<-sub.df.barcode + sub.df.bcCount.toAdd<-sub.df.bcCount + sub.df.bcPerfect.toAdd<-sub.df.bcPerfect + sub.df.oneMismatch.toAdd<-sub.df.oneMismatch + } else { + sub.df.barcode.toAdd<-paste0(sub.df.barcode.toAdd, "+", sub.df.barcode) + sub.df.bcCount.toAdd<-sub.df.bcCount.toAdd+sub.df.bcCount + sub.df.bcPerfect.toAdd<-sub.df.bcPerfect.toAdd+sub.df.bcPerfect + sub.df.oneMismatch.toAdd<-sub.df.oneMismatch.toAdd+sub.df.oneMismatch + } } } # Add to data.frame @@ -180,14 +184,14 @@ if(nrow(tabUndetermined) > 0) { head(tabUndetermined) } # Construction du dataFrame pour intégration à df2 -df2.Projects<-unique(df2$Project) -myProject<-df2.Projects[which(df2.Projects != "default")] +#df2.Projects<-unique(df2$Project) +#myProject<-df2.Projects[which(df2.Projects != "default")] ### Pour chaque ligne de tabUndertermined, on ajoute une ligne à df2 : if (dim(tabUndetermined)[1] != 0) { df.tabUndetermined<-data.frame() for (i in 1:dim(tabUndetermined)[1]) { - df.tabUndetermined.tmp<-data.frame(myProject, "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-") + df.tabUndetermined.tmp<-data.frame("default", "Undetermined", tabUndetermined[i, "Index"], tabUndetermined[i, "Count"], "-", "-") df.tabUndetermined<-concat_df(df.tabUndetermined, 
df.tabUndetermined.tmp, vec.names) } @@ -198,11 +202,11 @@ if (dim(tabUndetermined)[1] != 0) { } ## Soustraction des undertermined aux allOthers -# recuperer les Count de tabUndetermined et soustraire la somme à df2[which(df2$Project == "default"), "bcCount"] +# recuperer les Count de tabUndetermined et soustraire la somme à df2[which(df2$Barcode == "unknown"), "bcCount"] cat("\nQuelques calculs sur les données avant de les exporter.\n") cat("\tActualisation du nombre d'index 'AllOthers'.\n") undertermined.count<-sum(as.numeric(tabUndetermined[,"Count"])) -df2[which(df2$Project == "default"), "bcCount"]<-as.numeric(df2[which(df2$Project == "default"), "bcCount"])-undertermined.count +df2[which(df2$Barcode == "unknown"), "bcCount"]<-as.numeric(df2[which(df2$Barcode == "unknown"), "bcCount"])-undertermined.count # Calcul pourcentages de chaque barcode cat("\tCalcul du pourcentage sur le nombre de fragments total.\n") @@ -216,9 +220,9 @@ df2<-cbind(df2, percentOfFragment) # Export du data.frame cat("\nSauvegarde du data.frame.\n") -myProject<-"DEBUG" +#myProject<-"DEBUG" # mettre des 0 à la place des NA dans df2 -write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats_", myProject, ".csv")) +write.table(df2, row.names = FALSE, quote = F, sep = "\t", file = paste0("DemultiplexStats.tsv")) # Ecrire un fichier par valeur de myProject ! Cas ou il y a plusieurs projets sur la même lane. 
-cat(paste0("\tLe fichier suivant à été créé :\t", launchDir, "/DemultiplexStats_", myProject, ".csv\n")) +cat(paste0("\tLe fichier suivant à été créé :\t", launchDir, "/DemultiplexStats.tsv\n")) cat("\nFin normale du script, on sort.\n") diff --git a/bin/extractInfoForDemuxStats.pl b/bin/extractInfoForDemuxStats.pl index eddd76002844721da55861e7dce013f0fafdc69c..f3a51a07909d7a04cf717b3e9e438930ad44eb9c 100755 --- a/bin/extractInfoForDemuxStats.pl +++ b/bin/extractInfoForDemuxStats.pl @@ -96,11 +96,6 @@ foreach my $line (@lines) { $machineName = $machineName =~ /^NOVASEQ/ ? 'NOVASEQ' : $machineName; } - # Recherche du nom du projet - if ($line =~ /^Infos/) { - $projectName = $cur_line[1]; - } - # Recherche des positions des Sample_ID et des Index_ID elsif ($line =~ m/${regexForDataHeader{$machineName}}/) { while ( my ( $indice, $valeur ) = each @cur_line ) { @@ -109,13 +104,19 @@ foreach my $line (@lines) { } } - # Association Sample_ID avec sont nombre d'index + # Association Sample_ID avec son nombre d'index elsif ($line =~ m/${regexForSampleLine{$machineName}}/) { my $sample_ID = $cur_line[$sample_ID_position]; my $index_number=0; my @cur_index_ID = (); foreach my $pos (@index_ID_position) { - if ($cur_line[$pos] =~ /\w{2}-\w{2}-\w{2}/) { $index_number = 4; } else { $index_number += 1; } + if ($cur_line[$pos] =~ /^SI-(?:T\w|NT)-\w{2}$/) { + $index_number = 2; + } elsif ($cur_line[$pos] =~ /^\w{2}-\w{2}-\w{2}$/) { + $index_number = 4; + } else { + $index_number += 1; + } } $sample_info{$sample_ID} = $index_number; } @@ -128,8 +129,7 @@ foreach my $k (keys(%sample_info)) { $content.="$k\t$sample_info{$k}\n"; } -$projectName = $projectName eq "" ? 
'noName' : $projectName; -my $file2write = "$projectName.indexNumber"; +my $file2write = "indexNumber.tsv"; open(my $fh, '>', $file2write) or exit 1; print $fh $content; diff --git a/bin/parse_reports.sh b/bin/parse_reports.sh new file mode 100755 index 0000000000000000000000000000000000000000..a7d46ace6a8a5e5eb7cd5727024670460632ed50 --- /dev/null +++ b/bin/parse_reports.sh @@ -0,0 +1,28 @@ +TAG=$1 +FASTP_REPORT=$2 +QUALIMAP_REPORT=$3/genome_results.txt + +O_STAT="./${TAG}.stat" +O_CSV="./${TAG}.csv" + +## Get values +DUPLI=$(jq '.duplication.rate' $FASTP_REPORT) +TOT_SEQ=$(( $(sed -n 's/number of reads = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g' | sed 's/,//g') / 2 )) +INSERT=$(sed -n 's/median insert size = \(.*\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +GC_PERCENT=$(sed -n 's/GC percentage = \(.*%\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +GEN_COV=$(grep ">= 1X" $QUALIMAP_REPORT | sed -n 's/There is a \(.*%\) of.*/\1/p' | sed 's/ //g') +MEAN_COV=$(sed -n 's/mean coverageData.*= \(.*X\)/\1/p' $QUALIMAP_REPORT | sed 's/ //g') +ALIGN=$(sed -n 's/number of mapped reads =.*(\(.*%\))/\1/p' $QUALIMAP_REPORT | sed 's/ //g') + +## Write stat file +echo "duplication_rate: $DUPLI" >> $O_STAT +echo "total_sequences: $TOT_SEQ" >> $O_STAT +echo "mean_insert_size: $INSERT" >> $O_STAT +echo "GC_percent: $GC_PERCENT" >> $O_STAT +echo "genome_cov_percent: $GEN_COV" >> $O_STAT +echo "mean_cov: $MEAN_COV" >> $O_STAT +echo "align_percent: $ALIGN" >> $O_STAT + +## Write export file +echo "Sample;Tot_seq;Duplication_rate;Mean_insert_size;%GC;%Genome_cov;Mean_cov;%Align" > $O_CSV +echo "$TAG;$TOT_SEQ;$DUPLI;$INSERT;$GC_PERCENT;$GEN_COV;$MEAN_COV;$ALIGN" >> $O_CSV \ No newline at end of file diff --git a/conf/base.config b/conf/base.config index b99f889c90e3fab39851521f887ee5b6a8196c0f..43666af5cd52d46af158ab99024599ab1f6ac1d7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,62 +1,8 @@ // ======================================== -// PARAMS +// BASE CONFIGURATION 
//========================================= -System.out.println "Chargement des paramètres de base" -// Fixed params +// Print of analysis parameters params { - // EMPTY INITIALISATION OF INPUT PARAMS - // General params - outdir = "./" // base output directory for all analysis - inputdir = "" - project = "" - sequencer = "" - machine_id = "" - fc_id = "" - fc_type = "" - lane = "" - demux_uniqueness = "" - - data_nature = "" - species = "" - is_multiplex = false - - run_name = "" - run_date = "" - description = "" - split_reads = false - - // DNA / RNA params - reference_genome = "" - make_star_index = false - reference_transcriptome = "" - - // Amplicon / 16S params - min_overlap = "" - max_overlap = "" - - // 10X params - - - // MethylSeq params - puc19 = "" - lambda = "" - - // NGL - insert_to_ngl = true - bi_run_code = '' - sq_xp_code = '' -} - -params.samplesheet = params.inputdir.toString() + "/SampleSheet.csv" -params.data_location = params.inputdir.toString() + "/" + params.project.toString() - -// Dynamic params -import java.text.SimpleDateFormat -SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss") -params { - nf_uniqueness = uniqueness_format.format(new Date()) - outdir= params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness - System.out.println "" System.out.println "run_name : "+run_name System.out.println "data : "+data_nature @@ -70,32 +16,6 @@ params { System.out.println "" } -// Dynamic params depending on samples number -import java.nio.file.Files -import java.nio.file.Paths -def n_read_files = Files.walk(Paths.get(params.data_location)) - .filter(Files::isRegularFile) - .filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz")) - .count() - -params.n_samples = n_read_files / 2 -params.resource_factor = 0.1 * params.n_samples - -params { - bytes_subset_seq = miseq_subset_byte - subset_seq = miseq_subset_seq - if ( sequencer =~ /NovaSeq.*/ ) { - if ( n_samples >= 
large_sampling_threshold ) { - nova_subset_byte = large_indexing_nova_subset_byte - nova_subset_seq = large_indexing_nova_subset_seq - } - bytes_subset_seq = nova_subset_byte - subset_seq = nova_subset_seq - } - System.out.println "Seuil de taille de fichier pour subset : " + bytes_subset_seq + " bytes." - System.out.println "Nombre de reads pour subset : " + subset_seq + "." -} - // ======================================== // PROCESS //========================================= @@ -112,8 +32,30 @@ process { maxRetries = 2 maxErrors = '-1' - // ----- WithName + // ----- DTM + withName: PARSE_REPORTS { + executor = 'local' + memory = { 500.MB * task.attempt } + time = { 5.m * task.attempt } + + publishDir = [ + path: "${params.outdir}/DTM", + mode: 'copy' + ] + } + // ----- CORE ----- // + withLabel: demux { + publishDir = [ + path: "${params.outdir}/Demux", + mode: 'copy' + ] + } + + withName: DEMUX_STATS { + module = toolsModuleHash['R'] + } + withName: ILLUMINA_FILTER { publishDir = [ path: "${params.outdir}/IlluminaFilter", @@ -121,8 +63,8 @@ process { pattern: '*.gz'/*, saveAs: { filename -> "${name}.fastq.gz" }*/ ] - - module = ['bioinfo/fastq_illumina_filter-0.1'] + + module = toolsModuleHash['ILLUMINA_FILTER'] cpus = { 3 * task.attempt } time = { 4.h * task.attempt } } @@ -131,9 +73,12 @@ process { publishDir = [ path: "${params.outdir}/Duplicats", mode: 'copy', - pattern: "*.log" + pattern: "*.{log,json}" ] - module = ['bioinfo/fastp-0.23.2'] + + ext.args = "--reads_to_process ${params.fastp_n_reads}" + + module = toolsModuleHash['FASTP'] time = { 5.h * task.attempt } memory = { 3.GB * task.attempt } cpus = { 3 * task.attempt } @@ -153,27 +98,46 @@ process { saveAs: { filename -> "${name}.html" } ] + module = toolsModuleHash['FASTQC'] maxRetries = 4 - module = ['bioinfo/FastQC_v0.11.7'] - time = { 2.h * task.attempt * params.resource_factor } + time = { 5.h * task.attempt * params.resource_factor } } withName: FASTQSCREEN { - ext.args = [ - "--conf 
${params.inputdir}/fastq_screen.conf" - ].join(' ') + time = { 1.h * task.attempt } + module = toolsModuleHash['FASTQSCREEN'] + + ext.args = "--conf ${params.inputdir}/fastq_screen.conf" + + publishDir = [ + path: "${params.outdir}/ContaminationSearch/FastQ-Screen", + mode: 'copy' + ] + } + + // ----- DNA ----- // + withLabel: bwa { + module = toolsModuleHash['BWA'] + cpus = { 6 * task.attempt } + memory = { 8.GB * task.attempt } + time = { 3.d * task.attempt } + + publishDir = [ + path: "${params.outdir}/alignment/bwa", + mode: 'copy' + ] } // ----- RNA ----- // withName: SALMON_INDEX { - module = ['bioinfo/salmon-1.9.0'] + module = toolsModuleHash['SALMON'] time = { 1.h * task.attempt } memory = { 3.GB * task.attempt } cpus = 8 } withName: SALMON_QUANT { - module = ['bioinfo/salmon-1.9.0'] + module = toolsModuleHash['SALMON'] time = { 1.h * task.attempt } memory = { 3.GB * task.attempt } cpus = 8 @@ -185,12 +149,14 @@ process { ] } - withName: STAR_INDEX { + withName: STAR_INDEX { + module = toolsModuleHash['STAR'] memory = { 50.GB * task.attempt } cpus = 8 } withName: STAR_ALIGN { + module = toolsModuleHash['STAR'] memory = { 20.GB * task.attempt } cpus = 2 } @@ -199,21 +165,37 @@ process { withLabel: littleJob { executor = 'local' } - - withLabel: cigar { - module = ['system/Python-3.7.4:bioinfo/samtools-1.14'] + + withLabel: ngl { + beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf" + publishDir = [ + path: { "${params.outdir}/ngl" }, + mode: 'copy', + pattern: "*.{log,created}" + ] } - // ----- DNA ----- // - withLabel: bwa { - module = ['/tools/share/Modules/bioinfo/bwa-0.7.17'] - beforeScript = "module list" + withLabel: samtools { + module = toolsModuleHash['SAMTOOLS'] + cpus = { 6 * task.attempt } + memory = { 8.GB * task.attempt } + time = { 3.h * task.attempt } } - // ----- RNA ----- // - withLabel: star { - module = ['bioinfo/STAR-2.7.10a_alpha_220314'] 
+ withLabel: alignment { + publishDir = [ + path: "${params.outdir}/alignment/samtools", + mode: 'copy' + ] } + + withLabel: alignmentStats { + publishDir = [ + path: "${params.outdir}/alignmentStats/samtools", + mode: 'copy' + ] + } + } // ======================================== @@ -221,8 +203,8 @@ process { //========================================= process { withName: SAMTOOLS_FAIDX { + module = toolsModuleHash['SAMTOOLS'] beforeScript = "module purge" - module = ['bioinfo/samtools-1.16.1'] } withName: GZIP { @@ -247,7 +229,8 @@ process { ext.args = '-s100' ext.args2 = params.subset_seq - module = 'bioinfo/seqtk-1.3' + memory = { 5.GB * task.attempt } + module = toolsModuleHash['SEQTK_SAMPLE'] publishDir = [ path: { "${params.outdir}/subset" }, @@ -264,7 +247,7 @@ process { ].join(' ') beforeScript = "module purge" - module = 'bioinfo/MultiQC-1.14' + module = toolsModuleHash['MULTIQC'] memory = { 10.GB * task.attempt * params.resource_factor } publishDir = [ @@ -276,7 +259,7 @@ process { } withName: SORTMERNA { - module = 'bioinfo/sortmerna-4.3.2' + module = toolsModuleHash['SORTMERNA'] memory = { 2.GB * task.attempt } time = { 10.h * task.attempt } cpus = { 1 * task.attempt } @@ -289,10 +272,24 @@ process { } withName: MD5SUM { + time = { 3.h * task.attempt * params.resource_factor } publishDir = [ path: { "${params.outdir}/fastq" }, mode: 'copy', pattern: "*.md5sum" ] } + + withName: QUALIMAP { + module = toolsModuleHash['QUALIMAP'] + cpus = { 8 * task.attempt } + memory = { 2.GB * task.attempt } + time = { 3.h * task.attempt } + + publishDir = [ + path: "${params.outdir}/alignmentStats/qualimap", + mode: 'copy', + pattern: "*/*.{html,txt}" + ] + } } \ No newline at end of file diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config new file mode 100644 index 
0000000000000000000000000000000000000000..5276bced4fa66188eced6aa9f481da55b564ca64 --- /dev/null +++ b/conf/dependencies_genobioinfo.config @@ -0,0 +1,25 @@ +// ======================================== +// GENOBIOINFO MODULES +//========================================= +// ----- CORE ----- // +toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter/0.1'] +toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2'] +toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1'] // version upgraded compared to genologin +toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3'] +toolsModuleHash['R'] = ['statistics/R/4.3.0'] + +// ----- RNA ----- // +toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0'] // version upgraded compared to genologin +toolsModuleHash['STAR'] = ['bioinfo/STAR/2.7.5a'] // version differs from genologin (older release here) + +// ----- DNA ----- // +toolsModuleHash['BWA'] = ['bioinfo/bwa/0.7.17'] +toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools/1.18'] // version upgraded compared to genologin + +// ======================================== +// SHARED MODULES +//========================================= +toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/Seqtk/1.3'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC/1.14'] +toolsModuleHash['SORTMERNA'] = ['bioinfo/SortMeRNA/4.3.6'] // version upgraded compared to genologin +toolsModuleHash['QUALIMAP'] = ['bioinfo/Qualimap/31-08-20'] diff --git a/conf/dependencies_genologin.config b/conf/dependencies_genologin.config new file mode 100644 index 0000000000000000000000000000000000000000..7c9fa92461af40f0436c0cf7d7cd4c6d6208a63b --- /dev/null +++ b/conf/dependencies_genologin.config @@ -0,0 +1,25 @@ +// ======================================== +// GENOLOGIN MODULES +//========================================= +// ----- CORE ----- // +toolsModuleHash['ILLUMINA_FILTER'] = ['bioinfo/fastq_illumina_filter-0.1'] +toolsModuleHash['FASTP'] = ['bioinfo/fastp-0.23.2'] +toolsModuleHash['FASTQC'] = ['bioinfo/FastQC_v0.11.7'] 
+toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQ-Screen-0.15.2'] +toolsModuleHash['R'] = ['system/R-4.0.4_gcc-9.3.0'] + +// ----- RNA ----- // +toolsModuleHash['SALMON'] = ['bioinfo/salmon-1.9.0'] +toolsModuleHash['STAR'] = ['bioinfo/STAR-2.7.10a_alpha_220314'] + +// ----- DNA ----- // +toolsModuleHash['BWA'] = ['/tools/share/Modules/bioinfo/bwa-0.7.17'] +toolsModuleHash['SAMTOOLS'] = ['bioinfo/samtools-1.16.1'] + +// ======================================== +// SHARED MODULES +//========================================= +toolsModuleHash['SEQTK_SAMPLE'] = ['bioinfo/seqtk-1.3'] +toolsModuleHash['MULTIQC'] = ['bioinfo/MultiQC-1.14'] +toolsModuleHash['SORTMERNA'] = ['bioinfo/sortmerna-4.3.2'] +toolsModuleHash['QUALIMAP'] = ['bioinfo/qualimap-31-08-20'] diff --git a/conf/functions.config b/conf/functions.config index f16fa59e2ac46f7247bc9df28cacba8aef259b06..2099a7ac3da22b1b3cae6bd333e8a73a5a0fa1f9 100644 --- a/conf/functions.config +++ b/conf/functions.config @@ -67,7 +67,7 @@ def customMailSend(body, subject, email_address) { if (email_address == null) { email_address = params.email_bioinfo } - if (workflow.profile == 'dev') { + if (params.is_dev_mode) { email_address = params.email_dev try { def sending = ['echo', '-e' , body ].execute() | [ 'mail', '-s', subject, email_address ].execute() @@ -177,7 +177,7 @@ def sendFinalMail(formatted_date, summary) { if (!params.email && params.email_on_fail && !workflow.success) { email_address = params.email_on_fail } - if (workflow.profile == 'dev') { + if (params.is_dev_mode) { email_address = params.email_dev } // Render the TXT template diff --git a/conf/genomes.config b/conf/genomes.config deleted file mode 100644 index b8ef76186a407aaa8519d819daebeb509aa3ce5b..0000000000000000000000000000000000000000 --- a/conf/genomes.config +++ /dev/null @@ -1,29 +0,0 @@ -/* - * ------------------------------------------------- - * Nextflow config file for Genomes paths and indexes - * ------------------------------------------------- 
- * Defines reference genomes, using Genome paths - * Can be used by any config that customises the base - */ - -params { - genomes { - 'GRCh37' { - bed12 = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - fasta = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - star = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bowtie2 = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.genomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/" - } - 'GRCm38' { - bed12 = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - fasta = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - gtf = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - star = "${params.genomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bowtie2 = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/Bowtie2Index/" - bwa = "${params.genomes_base}/Mus_musculus/Ensembl/GRCh37/Sequence/BWAIndex/" - } - - } -} diff --git a/conf/prod.config b/conf/prod.config deleted file mode 100644 index 3dee02a8bfb1e9d73ae2b277f0f9eded2cbff689..0000000000000000000000000000000000000000 --- a/conf/prod.config +++ /dev/null @@ -1,48 +0,0 @@ -// ======================================== -// PARAMS -//========================================= -params { - ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current' - shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' -} - -// ======================================== -// PROCESSES -//========================================= -process { - withLabel: ngl { - beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/prod_illumina_qc.conf" 
- publishDir = [ - path: { "${params.outdir}/ngl" }, - mode: 'copy', - pattern: "*.{log,created}" - ] - } - - withLabel: samtools { - module = ['bioinfo/samtools-1.14'] - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.h * task.attempt } - } - - withLabel: qualimap { - module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20'] - beforeScript='unset DISPLAY' - cpus = { 8 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 3.h * task.attempt } - } - - - withName: BWA_ALIGNMENT { - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.d * task.attempt } - } -} - -// ======================================== -// CONFIG FILES -//========================================= -includeConfig "$baseDir/conf/report.config" \ No newline at end of file diff --git a/conf/report.config b/conf/report.config index 68b3e9d587fbe1781096df6012ef03f3e2f69b04..2c00805068d4ed6c20e39518db6b4337b519341c 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.2.4' + version = '1.6.0' } \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 7e37ac0ae723cb5c49e4f8e60f66443276473d19..23c2af36f96925678e3b192b469a231417cce5fd 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,45 +4,5 @@ params { ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/' shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/' + is_dev_mode = true } - -// ======================================== -// PROCESSES -//========================================= -process { - withLabel: ngl { - beforeScript = "source ${params.ngl_bi_client}/GeT/bash/loadConfFile.sh ${params.ngl_bi_client}/IG/SystemeInteractionNGL-Bi/conf/dev_illumina_qc.conf" - publishDir = [ - path: { "${params.outdir}/ngl" }, - mode: 'copy', - pattern: 
"*.{log,created}" - ] - } - - withLabel: samtools { - module = ['bioinfo/samtools-1.14'] - cpus = { 1 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 10.m * task.attempt } - } - - withLabel: qualimap { - module = ['system/R-3.4.3:bioinfo/qualimap-31-08-20'] - beforeScript='unset DISPLAY' - cpus = { 1 * task.attempt } - memory = { 2.GB * task.attempt } - time = { 10.m * task.attempt } - } - - withName: BWA_ALIGNMENT { - cpus = { 6 * task.attempt } - memory = { 8.GB * task.attempt } - time = { 3.d * task.attempt } - } -} - - -// ======================================== -// CONFIG FILES -//========================================= -includeConfig "$baseDir/conf/report.config" \ No newline at end of file diff --git a/modules/local/module_DTM.nf b/modules/local/module_DTM.nf new file mode 100644 index 0000000000000000000000000000000000000000..451b3cd8464f729c96d5ad805baa904cb08f262a --- /dev/null +++ b/modules/local/module_DTM.nf @@ -0,0 +1,20 @@ +/* + * Module pour la gestion des analyses particulières dans le cadre d'un DTM +*/ + +process PARSE_REPORTS { + tag "$sample" + + input: + tuple val(sample), path(fastp_json_report) + tuple val(sample), path(qualimap_folder) + + output: + tuple val(sample), path("*.csv"), emit: csv + + script: + """ + bash parse_reports.sh $sample $fastp_json_report $qualimap_folder + """ +} + diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index 96f29d5f40edc0861fe33e3b93ed25aeb724cb11..e243b2e0687127c2fbfb40f4d5d7f8083d103cb0 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -1,4 +1,7 @@ -params.outdir='' +/* + * Ensemble de process pour l'interraction avec NGL-Bi + * Process pour la création de traitement SAV + */ process prepareReadSetCreation { @@ -17,38 +20,27 @@ process prepareReadSetCreation { """ } -process readsetNGLBiCreation { - publishDir path: "${params.outdir}/NGLBi" , mode: 'copy', pattern: '*.created' - - executor = 'local' - beforeScript = "export 
ENV_NGL='/save/sbsuser/scripts-ngs/NGL-Bi_client_Current/IG/SystemeInteractionNGL-Bi/'" - errorStrategy = { 'ignore' } - - input : - path infoFile - - output : - path 'ReadsetsNGL-Bi.created', emit: readSetFile - path 'ReadsetsNGL-BiCreation.log', emit: readSetLog - - script : - """ - createNGLBiReadSets.pl --infoFile $infoFile --env_ngl_bi \$ENV_NGL 2> ReadsetsNGL-BiCreation.log 1> ReadsetsNGL-Bi.created - - """ -} +process TREATMENT_DEMUXSTAT { + label 'ngl' -process checkErrorFromNGLBi { - publishDir path: "${params.outdir}/NGLBi" , mode: 'copy' - input: - path logFile - + val nglCode + path csvFile + val lane + output: - path 'ReadsetsNGL-BiCreation.log' - + path("*.log") + val 1, emit: ready + script: + laneOption = lane ? "--lane $lane" : '' + forceOption = workflow.resume ? "--force" : '' """ - checkErrorNGLScripts.pl --file $logFile + perl ${params.ngl_bi_client}/GeT/perl/illumina/createNGL-BiTreatmentDemultiplexStat.pl \\ + --code $nglCode \\ + --stat $csvFile \\ + ${laneOption} \\ + ${forceOption} \\ + 1> treatment_demux_${lane}.log """ -} \ No newline at end of file +} diff --git a/modules/local/module_core.nf b/modules/local/module_core.nf index 9b7f25c368e1b6f6d11f9be7494065d42731307a..cf7e9f64234dbb9b18a618e5b7ef9f3a44067ba0 100644 --- a/modules/local/module_core.nf +++ b/modules/local/module_core.nf @@ -3,13 +3,13 @@ */ process PREP_DEMUXSTAT { - publishDir path: "${params.outdir}/Demux" , mode: 'copy' - + label 'demux' + input: path SampleSheet output: - path "*.indexNumber" + path "indexNumber.tsv" script: """ @@ -19,9 +19,7 @@ process PREP_DEMUXSTAT { } process DEMUX_STATS { - publishDir path: "${params.outdir}/Demux" , mode: 'copy' - - //module 'system/R-4.0.4_gcc-9.3.0' // Ne fonctionne pas ! 
+ label 'demux' input: path DemuxStatXML @@ -30,11 +28,10 @@ process DEMUX_STATS { output: path 'demultiplexStats.log', emit: log - path "DemultiplexStats_*", emit: demultiplexStatsCSV + path "DemultiplexStats.tsv", emit: demultiplexStatsTSV script: """ - module load system/R-4.0.4_gcc-9.3.0 demuxStatsFromXML.R --xml $DemuxStatXML --indexNumber $IndexNumberFile --demuxSum $DemuxSummary > demultiplexStats.log """ } @@ -75,12 +72,7 @@ process ILLUMINA_FILTER { } -process FASTQSCREEN { - publishDir path: "${params.outdir}/ContaminationSearch/FastQ-Screen", mode: 'copy' - - module 'bioinfo/FastQ-Screen-0.15.2' - time { 1.h * task.attempt } - +process FASTQSCREEN { tag " $sample" input: @@ -97,7 +89,6 @@ process FASTQSCREEN { } process DUPLICATED_READS { - tag "$sample" input: @@ -110,6 +101,7 @@ process DUPLICATED_READS { shell: R1_name=file(fastq[0]).simpleName R2_name=file(fastq[1]).simpleName + def args = task.ext.args ?: '' ''' fastp \ -i !{fastq[0]} \ @@ -120,6 +112,7 @@ process DUPLICATED_READS { --disable_quality_filtering \ --disable_length_filtering \ --json !{R1_name}_fastp.json \ + !{args} \ 2> !{R1_name}.log ''' } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 8dc0709098973e3c6281c20c58ed7eac1ed5feed..2afa1986d1028fb0b96b6fd4e19eed5d6e806541 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -3,9 +3,8 @@ */ process BWA_ALIGNMENT { - publishDir path: "${params.outdir}/alignment/bwa" , mode: 'copy' - tag "$sample" + label 'bwa' input: @@ -19,16 +18,15 @@ process BWA_ALIGNMENT { def reference = params.reference_genome ?: params.reference_transcriptome def referenceName=file(reference).toString().split('/')[6] """ - bwa mem ${reference} ${reads} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log + bwa mem ${reference} ${reads} -t ${task.cpus} 1> ${sample}_${referenceName}.sam 2> ${sample}_${referenceName}.log """ } process SAMTOOLS_VIEW { - publishDir path: 
"${params.outdir}/alignment/samtools" , mode: 'copy' - tag "$sample" label 'samtools' + label 'alignment' input: tuple val(sample), path(sam) @@ -38,16 +36,15 @@ process SAMTOOLS_VIEW { script: """ - samtools view -bS ${sam} > ${sample}.bam + samtools view -bS ${sam} -@ ${task.cpus} > ${sample}.bam """ } process SAMTOOLS_SORT { - publishDir path: "${params.outdir}/alignment/samtools" , mode: 'copy' - tag "$sample" label 'samtools' + label 'alignment' input: tuple val(sample), path(bam) @@ -64,11 +61,10 @@ process SAMTOOLS_SORT { } process SAMTOOLS_FLAGSTATS { - publishDir path: "${params.outdir}/alignmentStats/samtools" , mode: 'copy' - tag "$sample" label 'samtools' + label 'alignmentStats' input: tuple val(sample), path(bam) @@ -83,29 +79,6 @@ process SAMTOOLS_FLAGSTATS { """ } -process QUALIMAP { - publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.html" - publishDir path: "${params.outdir}/alignmentStats/qualimap" , mode: 'copy', pattern: "*.txt" - - tag "$sample" - - label 'qualimap' - - errorStrategy = { 'ignore' } - - input: - tuple val(sample), path(bam) - - output: - tuple val(sample), path("*.log"), emit: log - tuple val(sample), path("*/*"), emit: all // ${sample}_stats/* - tuple val(sample), path("${sample}"), emit: report - - script: - """ - qualimap bamqc -bam ${bam} -outdir ${sample} 1> ${sample}.log - """ -} diff --git a/nextflow.config b/nextflow.config index 157085f18b2fb84c07659ac2d27fbaebbe8e6577..5541f97e1ce518c9e7666e0b2a3a208c75bb92f3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,18 +1,70 @@ // ======================================== -// PARAMS -// ========================================= -// Global params -params { - // PARAMETRE POUR OUTILS +// WORKFLOW FLAGS / OPTIONS +//========================================= +params { + // ----- GLOBAL PARAMETERS ----- + inputdir = "" + project = "" + sequencer = "" + machine_id = "" + fc_id = "" + fc_type = "" + lane = "" + demux_uniqueness = "" + + 
data_nature = "" + species = "" + is_multiplex = false + + run_name = "" + run_date = "" + description = "" + split_reads = false + + // DNA / RNA params + reference_genome = "" + make_star_index = false + reference_transcriptome = "" + + // Amplicon / 16S params + min_overlap = "" + max_overlap = "" + + // 10X params + // MethylSeq params + puc19 = "" + lambda = "" + + // NGL + ngl_bi_client = '/home/sbsuser/save/scripts-ngs/NGL-Bi_client_Current' + insert_to_ngl = true + bi_run_code = '' + sq_xp_code = '' + + // Shared Modules + shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' + + // OTHERS + cluster_options = '' + is_dev_mode = false + DTM_mode = false + host = 'genologin' + email="" + email_dev="jules.sabban@inrae.fr" + email_on_fail="jules.sabban@inrae.fr" + email_bioinfo="get-plage.bioinfo@genotoul.fr" + //email_labo="get-plage.labo@genotoul.fr" + email_labo="" + + + // ----- TOOLS PARAMETERS ----- // Subset fastq files params - large_sampling_threshold = 200 // 200 samples run is high multiplexed - miseq_subset_byte = 20000000 // in byte <=> 20 000 reads - miseq_subset_seq = 20000 // in reads - nova_subset_byte = 700000000 // in byte <=> 1 000 000 reads - nova_subset_seq = 1000000 // in reads - large_indexing_nova_subset_byte = 350000000 // in byte <=> 500 000 reads - large_indexing_nova_subset_seq = 500000 // in reads + no_subset = false // to skip subset step -> use every reads to align + large_sampling_threshold = 200 // 200 samples run is high multiplexed + miseq_subset_seq = "50000" // in reads must be a string + nova_subset_seq = "50000000" // in reads + large_indexing_nova_subset_seq = "500000" // in reads // RNA QC sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases' @@ -23,50 +75,70 @@ params { sortmerna_euk_18s = sortmerna_db_path + '/silva-euk-18s-id95.fasta' sortmerna_euk_28s = sortmerna_db_path + '/silva-euk-28s-id98.fasta' - // OTHERS - email="" - email_dev="jules.sabban@inrae.fr" - 
email_on_fail="jules.sabban@inrae.fr" - email_bioinfo="get-plage.bioinfo@genotoul.fr" - //email_labo="get-plage.labo@genotoul.fr" - email_labo="" - - cluster_options = '' + // FASTP + fastp_n_reads = 100000000 // skip parameters skip_core_illumina = false - monochrome_logs = true help = false - - config_profile_description = false // ?? - config_profile_contact = false // ?? - config_profile_url = false // ?? +} + +// ======================================== +// ANALYSIS PARAMETERS +//========================================= +import java.text.SimpleDateFormat +SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss") + +import java.nio.file.Files +import java.nio.file.Paths + +params.data_location = params.inputdir.toString() + "/" + params.project.toString() +def n_read_files = Files.walk(Paths.get(params.data_location)) + .filter(Files::isRegularFile) + .filter(p -> p.getFileName().toString().matches(".*_R[12](_.*)?\\.fastq\\.gz")) + .count() + +params.n_samples = n_read_files / 2 +params.resource_factor = 0.1 * params.n_samples + +params { + // Dynamics params, depend on others + samplesheet = inputdir.toString() + "/SampleSheet.csv" + nf_uniqueness = uniqueness_format.format(new Date()) + outdir = params.inputdir + "/nextflow/" + project + "_" + run_name + "_" + nf_uniqueness + + subset_seq = miseq_subset_seq + if ( sequencer =~ /NovaSeq.*/ ) { + if ( n_samples >= large_sampling_threshold ) { + nova_subset_seq = large_indexing_nova_subset_seq + } + subset_seq = nova_subset_seq + } } // ======================================== // PROFILES //========================================= -// Load base.config by default for all pipelines -includeConfig "$baseDir/conf/base.config" +toolsModuleHash = [:] +if (params.host == 'genologin') { + includeConfig "$baseDir/conf/dependencies_genologin.config" +} else if (params.host == 'genobioinfo') { + includeConfig "$baseDir/conf/dependencies_genobioinfo.config" +} -System.out.println "Les configurations de 
bases sont chargées" +// Load base.config and report.config by default for all pipelines +includeConfig "$baseDir/conf/base.config" +includeConfig "$baseDir/conf/report.config" // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev process.container = "$baseDir/template-nf.sif" profiles { - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { docker.enabled = true } - singularity { singularity.enabled = true } - dev { includeConfig "$baseDir/conf/test.config" } - prod { includeConfig "$baseDir/conf/prod.config" } + dev { includeConfig "$baseDir/conf/test.config" } } -System.out.println "Tous les profiles ont été analysés" - // Avoid this error: // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. 
@@ -74,4 +146,3 @@ docker.runOptions = '-u \$(id -u):\$(id -g)' // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -System.out.println "Sortie du nextflow.config" \ No newline at end of file diff --git a/sub-workflows/local/core_illumina.nf b/sub-workflows/local/core_illumina.nf index 6a79fdff11ca9db5709b0aceb77675f0a74d68e6..a03cd6f04d8c83201f2653be7c6a013d2dad847d 100644 --- a/sub-workflows/local/core_illumina.nf +++ b/sub-workflows/local/core_illumina.nf @@ -47,5 +47,6 @@ workflow CORE_ILLUMINA { emit: fastq = fastq_good + demuxStat = DEMUX_STATS.out.demultiplexStatsTSV } diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index edbb825db4179328dd4da359f45881f042f3cc80..272b1f28c579247548ade23f7c03ead352bf5427 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -43,28 +43,25 @@ workflow CORE { // ----------- Recherche Duplicats GUNZIP(ch_read) - GUNZIP.out.branch{ - large : it[1].size() >= params.bytes_subset_seq - small : it[1].size() < params.bytes_subset_seq - }.set{unzip_reads_split} + // ----------- Sous-échantillonnage + if (params.no_subset) { + unzipped_fastq = GUNZIP.out + } else { + SEQTK_SAMPLE(GUNZIP.out) + unzipped_fastq = SEQTK_SAMPLE.out + } - unzip_reads_split.large.count().map{it}.subscribe onNext: { println it + " large fastq (more than ${params.subset_seq} reads)" } - unzip_reads_split.small.count().map{it}.subscribe onNext: { println it + " small fastq" } - - // Do subset only on large fastq files - SEQTK_SAMPLE(unzip_reads_split.large) - DUPLICATED_READS(unzip_reads_split.small - .mix(SEQTK_SAMPLE.out) - .collect{it[1]} - .flatten() - .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } - .groupTuple() + DUPLICATED_READS(unzipped_fastq + .collect{it[1]} + .flatten() + .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } + .groupTuple() ) // need fastq paired !!! 
emit: fastqc_report = FASTQC.out.zip ?: Channel.empty() fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty() fastp_report = DUPLICATED_READS.out.json - subset_fastq = unzip_reads_split.small.mix(SEQTK_SAMPLE.out) + subset_fastq = unzipped_fastq fastq_md5 = MD5SUM.out } diff --git a/sub-workflows/local/dna_qc.nf b/sub-workflows/local/dna_qc.nf index 57e1a0845c112de54f45d68c83a702c100c15249..f41af2e8b33323e923cae0c78d5019f0fa30a0d6 100644 --- a/sub-workflows/local/dna_qc.nf +++ b/sub-workflows/local/dna_qc.nf @@ -14,9 +14,8 @@ include { BWA_ALIGNMENT; SAMTOOLS_VIEW; SAMTOOLS_SORT; SAMTOOLS_FLAGSTATS; - QUALIMAP; } from "$baseDir/modules/local/module_dna.nf" - +include { QUALIMAP } from "${params.shared_modules}/qualimap.nf" // ------------------------------------------------- // WORKFLOW diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 04c0e66e896dc51407fa4a58c519cfaec5ec80cf..5d2e895841a4d3625c39d9317e939a0ec6d72f4d 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -57,15 +57,19 @@ createDir = file(params.outdir).mkdir() // ------------------------------------------------- // INCLUDES // ------------------------------------------------- -include { NGLBI } from "$baseDir/sub-workflows/local/begin_nglbi.nf" -include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf" -include { CORE } from "$baseDir/sub-workflows/local/core_pipeline.nf" -include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" -include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" -include { MULTIQC } from "${params.shared_modules}/multiqc.nf" -include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf" -include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf" -include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') +include { NGLBI } from 
"$baseDir/sub-workflows/local/begin_nglbi.nf" +include { CORE_ILLUMINA } from "$baseDir/sub-workflows/local/core_illumina.nf" +include { CORE } from "$baseDir/sub-workflows/local/core_pipeline.nf" +include { DNA_QC } from "$baseDir/sub-workflows/local/dna_qc.nf" +include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" +include { PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf" +include { TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN; + TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS + } from "$baseDir/modules/local/module_NGL-Bi.nf" +include { MULTIQC } from "${params.shared_modules}/multiqc.nf" +include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf" +include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf" +include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') // ------------------------------------------------- // EMAIL ON START // ------------------------------------------------- @@ -84,21 +88,38 @@ workflow ILLUMINA_QC { NGLBI() } - if ( ! 
params.skip_core_illumina ) { + if ( params.skip_core_illumina ) { + fastq = ch_read + } else { CORE_ILLUMINA(ch_ss, ch_DemuxStatXML, ch_DemuxSummary, ch_read) fastq = CORE_ILLUMINA.out.fastq - } else { - fastq = ch_read + + if (params.insert_to_ngl){ + // Add demultiplexStat treatments + TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat, params.lane) + TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat, '') + } } CORE(fastq) if (params.data_nature == 'DNA') { - DNA_QC(CORE.out.subset_fastq) + DNA_QC(CORE.out.subset_fastq + .collect{it[1]} + .flatten() + .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } + .groupTuple() + ) ch_mqc = ch_mqc.mix( DNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), DNA_QC.out.flagstats_output.collect{it[1]}.ifEmpty([]) ) + + // DTM process + if (params.DTM_mode) { + PARSE_REPORTS(CORE.out.fastp_report, DNA_QC.out.qualimap_report) + } + } else if (params.data_nature =~ 'RNA') { RNA_QC(CORE.out.subset_fastq, ch_sortmerna_db) ch_mqc = ch_mqc.mix( @@ -145,6 +166,13 @@ workflow ILLUMINA_QC { def end_mail_sent = false workflow.onComplete { end_mail_sent = sendFinalMail(format.format(new Date()), params.summary) + + // remove work directory if pipeline is successful + if (workflow.success && !params.is_dev_mode) { + println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir" + exec: + workflow.workDir.deleteDir() + } } workflow.onError { } \ No newline at end of file