From c332298c40d596e8abb74282f992e1a8f37387f7 Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Thu, 25 Apr 2024 11:17:30 +0200 Subject: [PATCH 1/2] FastQ naming using NGL api - Name of fastq with info from NGL databases - Uniformity with ng6 file names (new pipeline option) - Add files to readsets - Move MD5SUM process from CORE_PIPELINE to ILLUMINA_QC workflow ref: #91 --- conf/base.config | 7 +++++++ modules/local/module_NGL-Bi.nf | 25 +++++++++++++++++++++++++ nextflow.config | 1 + sub-workflows/local/begin_nglbi.nf | 17 +++++++++-------- sub-workflows/local/core_pipeline.nf | 16 ++++++++++------ workflow/illumina_qc.nf | 10 +++++++--- 6 files changed, 59 insertions(+), 17 deletions(-) diff --git a/conf/base.config b/conf/base.config index e62053e..b4796c9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -207,6 +207,13 @@ process { } + withName: FILE_RENAME { + ext.args = [ + params.run_date ? "--date " + params.run_date : '', + params.ng6_name ? "--ng6_like" : '', + ].join(' ') + } + // ----- WithLabel withLabel: littleJob { executor = 'local' diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf index 8da6904..9797b91 100644 --- a/modules/local/module_NGL-Bi.nf +++ b/modules/local/module_NGL-Bi.nf @@ -44,3 +44,28 @@ process TREATMENT_DEMUXSTAT { 1> treatment_demux_${lane}.log """ } + +process FILE_RENAME { + label 'ngl' + + input: + path(inputDirectory) + path(readsetsFile) + val(sqXpCode) + + output: + path("outputs/*.fastq.gz"), emit: fastq + path("*.log"), emit: log + val 1, emit: ready + + script: + def args = task.ext.args ?: '' + """ + perl ${params.ngl_bi_client}/GeT/perl/illumina/file_rename.pl \\ + --input ./ \\ + --readsets $readsetsFile \\ + --sqExperimentCode $sqXpCode \\ + $args \\ + 1> file_rename.log + """ +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index a823a40..1b1145c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,6 +58,7 @@ params { insert_to_ngl
= true bi_run_code = '' sq_xp_code = '' + ng6_name = true // Shared Modules shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current' diff --git a/sub-workflows/local/begin_nglbi.nf b/sub-workflows/local/begin_nglbi.nf index c71b11e..2a50f52 100644 --- a/sub-workflows/local/begin_nglbi.nf +++ b/sub-workflows/local/begin_nglbi.nf @@ -13,8 +13,8 @@ // ------------------------------------------------- include { CREATE_RUN; - UPDATE_NGLBI_STATE_FROM_CODE as UPDATE_STATE_FS; - UPDATE_NGLBI_STATE_FROM_CODE as UPDATE_STATE_IPRG; + UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FS; + UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_IPRG; UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FRG; UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_IPQC; CREATE_READSETS; @@ -35,22 +35,23 @@ workflow NGLBI { // Creation of the Run if needed if (params.bi_run_code == '') { - CREATE_RUN(params.sequencer, params.sqXpCode, '', 1) + CREATE_RUN(params.sequencer, params.sq_xp_code, '', 1) - params.bi_run_code = CREATE_RUN.out.createdFile.splitText().map{it -> it.trim()} + bi_run_code = CREATE_RUN.out.createdFile.splitText().map{it -> it.trim()} ready_for_state = CREATE_RUN.out.ready } else { ready_for_state = Channel.value(1) + bi_run_code = params.bi_run_code } // Update run state - UPDATE_STATE_FS(params.bi_run_code, 'F-S', ready_for_state) - UPDATE_STATE_IPRG(params.bi_run_code, 'IP-RG', UPDATE_STATE_FS.out.ready) + UPDATE_STATE_FS(CREATE_RUN.out.createdFile, 'F-S', ready_for_state) + UPDATE_STATE_IPRG(CREATE_RUN.out.createdFile, 'IP-RG', UPDATE_STATE_FS.out.ready) // Creation of ReadSets CREATE_READSETS( params.sq_xp_code, - params.bi_run_code, + bi_run_code, '', '', UPDATE_STATE_IPRG.out.ready @@ -63,7 +64,7 @@ workflow NGLBI { emit: ready = UPDATE_STATE_IPQC.out.ready readsetsFile = CREATE_READSETS.out.createdFile - + runFile = CREATE_RUN.out.createdFile } diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index eb17141..a14b560 100644 --- 
a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -19,7 +19,6 @@ include { } from "$baseDir/modules/local/module_core.nf" include { GUNZIP } from "${params.shared_modules}/gzip.nf" include { SEQTK_SAMPLE } from "${params.shared_modules}/seqtk.nf" -include { md5sum as MD5SUM } from "${params.shared_modules}/md5sum.nf" //------------------------------------------------- inNGL=true @@ -49,9 +48,6 @@ workflow CORE { ch_read = ch_fastq } - // ----------- md5sum - MD5SUM(ch_read.collect{it[1]}.flatten().collect(), params.run_name+'_fastq') - // ----------- FASTQC FASTQC(ch_read) @@ -65,7 +61,16 @@ workflow CORE { if (params.no_subset) { unzipped_fastq = GUNZIP.out } else { - SEQTK_SAMPLE(GUNZIP.out) + /* + if ( "$params.coverage" != 0 && ("$params.reference_genome" != '' || "$params.reference_transcriptome" != '')) { + reference = params.reference_genome ?: params.reference_transcriptome + GET_NB_SEQ_FROM_COV(reference, params.coverage, ch_read[0]) + nb_seq = GET_NB_SEQ.out.value + } else { + nb_seq = params.subset_seq + } + */ + SEQTK_SAMPLE(GUNZIP.out/*, nb_seq*/) unzipped_fastq = SEQTK_SAMPLE.out } @@ -81,5 +86,4 @@ workflow CORE { fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty() fastp_report = DUPLICATED_READS.out.json subset_fastq = unzipped_fastq - fastq_md5 = MD5SUM.out } diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index a352500..eb75e22 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -68,13 +68,15 @@ include { RNA_QC } from "$baseDir/sub-workflows/local/rna_qc.nf" include { DIVERSITY_QC } from "$baseDir/sub-workflows/local/diversity_qc.nf" include { PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf" include { TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN; - TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS + TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS; + FILE_RENAME as RENAME_FASTQ; } from "$baseDir/modules/local/module_NGL-Bi.nf" include { MULTIQC } from 
"${params.shared_modules}/multiqc.nf" include { GCBIAS as GC_BIAS } from "${params.shared_modules}/gcbias.nf" include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf" include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf" include { READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW') +include { md5sum as MD5SUM } from "${params.shared_modules}/md5sum.nf" // ------------------------------------------------- // EMAIL ON START // ------------------------------------------------- @@ -101,7 +103,7 @@ workflow ILLUMINA_QC { if (params.insert_to_ngl){ // Add demultiplexStat treatments - TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat) + TREATMENT_DEMUX_RUN(NGLBI.out.runFile, CORE_ILLUMINA.out.demuxStat) TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat) } } @@ -162,7 +164,9 @@ workflow ILLUMINA_QC { ) if (params.insert_to_ngl){ - ADD_RS_RAW_FILES(NGLBI.out.readsetsFile, CORE.out.fastq_md5, 'RAW', NGLBI.out.ready) + RENAME_FASTQ(fastq.map{it[1]}.collect(), NGLBI.out.readsetsFile, params.sq_xp_code) + MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq') + ADD_RS_RAW_FILES(NGLBI.out.readsetsFile, MD5SUM.out, 'RAW', NGLBI.out.ready) UPDATE_STATE_FQC(NGLBI.out.readsetsFile, 'F-QC', MULTIQC.out.html) } -- GitLab From 6a7302f8c3f84c208be1e0c970b489af773eac9e Mon Sep 17 00:00:00 2001 From: jsabban <jules.sabban@inrae.fr> Date: Thu, 25 Apr 2024 11:28:50 +0200 Subject: [PATCH 2/2] Update ressources --- conf/base.config | 9 ++++++--- conf/test.config | 2 +- modules/local/module_dna.nf | 1 - nextflow.config | 6 +++--- sub-workflows/local/rna_qc.nf | 4 ++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/conf/base.config b/conf/base.config index b4796c9..f6ad4e7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -125,12 +125,15 @@ process { withLabel: bwa { 
module = toolsModuleHash['BWA'] cpus = { checkMax( 6 * task.attempt, 'cpus' ) } - memory = { checkMax( 8.GB * task.attempt, 'memory' ) } + memory = { checkMax( 16.GB * task.attempt, 'memory' ) } time = { checkMax( 3.d * task.attempt, 'time' ) } publishDir = [ path: "${params.outdir}/alignment/bwa", - mode: 'copy' + mode: { + if (params.DTM_mode) 'symlink' + else 'copy' + } ] } @@ -223,7 +226,7 @@ process { module = toolsModuleHash['SAMTOOLS'] cpus = { checkMax( 6 * task.attempt, 'cpus' ) } memory = { checkMax( 8.GB * task.attempt, 'memory' ) } - time = { checkMax( 3.h * task.attempt, 'time' ) } + time = { checkMax( 24.h * task.attempt, 'time' ) } } withLabel: alignment { diff --git a/conf/test.config b/conf/test.config index b8095b6..e8ba1ef 100644 --- a/conf/test.config +++ b/conf/test.config @@ -3,7 +3,7 @@ //========================================= System.out.println "Profil dev => on ajuste les paramètres..." params { - ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/' + ngl_bi_client = '/work/project/PlaGe/test/jules/VisualStudioSources/ngl-bi_client/' shared_modules = '/save/user/sbsuser/scripts-ngs/shared_modules_Current/' is_dev_mode = true } diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf index 2afa198..02836b5 100644 --- a/modules/local/module_dna.nf +++ b/modules/local/module_dna.nf @@ -26,7 +26,6 @@ process SAMTOOLS_VIEW { tag "$sample" label 'samtools' - label 'alignment' input: tuple val(sample), path(sam) diff --git a/nextflow.config b/nextflow.config index 1b1145c..ea9a9d0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -37,7 +37,7 @@ params { reference_genome = "" make_star_index = false reference_transcriptome = "" - sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases' + sortmerna_db_path = '/work/project/PlaGe/sortemerna_db' // Amplicon / 16S params min_overlap = 20 @@ -64,7 +64,7 @@ params { shared_modules = 
'/home/sbsuser/save/scripts-ngs/shared_modules_Current' // SLURM MAX RESSOURCES - max_memory = "250.GB" + max_memory = "500.GB" max_time = "90.d" max_cpus = "48" @@ -72,7 +72,7 @@ params { cluster_options = '' is_dev_mode = false DTM_mode = false - host = 'genologin' + host = 'genobioinfo' email="" email_dev="jules.sabban@inrae.fr" email_on_fail="jules.sabban@inrae.fr" diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf index 2c32b0b..b04ac82 100644 --- a/sub-workflows/local/rna_qc.nf +++ b/sub-workflows/local/rna_qc.nf @@ -89,8 +89,8 @@ workflow RNA_QC { } else { // If Qualimap and Samtools were not executed System.out.println "Pas de référence genomique ou transcriptomique renseignée, on ne peut pas faire d'alignement" - //qualimap_report_emitted = Channel.empty() - //flagstats_output_emitted = Channel.empty() + qualimap_report_emitted = Channel.empty() + flagstats_output_emitted = Channel.empty() } SORTMERNA(fastq, sortmerna_db.collect()) -- GitLab