From c332298c40d596e8abb74282f992e1a8f37387f7 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 25 Apr 2024 11:17:30 +0200
Subject: [PATCH 1/2] FastQ naming using NGL API

	- Name FastQ files using information from the NGL databases
	- Make file names consistent with ng6 file names (new pipeline option)
	- Add files to readsets
	- Move the MD5SUM process from the CORE_PIPELINE to the ILLUMINA_QC workflow

	ref: #91
---
 conf/base.config                     |  7 +++++++
 modules/local/module_NGL-Bi.nf       | 25 +++++++++++++++++++++++++
 nextflow.config                      |  1 +
 sub-workflows/local/begin_nglbi.nf   | 17 +++++++++--------
 sub-workflows/local/core_pipeline.nf | 16 ++++++++++------
 workflow/illumina_qc.nf              | 10 +++++++---
 6 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index e62053e..b4796c9 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -207,6 +207,13 @@ process {
 
 	}
 
+	withName: FILE_RENAME { // optional command-line flags for the FILE_RENAME process, set through ext.args
+		ext.args = [
+			params.run_date ? "--date " +  params.run_date : '', // --date followed by params.run_date, only when params.run_date is set
+			params.ng6_name ? "--ng6_like" : '', // --ng6_like flag, only when params.ng6_name is truthy
+		].join(' ') // unset options contribute an empty string and are still joined with a single space
+	}
+
 	// ----- WithLabel
 	withLabel: littleJob {
 		executor = 'local'
diff --git a/modules/local/module_NGL-Bi.nf b/modules/local/module_NGL-Bi.nf
index 8da6904..9797b91 100644
--- a/modules/local/module_NGL-Bi.nf
+++ b/modules/local/module_NGL-Bi.nf
@@ -44,3 +44,28 @@ process TREATMENT_DEMUXSTAT {
 			1> treatment_demux_${lane}.log
 	"""
 }
+
+process FILE_RENAME { // runs file_rename.pl from the NGL-Bi client located under params.ngl_bi_client
+	label 'ngl'
+
+	input:
+		path(inputDirectory) // staged into the task directory; the script is pointed at ./ rather than at this variable
+		path(readsetsFile) // forwarded to the script as --readsets
+		val(sqXpCode) // forwarded to the script as --sqExperimentCode
+
+	output:
+		path("outputs/*.fastq.gz"), emit: fastq // presumably the renamed files written by file_rename.pl - TODO confirm
+		path("*.log"), emit: log // includes file_rename.log, which receives the script stdout
+		val 1, emit: ready // constant value emitted on the ready output
+
+	script:
+	def args = task.ext.args ?: '' // extra command-line flags from ext.args; empty string when none are configured
+	"""
+		perl ${params.ngl_bi_client}/GeT/perl/illumina/file_rename.pl \\
+			--input ./ \\
+			--readsets $readsetsFile \\
+			--sqExperimentCode $sqXpCode \\
+			$args \\
+			1> file_rename.log
+	"""
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index a823a40..1b1145c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -58,6 +58,7 @@ params {
 	insert_to_ngl = true
 	bi_run_code = ''
 	sq_xp_code = ''
+	ng6_name = true
 
 	// Shared Modules
 	shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current'
diff --git a/sub-workflows/local/begin_nglbi.nf b/sub-workflows/local/begin_nglbi.nf
index c71b11e..2a50f52 100644
--- a/sub-workflows/local/begin_nglbi.nf
+++ b/sub-workflows/local/begin_nglbi.nf
@@ -13,8 +13,8 @@
 // -------------------------------------------------
 
 include {	CREATE_RUN;
-            UPDATE_NGLBI_STATE_FROM_CODE as UPDATE_STATE_FS;
-			UPDATE_NGLBI_STATE_FROM_CODE as UPDATE_STATE_IPRG;
+            UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FS;
+			UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_IPRG;
 			UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FRG;
             UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_IPQC;
 			CREATE_READSETS;
@@ -35,22 +35,23 @@ workflow NGLBI {
         // Creation of the Run if needed
         if (params.bi_run_code == '') {
 
-            CREATE_RUN(params.sequencer, params.sqXpCode, '', 1)
+            CREATE_RUN(params.sequencer, params.sq_xp_code, '', 1)
 
-            params.bi_run_code = CREATE_RUN.out.createdFile.splitText().map{it -> it.trim()}
+            bi_run_code = CREATE_RUN.out.createdFile.splitText().map{it -> it.trim()}
             ready_for_state = CREATE_RUN.out.ready
         } else {
             ready_for_state = Channel.value(1)
+            bi_run_code = params.bi_run_code
         }
 
         // Update run state
-		UPDATE_STATE_FS(params.bi_run_code, 'F-S', ready_for_state)
-		UPDATE_STATE_IPRG(params.bi_run_code, 'IP-RG', UPDATE_STATE_FS.out.ready)
+		UPDATE_STATE_FS(CREATE_RUN.out.createdFile, 'F-S', ready_for_state) // NOTE(review): CREATE_RUN is only invoked in the branch where params.bi_run_code is empty; confirm CREATE_RUN.out is defined when a run code is supplied
+		UPDATE_STATE_IPRG(CREATE_RUN.out.createdFile, 'IP-RG', UPDATE_STATE_FS.out.ready)
 
         // Creation of ReadSets
 		CREATE_READSETS(
 			params.sq_xp_code,
-			params.bi_run_code,
+			bi_run_code,
 			'',
 			'',
 			UPDATE_STATE_IPRG.out.ready
@@ -63,7 +64,7 @@ workflow NGLBI {
     emit:
         ready = UPDATE_STATE_IPQC.out.ready
 		readsetsFile = CREATE_READSETS.out.createdFile
-
+        runFile = CREATE_RUN.out.createdFile
 }
 
 
diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf
index eb17141..a14b560 100644
--- a/sub-workflows/local/core_pipeline.nf
+++ b/sub-workflows/local/core_pipeline.nf
@@ -19,7 +19,6 @@ include {
 } from "$baseDir/modules/local/module_core.nf"
 include { GUNZIP			} from "${params.shared_modules}/gzip.nf"
 include { SEQTK_SAMPLE 		} from "${params.shared_modules}/seqtk.nf"
-include { md5sum as MD5SUM	} from "${params.shared_modules}/md5sum.nf"
 //-------------------------------------------------
 
 inNGL=true
@@ -49,9 +48,6 @@ workflow CORE {
 			ch_read = ch_fastq
 		}
 		
-		// ----------- md5sum
-		MD5SUM(ch_read.collect{it[1]}.flatten().collect(), params.run_name+'_fastq')
-		
 		// ----------- FASTQC
 		FASTQC(ch_read)
 		
@@ -65,7 +61,16 @@ workflow CORE {
 		if (params.no_subset) {
 			unzipped_fastq = GUNZIP.out
 		} else {
-			SEQTK_SAMPLE(GUNZIP.out)
+			/*
+			if ( "$params.coverage" != 0 && ("$params.reference_genome" != '' || "$params.reference_transcriptome" != '')) {
+				reference = params.reference_genome ?: params.reference_transcriptome
+				GET_NB_SEQ_FROM_COV(reference, params.coverage, ch_read[0])
+				nb_seq = GET_NB_SEQ.out.value
+			} else {
+				nb_seq = params.subset_seq
+			}
+			*/
+			SEQTK_SAMPLE(GUNZIP.out/*, nb_seq*/)
 			unzipped_fastq = SEQTK_SAMPLE.out
 		}
 
@@ -81,5 +86,4 @@ workflow CORE {
 		fastqscreen_report = FASTQSCREEN.out.report ?: Channel.empty()
 		fastp_report = DUPLICATED_READS.out.json
 		subset_fastq = unzipped_fastq
-		fastq_md5 = MD5SUM.out
 }
diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf
index a352500..eb75e22 100644
--- a/workflow/illumina_qc.nf
+++ b/workflow/illumina_qc.nf
@@ -68,13 +68,15 @@ include {	RNA_QC		} from "$baseDir/sub-workflows/local/rna_qc.nf"
 include {	DIVERSITY_QC	} from "$baseDir/sub-workflows/local/diversity_qc.nf"
 include { 	PARSE_REPORTS } from "$baseDir/modules/local/module_DTM.nf"
 include {	TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_RUN;
-			TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS
+			TREATMENT_DEMUXSTAT as TREATMENT_DEMUX_READSETS;
+			FILE_RENAME as RENAME_FASTQ;
 						} from "$baseDir/modules/local/module_NGL-Bi.nf"
 include {	MULTIQC		} from "${params.shared_modules}/multiqc.nf"
 include {	GCBIAS as GC_BIAS } from "${params.shared_modules}/gcbias.nf"
 include {	workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
 include {	UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC } from "${params.shared_modules}/ngl_bi.nf"
 include {	READSET_FILE_FROM_FILE as ADD_RS_RAW_FILES } from "${params.shared_modules}/ngl_bi.nf" addParams(ext: 'RAW')
+include { 	md5sum as MD5SUM	} from "${params.shared_modules}/md5sum.nf"
 // -------------------------------------------------
 // 					 EMAIL ON START
 // -------------------------------------------------
@@ -101,7 +103,7 @@ workflow ILLUMINA_QC {
 
 		if (params.insert_to_ngl){
 			// Add demultiplexStat treatments
-			TREATMENT_DEMUX_RUN(params.bi_run_code, CORE_ILLUMINA.out.demuxStat)
+			TREATMENT_DEMUX_RUN(NGLBI.out.runFile, CORE_ILLUMINA.out.demuxStat)
 			TREATMENT_DEMUX_READSETS(NGLBI.out.readsetsFile, CORE_ILLUMINA.out.demuxStat)
 		}
 	}
@@ -162,7 +164,9 @@ workflow ILLUMINA_QC {
 	)
 
 	if (params.insert_to_ngl){
-		ADD_RS_RAW_FILES(NGLBI.out.readsetsFile, CORE.out.fastq_md5, 'RAW', NGLBI.out.ready)
+		RENAME_FASTQ(fastq.map{it[1]}.collect(), NGLBI.out.readsetsFile, params.sq_xp_code)
+		MD5SUM(RENAME_FASTQ.out.fastq.collect(), params.run_name+'_fastq')
+		ADD_RS_RAW_FILES(NGLBI.out.readsetsFile, MD5SUM.out, 'RAW', NGLBI.out.ready)
 		UPDATE_STATE_FQC(NGLBI.out.readsetsFile, 'F-QC', MULTIQC.out.html)
 	}
 
-- 
GitLab


From 6a7302f8c3f84c208be1e0c970b489af773eac9e Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 25 Apr 2024 11:28:50 +0200
Subject: [PATCH 2/2] Update resources

---
 conf/base.config              | 9 ++++++---
 conf/test.config              | 2 +-
 modules/local/module_dna.nf   | 1 -
 nextflow.config               | 6 +++---
 sub-workflows/local/rna_qc.nf | 4 ++--
 5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/conf/base.config b/conf/base.config
index b4796c9..f6ad4e7 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -125,12 +125,15 @@ process {
 	withLabel: bwa {
 		module = toolsModuleHash['BWA']
 		cpus = { checkMax( 6 * task.attempt, 'cpus' ) }
-	    memory = { checkMax( 8.GB * task.attempt, 'memory' ) }
+	    memory = { checkMax( 16.GB * task.attempt, 'memory' ) }
 	    time = { checkMax( 3.d * task.attempt, 'time' ) }
 
 		publishDir = [
 			path: "${params.outdir}/alignment/bwa",
-			mode: 'copy'
+			mode: {
+				if (params.DTM_mode) 'symlink'
+				else 'copy'
+			}
 		]
 	}
 
@@ -223,7 +226,7 @@ process {
 		module = toolsModuleHash['SAMTOOLS']
 		cpus = { checkMax( 6 * task.attempt, 'cpus' ) }
 	    memory = { checkMax( 8.GB * task.attempt, 'memory' ) }
-	    time = { checkMax( 3.h * task.attempt, 'time' ) }
+	    time = { checkMax( 24.h * task.attempt, 'time' ) }
 	}
 
 	withLabel: alignment {
diff --git a/conf/test.config b/conf/test.config
index b8095b6..e8ba1ef 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -3,7 +3,7 @@
 //=========================================
 System.out.println "Profil dev => on ajuste les paramètres..."
 params {
-	ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/'
+	ngl_bi_client = '/work/project/PlaGe/test/jules/VisualStudioSources/ngl-bi_client/'
 	shared_modules = '/save/user/sbsuser/scripts-ngs/shared_modules_Current/'
 	is_dev_mode = true
 }
diff --git a/modules/local/module_dna.nf b/modules/local/module_dna.nf
index 2afa198..02836b5 100644
--- a/modules/local/module_dna.nf
+++ b/modules/local/module_dna.nf
@@ -26,7 +26,6 @@ process SAMTOOLS_VIEW {
 	tag "$sample"
 	
 	label 'samtools'
-	label 'alignment'
 	
 	input:
 		tuple val(sample), path(sam)
diff --git a/nextflow.config b/nextflow.config
index 1b1145c..ea9a9d0 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -37,7 +37,7 @@ params {
 	reference_genome = ""
 	make_star_index = false
 	reference_transcriptome = ""
-	sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases'
+	sortmerna_db_path = '/work/project/PlaGe/sortemerna_db'
 
 	// Amplicon / 16S params
 	min_overlap = 20
@@ -64,7 +64,7 @@ params {
 	shared_modules = '/home/sbsuser/save/scripts-ngs/shared_modules_Current'
 
 	// SLURM MAX RESSOURCES
-	max_memory = "250.GB"
+	max_memory = "500.GB"
 	max_time = "90.d"
 	max_cpus = "48"
 
@@ -72,7 +72,7 @@ params {
 	cluster_options = ''
 	is_dev_mode = false
 	DTM_mode = false
-	host = 'genologin'
+	host = 'genobioinfo'
 	email=""
 	email_dev="jules.sabban@inrae.fr"
 	email_on_fail="jules.sabban@inrae.fr"
diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf
index 2c32b0b..b04ac82 100644
--- a/sub-workflows/local/rna_qc.nf
+++ b/sub-workflows/local/rna_qc.nf
@@ -89,8 +89,8 @@ workflow RNA_QC {
 		} else {
 			// If Qualimap and Samtools were not executed
 			System.out.println "Pas de référence genomique ou transcriptomique renseignée, on ne peut pas faire d'alignement"
-			//qualimap_report_emitted =  Channel.empty()
-			//flagstats_output_emitted = Channel.empty()
+			qualimap_report_emitted =  Channel.empty()
+			flagstats_output_emitted = Channel.empty()
 		}
 
 		SORTMERNA(fastq, sortmerna_db.collect())
-- 
GitLab