From 651c869f84cc28d345e422377ed741d007578d05 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 13 Feb 2025 10:11:05 +0100
Subject: [PATCH 1/6] Multiqc fastqscreen section has tips in blockquote

Ref: #155
---
 assets/multiqc_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index 9b5f787..cd1e609 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -61,6 +61,7 @@ remove_sections:
 section_comments:
     fastqc_sequence_counts: "Tips : Use this graph to visualize the amount of each samples. Shouldn't be use to determine the proportion of duplicated reads (see 'General Statistics')."
     sortmerna: "Total rRNA percentage is available in the 'General Statistics'. Non-rRNA sequences are NOT USED for this graph."
+    fastq_screen: "This analysis is performed by permissive alignments using BWA, which display a certain amount of false positive hits. Thus, don't worry if a low percentage of contamination is present.<br><i>NB: This analysis has limited interest on transcriptomic reads.</i>"
 
 module_order:
     - fastqc:
@@ -76,7 +77,6 @@ module_order:
     - fastq_screen:
         name: "ContaminationSearch"
         #info: "This section shows the module with different files"
-        info: "This analysis is performed by permissive alignments using BWA, which display a certain amount of false positive hits. Thus, don't worry if a low percentage of contamination is present. This analysis has limited interest on transcriptomic reads.<br>"
         target: "FastQ-Screen"
     - sortmerna:
         name: "ContaminationSearch - rRNA"
-- 
GitLab


From 7b0c9b10465b987cb72db07aa5a094e834c08f1b Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 13 Feb 2025 11:57:06 +0100
Subject: [PATCH 2/6] typo in mqc conf file

Ref: #155
---
 assets/multiqc_config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml
index cd1e609..e5bde6f 100644
--- a/assets/multiqc_config.yaml
+++ b/assets/multiqc_config.yaml
@@ -6,7 +6,7 @@ intro_text: "This MultiQC report summarise Quality Control analysis results."
 
 report_comment: >
     You can find detailed guidance on how to interpret the following graphs in the <a href="https://bios4biol.pages.mia.inra.fr/Help4MultiQC/" target="_blank"><strong>Help4MultiQC GitBook</strong></a>.
-    Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may help deepen your understanding of the presented data.
+    Developed by some members of the CATI Bios4Biol team at INRAE, this resource provides valuable insights into data interpretation. While it was not specifically created for this report, it offers complementary information that may enhance your understanding of the presented data.
 
 show_analysis_paths: False
 show_analysis_time: False
@@ -59,7 +59,7 @@ remove_sections:
     - flash-histogram
 
 section_comments:
-    fastqc_sequence_counts: "Tips : Use this graph to visualize the amount of each samples. Shouldn't be use to determine the proportion of duplicated reads (see 'General Statistics')."
+    fastqc_sequence_counts: "Tip : Use this graph to visualize the amount of each samples. Shouldn't be used to determine the proportion of duplicated reads (see 'General Statistics')."
     sortmerna: "Total rRNA percentage is available in the 'General Statistics'. Non-rRNA sequences are NOT USED for this graph."
     fastq_screen: "This analysis is performed by permissive alignments using BWA, which display a certain amount of false positive hits. Thus, don't worry if a low percentage of contamination is present.<br><i>NB: This analysis has limited interest on transcriptomic reads.</i>"
 
-- 
GitLab


From 27bbe0cefabe18f2a170bb728d79dc822dc71de6 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 20 Feb 2025 16:27:55 +0100
Subject: [PATCH 3/6] AVITI fastq header is anonymized

Machine's run name was removed

Ref: #156
---
 conf/base.config                     |  6 ++++++
 modules/local/module_core_element.nf | 33 +++++++++++++++++++++++++++++++++
 sub-workflows/local/ngl.nf           | 10 +++++++++-
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/conf/base.config b/conf/base.config
index 6f435ae..e4411db 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -232,6 +232,12 @@ process {
         ]
     }
 
+    withName: FQ_HEADER_RENAME {
+        time = { checkMax( 2.h * task.attempt, 'time' ) }
+        memory = { checkMax( 5.GB * task.attempt * params.resource_factor, 'memory' ) }
+        cpus = 8
+    }
+
     // ----- WithLabel
     withLabel: littleJob {
         executor = 'local'
diff --git a/modules/local/module_core_element.nf b/modules/local/module_core_element.nf
index a8ff5da..b7457d1 100644
--- a/modules/local/module_core_element.nf
+++ b/modules/local/module_core_element.nf
@@ -26,3 +26,36 @@ process DEMUX_STATS {
         $threshold
     """
 }
+
+/*
+ * Anonymize sequence headers: blank the second ':'-separated field of each
+ * '@' header line (the machine's run name on AVITI data) and emit the result
+ * as an uncompressed "<fileName>.<extension>" file.
+ */
+process FQ_HEADER_RENAME {
+    tag "$meta.name"
+
+    input:
+    tuple val(meta), path(fastx)
+
+    output:
+    tuple val(meta), path("${fileName}.${extension}"), emit: file
+
+    script:
+    def args = task.ext.args ?: ''
+    extension = "fastq"
+    if ("$fastx" ==~ /.+\.(fasta|fa|fas|fna)(\.gz)?/) {
+        extension = "fasta"
+    }
+    fileName = fastx.toString() - ".${extension}" - '.gz' // remove also .gz if exists
+    // task.ext.args is forwarded to pigz only
+    def reader = fastx.toString().endsWith('.gz') ? "pigz $args -dc -p ${task.cpus} ${fastx}" : "cat ${fastx}"
+    // - NR % 4 == 1: only header records are edited; a quality line may also start with '@'
+    // - temp file + mv: the output can have the same name as the staged input, so
+    //   redirecting straight into it would truncate the input before it is read
+    """
+    ${reader} | \
+        awk -F':' 'BEGIN{OFS=":"} {if (NR % 4 == 1 && \$0 ~ /^@/) {\$2=""; sub("::", ":", \$0)} print}' - > ${fileName}.${extension}.tmp
+    mv ${fileName}.${extension}.tmp ${fileName}.${extension}
+    """
+}
\ No newline at end of file
diff --git a/sub-workflows/local/ngl.nf b/sub-workflows/local/ngl.nf
index f05c720..2bff87e 100644
--- a/sub-workflows/local/ngl.nf
+++ b/sub-workflows/local/ngl.nf
@@ -13,11 +13,13 @@
 include { BEGIN_NGLBI as BEGIN } from "${params.shared_modules}/workflows/begin_nglbi.nf"
 include { COPY_TO_SAVE } from "${params.shared_modules}/workflows/copy_to_save.nf"
 include { UPDATE_NGLBI_STATE_FROM_FILE as UPDATE_STATE_FQC} from "${params.shared_modules}/ngl_bi.nf"
+include { GZIP } from "${params.shared_modules}/gzip.nf"
 
+include { FQ_HEADER_RENAME } from "$baseDir/modules/local/module_core_element.nf"
 include { FILE_RENAME as RENAME_FASTQ } from "$baseDir/modules/local/module_NGL-Bi.nf"
 include { FILE_RENAME as RENAME_INDEX } from "$baseDir/modules/local/module_NGL-Bi.nf"
 include { NGSRG_ILLUMINA } from "$baseDir/modules/local/module_NGL-Bi.nf"
-include { NGSRG_ELEMBIO; } from "$baseDir/modules/local/module_NGL-Bi.nf"
+include { NGSRG_ELEMBIO } from "$baseDir/modules/local/module_NGL-Bi.nf"
 // -------------------------------------------------
 // WORKFLOW
 // -------------------------------------------------
@@ -66,6 +68,12 @@ workflow NGL {
     RENAME_FASTQ(fastq.map{it[1]}.collect(), readsets_created, sq_xp_code, 'fastq_read')
     fastq_files = fastq_files.mix(RENAME_FASTQ.out.fastq.ifEmpty([]))
 
+    if (sequencer_name =~ "AVITI") {
+        FQ_HEADER_RENAME(fastq_files.flatten().map{it -> [[name: it.simpleName], it]})
+        GZIP(FQ_HEADER_RENAME.out.file)
+        fastq_files = GZIP.out.archive.map{it -> it[1]} // remapper ? // [[meta], files]
+    }
+
     fq = fastq_files
         .flatMap()
         .map { it -> [[type: 'fastq', barcode:'all'], it]}
-- 
GitLab


From 8de006d34844aa801cdfa3aaf8e58ffbda5e9b57 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 20 Feb 2025 16:30:27 +0100
Subject: [PATCH 4/6] Remove comment in code

ref: #156
---
 sub-workflows/local/ngl.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sub-workflows/local/ngl.nf b/sub-workflows/local/ngl.nf
index 2bff87e..6c6fd48 100644
--- a/sub-workflows/local/ngl.nf
+++ b/sub-workflows/local/ngl.nf
@@ -71,7 +71,7 @@ workflow NGL {
     if (sequencer_name =~ "AVITI") {
         FQ_HEADER_RENAME(fastq_files.flatten().map{it -> [[name: it.simpleName], it]})
         GZIP(FQ_HEADER_RENAME.out.file)
-        fastq_files = GZIP.out.archive.map{it -> it[1]} // remapper ? // [[meta], files]
+        fastq_files = GZIP.out.archive.map{it -> it[1]}
     }
 
     fq = fastq_files
-- 
GitLab


From ba6d615d2e0605ec5e57ef7bf630eb37679ca98c Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 20 Feb 2025 16:33:31 +0100
Subject: [PATCH 5/6] E.coli reference for conta has now generic name

Named as Bacterium to be in line with Yeast
---
 assets/fastq_screen.conf_example | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/fastq_screen.conf_example b/assets/fastq_screen.conf_example
index 084bedc..97dff58 100644
--- a/assets/fastq_screen.conf_example
+++ b/assets/fastq_screen.conf_example
@@ -55,7 +55,7 @@ THREADS 8
 ## have contaminated your sample during the library preparation step.
 
 ## Genome of E. coli
-DATABASE E.coli /home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
+DATABASE Bacterium /home/sbsuser/plage/references/indexed/ng6_conta_ref/Escherichia_coli/FRIK2069/genome/BWA/Escherichia_coli_FRIK2069
 
 Sequence of PhiX
 DATABASE PhiX /home/sbsuser/plage/references/indexed/ng6_conta_ref/PhiX/BWA/phi.fa
-- 
GitLab


From 1d00aaf4924d8bcba0627e9260e87db6e397c369 Mon Sep 17 00:00:00 2001
From: jsabban <jules.sabban@inrae.fr>
Date: Thu, 20 Feb 2025 16:44:48 +0100
Subject: [PATCH 6/6] Update manifest version

---
 conf/report.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/report.config b/conf/report.config
index 7cd6657..f97ea87 100644
--- a/conf/report.config
+++ b/conf/report.config
@@ -29,5 +29,5 @@ manifest {
     description = "Workflow for Illumina data quality control"
     mainScript = 'main.nf'
     nextflowVersion = '>=0.32.0'
-    version = '1.27.0'
+    version = '1.27.6'
 }
\ No newline at end of file
-- 
GitLab