diff --git a/assets/fastq_screen.conf_example b/assets/fastq_screen.conf_example index 78180aedd6af036d755ac20046b302818472966e..dbbb7bcb7c49f4053af9020f13f4d0f4feb80ec9 100644 --- a/assets/fastq_screen.conf_example +++ b/assets/fastq_screen.conf_example @@ -10,7 +10,7 @@ #BOWTIE /usr/local/bin/bowtie/bowtie #BOWTIE2 /usr/local/bioinfo/src/bowtie/bowtie2-2.4.4-linux-x86_64/bowtie2 -BWA /usr/local/bioinfo/src/bwa/bwa-0.7.15/bwa +BWA /usr/local/bioinfo/src/bwa/bwa-0.7.17/bwa ############################################ ## Bismark (for bisulfite sequencing only) # diff --git a/bin/DTM/make_bedgraph.sh b/bin/DTM/make_bedgraph.sh index 1eab8918d443762ca0ffb71ce6a68d5b2e160ed8..099a9ad322eb502b80247aae5aeeb8c0c5a76445 100644 --- a/bin/DTM/make_bedgraph.sh +++ b/bin/DTM/make_bedgraph.sh @@ -21,10 +21,8 @@ I_NAMES=$2 # path to chrom_names file R_PATTERN=$3 # chr pattern to remove from bedgraph file #### MODULES #### -module load bioinfo/samtools-1.16.1 -module load bioinfo/bedtools-2.27.1 - - +module load bioinfo/samtools/1.18 +module load bioinfo/bedtools/2.27.1 replace_chr_names() { # replace chr names @@ -82,7 +80,7 @@ remove_unwanted_scaffold(){ main() { - BAM=$(find $I_DIR -type f -name '*R1*unmerged.bam' -execdir basename '{}' ';'|sed -n ${SLURM_ARRAY_TASK_ID}p) + BAM=$(find $I_DIR -type f -name '*unmerged.bam' -execdir basename '{}' ';'|sed -n ${SLURM_ARRAY_TASK_ID}p) echo -e "Traitement de ${BAM}" BAM_PATH="${I_DIR}/${BAM}" @@ -97,4 +95,4 @@ main() { remove_unwanted_scaffold } -main \ No newline at end of file +main diff --git a/bin/parse_reports.sh b/bin/parse_reports.sh index 03d309de371f6980accac9e5b61572a116b936a0..24042e7079964afd9f849f1b3d05745ff0902b3c 100755 --- a/bin/parse_reports.sh +++ b/bin/parse_reports.sh @@ -17,7 +17,7 @@ MEAN_COV=$(sed -n 's/mean coverageData.*= \(.*X\)/\1/p' $QUALIMAP_REPORT | sed ' ALIGN=$(sed -n 's/number of mapped reads =.*(\(.*%\))/\1/p' $QUALIMAP_REPORT | sed 's/ //g') AT_DROPOUT=$(grep '^ACCUMULATION_LEVEL' -A 1 $GCBIAS_REPORT | cut -d$'\t' -f6 | tail -1) -GC_DROPOUT=$(grep '^ACCUMULATION_LEVEL' -A 1 $GCBIAS_REPORT | cut -d$'\t' -f6 | tail -1) +GC_DROPOUT=$(grep '^ACCUMULATION_LEVEL' -A 1 $GCBIAS_REPORT | cut -d$'\t' -f7 | tail -1) ## Write stat file echo "duplication_rate: $DUPLI" >> $O_STAT diff --git a/conf/base.config b/conf/base.config index 0060fd2aebdffc2d7c90fcbcf6e8445fa3616b19..e62053e612fca82da9fad3df50e811bb8be279dd 100644 --- a/conf/base.config +++ b/conf/base.config @@ -105,6 +105,7 @@ process { module = toolsModuleHash['FASTQC'] maxRetries = 4 + cpus = { checkMax( 2 * task.attempt, 'cpus' ) } time = { checkMax( 5.h * task.attempt * params.resource_factor, 'time' ) } } @@ -338,7 +339,7 @@ process { withName: SORTMERNA { module = toolsModuleHash['SORTMERNA'] - memory = { checkMax( 2.GB * task.attempt, 'memory' ) } + memory = { checkMax( 10.GB * task.attempt * params.resource_factor, 'memory' ) } time = { checkMax( 10.h * task.attempt, 'time' ) } cpus = { checkMax( 1 * task.attempt, 'cpus' ) } @@ -361,7 +362,7 @@ process { withName: QUALIMAP { module = toolsModuleHash['QUALIMAP'] cpus = { checkMax( 8 * task.attempt, 'cpus' ) } - memory = { checkMax( 8.GB * task.attempt, 'memory' ) } + memory = { checkMax( 30.GB * task.attempt * params.resource_factor, 'memory' ) } time = { checkMax( 3.h * task.attempt, 'time' ) } publishDir = [ diff --git a/conf/dependencies_genobioinfo.config b/conf/dependencies_genobioinfo.config index de6c332aef3e2b19a2e61c7f2acfe861b754cf28..b715b7a99798c50ea7d6d49776c65603a14b6ffd 100644 --- a/conf/dependencies_genobioinfo.config +++ b/conf/dependencies_genobioinfo.config @@ -7,7 +7,7 @@ toolsModuleHash['FASTP'] = ['bioinfo/fastp/0.23.2'] toolsModuleHash['FASTQC'] = ['bioinfo/FastQC/0.12.1'] // version upgraded face to genologin toolsModuleHash['FASTQSCREEN'] = ['bioinfo/FastQScreen/0.15.3'] toolsModuleHash['R'] = ['statistics/R/4.3.0'] -toolsModuleHash['PICARD'] = ['devel/java/17.0.6', 'bioinfo/picard-tools/3.0.0'] // a vérifier pour R miniconda!! +toolsModuleHash['PICARD'] = ['statistics/R/4.3.0', 'devel/java/17.0.6', 'bioinfo/picard-tools/3.0.0'] // a vérifier pour R miniconda!! // ----- RNA ----- // toolsModuleHash['SALMON'] = ['bioinfo/Salmon/1.10.0'] // version upgraded face to genologin diff --git a/conf/report.config b/conf/report.config index 520c00b48c85a8f167893b1a3ced4251eb5036a6..c9f0e97a82711a599e74e370de57db8aa2c811a8 100644 --- a/conf/report.config +++ b/conf/report.config @@ -29,5 +29,5 @@ manifest { description = "Workflow for Illumina data quality control" mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.10.0' + version = '1.15.0' } \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index 39d54d62bcb2453f425e2480d3a0a791c9c53690..b8095b6487b0aa61cb6c8fb971619f8801c52e6f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -4,7 +4,7 @@ System.out.println "Profil dev => on ajuste les paramètres..." params { ngl_bi_client = '/home/sbsuser/work/test/jules/VisualStudioSources/ngl-bi_client/' - shared_modules = '/home/sbsuser/work/Nextflow/shared_modules/ExportSources_Jules/' + shared_modules = '/save/user/sbsuser/scripts-ngs/shared_modules_Current/' is_dev_mode = true } diff --git a/modules/local/module_rna.nf b/modules/local/module_rna.nf index 2cf07ecff83b6a52dd24d57121d859001463907c..2563adfe9f7f58dc704ac1ea8613a1d3fc83f811 100644 --- a/modules/local/module_rna.nf +++ b/modules/local/module_rna.nf @@ -91,6 +91,7 @@ process STAR_ALIGN { output: tuple val(sample), path("${sample}_Log.final.out"), emit: results tuple val(sample), path("${sample}_Log.out"), emit: log + tuple val(sample), path("${sample}_Aligned.out.sam"), emit: sam script: def args = task.ext.args ?: '' diff --git a/nextflow.config b/nextflow.config index 4627edbf8205e964a7f2a7c635c5eaa868a9fc5b..a823a406f1e3567a265a33aeffd1c47af7364d86 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,12 +38,6 @@ params { make_star_index = false reference_transcriptome = "" sortmerna_db_path = '/usr/local/bioinfo/src/SortMeRNA/sortmerna-2.1b/rRNA_databases' - sortmerna_bac_16s = sortmerna_db_path + '/silva-bac-16s-id90.fasta' - sortmerna_bac_23s = sortmerna_db_path + '/silva-bac-23s-id98.fasta' - sortmerna_arc_16s = sortmerna_db_path + '/silva-arc-16s-id95.fasta' - sortmerna_arc_23s = sortmerna_db_path + '/silva-arc-23s-id98.fasta' - sortmerna_euk_18s = sortmerna_db_path + '/silva-euk-18s-id95.fasta' - sortmerna_euk_28s = sortmerna_db_path + '/silva-euk-28s-id98.fasta' // Amplicon / 16S params min_overlap = 20 @@ -114,7 +108,7 @@ params { samplesheet = inputdir.toString() + "/SampleSheet.csv" nf_uniqueness = uniqueness_format.format(new Date()) outdir_prefix = outdir_prefix ?: project + "_" + run_name - outdir = inputdir + "/nextflow/" + outdir_prefix + "_" + nf_uniqueness + outdir = inputdir + "/nextflow/" + run_name + "/" + outdir_prefix + "_" + nf_uniqueness subset_seq = miseq_subset_seq if ( sequencer =~ /NovaSeq.*/ ) { diff --git a/sub-workflows/local/core_pipeline.nf b/sub-workflows/local/core_pipeline.nf index 73c5fd873fcd737108ff37ced9eb34b08033f10f..eb171419713f8799c17a9d70a460f443c4b3d9a4 100644 --- a/sub-workflows/local/core_pipeline.nf +++ b/sub-workflows/local/core_pipeline.nf @@ -72,7 +72,7 @@ workflow CORE { DUPLICATED_READS(unzipped_fastq .collect{it[1]} .flatten() - .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] } + .map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] } .groupTuple() ) // need fastq paired !!! diff --git a/sub-workflows/local/rna_qc.nf b/sub-workflows/local/rna_qc.nf index c7afbaa2173856ff7f1cc0a8a50fb5b56d34cbba..2c32b0b42f75cb7e71dce35148747b3e348a72b5 100644 --- a/sub-workflows/local/rna_qc.nf +++ b/sub-workflows/local/rna_qc.nf @@ -11,7 +11,7 @@ * QC des données ARN : * - Alignement contre génome de référence avec STAR * - Pseudo alignement contre transcriptome de référence avec SALMON - * - Rapport d'alignement avec Qualimap ?? + * - Rapport d'alignement avec Qualimap */ // ------------------------------------------------- @@ -22,7 +22,11 @@ include { STAR_INDEX; SALMON_INDEX; SALMON_QUANT; } from "$baseDir/modules/local/module_rna.nf" - +include { SAMTOOLS_VIEW; + SAMTOOLS_SORT; + SAMTOOLS_FLAGSTATS; +} from "$baseDir/modules/local/module_dna.nf" +include { QUALIMAP } from "${params.shared_modules}/qualimap.nf" include { SAMTOOLS_FAIDX } from "${params.shared_modules}/samtools.nf" include { SORTMERNA } from "${params.shared_modules}/sortmerna.nf" // ------------------------------------------------- @@ -34,7 +38,7 @@ workflow RNA_QC { sortmerna_db main: - fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2]_.*/)[0][1] , $it ] }.groupTuple() + fastq = fastq.collect{it[1]}.flatten().map { $it -> [ ($it.simpleName =~ /(.*)_R[1-2].*/)[0][1] , $it ] }.groupTuple() align_results = Channel.empty() if ( "$params.reference_genome" != '' ) { @@ -47,7 +51,12 @@ workflow RNA_QC { } else { star_index = Channel.from(file(params.reference_genome).getParent()) } - align_results = STAR_ALIGN(fastq, star_index).results // R1 et R2 en même temps + STAR_ALIGN(fastq, star_index).results // R1 et R2 en même temps + align_results = STAR_ALIGN.out.results + SAMTOOLS_VIEW(STAR_ALIGN.out.sam) + SAMTOOLS_SORT(SAMTOOLS_VIEW.out.bam) + SAMTOOLS_FLAGSTATS(SAMTOOLS_VIEW.out.bam) + qualimap_report_emitted = QUALIMAP(SAMTOOLS_SORT.out.bam).report } else if ("$params.reference_transcriptome" != '') { // if indexFiles does not exist @@ -75,6 +84,7 @@ workflow RNA_QC { salmon_index, ch_lib_type ).results + qualimap_report_emitted= Channel.empty() } else { // If Qualimap and Samtools were not executed @@ -88,5 +98,6 @@ workflow RNA_QC { emit: align_results = align_results sortmerna_log = SORTMERNA.out.log + qualimap_report = qualimap_report_emitted //flagstats_output = flagstats_output_emitted } \ No newline at end of file diff --git a/workflow/illumina_qc.nf b/workflow/illumina_qc.nf index 9b17503f6ef00485e758588b9dbe1d8fb54a9a53..a3525002faf027541b3876d36b38abb0db3564a5 100644 --- a/workflow/illumina_qc.nf +++ b/workflow/illumina_qc.nf @@ -44,12 +44,12 @@ ch_read=Channel // Channel of rRNA databases for sortmerna ch_sortmerna_db = Channel.from( - params.sortmerna_euk_18s, - params.sortmerna_euk_28s, - params.sortmerna_bac_16s, - params.sortmerna_bac_23s, - params.sortmerna_arc_16s, - params.sortmerna_arc_23s + params.sortmerna_db_path + '/silva-bac-16s-id90.fasta', + params.sortmerna_db_path + '/silva-bac-23s-id98.fasta', + params.sortmerna_db_path + '/silva-arc-16s-id95.fasta', + params.sortmerna_db_path + '/silva-arc-23s-id98.fasta', + params.sortmerna_db_path + '/silva-euk-18s-id95.fasta', + params.sortmerna_db_path + '/silva-euk-28s-id98.fasta', ) mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1 @@ -130,7 +130,8 @@ workflow ILLUMINA_QC { RNA_QC(CORE.out.subset_fastq, ch_sortmerna_db) ch_mqc = ch_mqc.mix( RNA_QC.out.align_results.collect{it[1]}.ifEmpty([]), - RNA_QC.out.sortmerna_log.collect{it[1]}.ifEmpty([]) + RNA_QC.out.sortmerna_log.collect{it[1]}.ifEmpty([]), + RNA_QC.out.qualimap_report.collect{it[1]}.ifEmpty([]), ) } else if (params.data_nature =~ "16S|Amplicon") { @@ -188,7 +189,7 @@ workflow.onComplete { end_mail_sent = sendFinalMail(format.format(new Date()), params.summary) // remove work directory if pipeline is successful - if (workflow.success && (!params.is_dev_mode || !params.DTM_mode)) { + if (workflow.success && !( params.is_dev_mode || params.DTM_mode)) { println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir" exec: workflow.workDir.deleteDir()