Configuration File

To download a configuration file template, simply use the --get_config parameter. Using a config file makes your command line a lot cleaner and more concise.

# get config
nextflow run fmalmeida/mpgap --get_config

# run with config
nextflow run fmalmeida/mpgap -c [path-to-config]

Default configuration

/*
 * Configuration File to run fmalmeida/mpgap pipeline.
 */

params {

  // Default values for the pipeline parameters. Edit only the ones you need to change.

              /*
               * Input parameters
               */


// Path to YAML samplesheet file.
// Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know how to create a samplesheet file.
  input = null

// What is the type of organism of the input? Bacteria / Eukaryote / Fungus
// This changes the parameters for gene detection and genome assessment for Quast and BUSCO.
  organism = 'bacteria'

              /*
               * Output parameters
               */


// Output folder name
  output   = "output"
// Folder for the pipeline trace/info files, placed inside the output folder.
  tracedir = "${params.output}/pipeline_info"


              /*
               * Resources parameters
               */

// Memory allocation for pilon polish.
// Values in Gb. Default 50G. 50G has proved to be enough in most cases.
// This setting is crucial because, without enough memory, pilon will crash and will not correct your assembly.
  pilon_memory_limit  = 50
  pilon_polish_rounds = 4 // how many rounds should be performed?

              /*
               * General parameters
               *
               * These parameters will set the default for all samples.
               * However, they can also be set inside the YAML. If this happens,
               * the pipeline will use the value inside the YAML to overwrite
               * the parameter for that specific sample.
               *
               * Please read the documentation https://mpgap.readthedocs.io/en/latest/samplesheet.html to know more about the samplesheet file.
               */


// This parameter only needs to be set if the software chosen is Canu, wtdbg2 or Haslr. It is optional for Flye.
// It is an estimate of the size of the genome. Common suffixes are allowed, for example, 3.7m or 2.8g
  genome_size = null

// Select the appropriate value to pass to wtdbg2 to assemble input.
// Options are: "ont" for Nanopore reads, "rs" for PacBio RSII, "sq" for PacBio Sequel, "ccs" for PacBio CCS reads.
// By default, if not given, the pipeline will use the value "ont" if nanopore reads are used and "sq" if pacbio reads are used
  wtdbg2_technology = null

// Select the appropriate shasta config to use for assembly.
// Since shasta v0.8 (Oct/2021) this parameter is mandatory.
// You can check availability at: https://paoloshasta.github.io/shasta/Configurations.html
  shasta_config = "Nanopore-Oct2021"

// Tells the pipeline to interpret the long reads as "corrected" long reads.
// This will activate (if available) the options for corrected or even high
// quality (hq) reads in the assemblers.
// Be cautious when using these parameters. If your reads are not corrected|hq, and
// you use them, you will probably not generate any contigs.
  corrected_longreads    = false
  high_quality_longreads = false

// The parameter below (hybrid_strategy) selects the hybrid strategy adopted by the pipeline.
// Read the documentation https://mpgap.readthedocs.io/en/latest/manual.html to know more about the hybrid strategies.
//
// Whenever using this parameter, it is also possible to polish the longreads-only assemblies with Nanopolish,
// Medaka or VariantCaller (Arrow) before the polishing with shortreads (using Pilon). For that it is necessary to set
// the right parameters: pacbio_bam and nanopolish_fast5 (files given only inside YAML) or medaka_model.
  hybrid_strategy = 1

// Default medaka model used for polishing nanopore long reads assemblies.
// Please read their manual https://github.com/nanoporetech/medaka to know more about the available models.
  medaka_model = "r941_min_high_g360"

// This parameter sets the max number of haplotypes to be considered by nanopolish.
// Sometimes the pipeline may crash because too much variation was found, exceeding the limit.
  nanopolish_max_haplotypes = 1000

// BUSCO dataset. The pipeline runs BUSCO after assemblies and the user can select one of the
// available BUSCO datasets listed in their website: https://busco.ezlab.org/busco_userguide.html#running-busco
//
// If blank, bacteria_odb10 will be used
//
// If unsure, you can set the param to 'auto', which will tell BUSCO to automatically select the most
// appropriate one (it takes a little more time and space).
  busco_lineage = null


            /*
             * Advanced parameters
             *
             * Controlling the execution of assemblers and other tools.
             * A skip_* parameter must be set to true to skip the software and false to use it.
             * It is also possible to pass additional parameters to the tools.
             * Additional parameters must be in quotes and separated by spaces.
             */


  quast_additional_parameters = null          // Give additional parameters to Quast while assessing assembly metrics.
                                              // Must be given as shown in Quast manual. E.g. " --large --eukaryote ".

  skip_raw_assemblies_polishing = false       // This will make the pipeline not polish raw assemblies on hybrid strategy 2.
                                              // For example, if a sample is assembled with flye and polished with medaka,
                                              // by default, both assemblies will be passed to pilon so you can compare them.
                                              // If you don't need this comparison and don't want to polish the raw assembly,
                                              // use this parameter.

  skip_spades = false                         // Hybrid and shortreads only assemblies
  spades_additional_parameters = null         // Must be given as shown in Spades manual. E.g. " --meta --plasmids "

  skip_shovill = false                        // Paired shortreads only assemblies
  shovill_additional_parameters = null        // Must be given as shown in Shovill manual. E.g. " --depth 15 "
                                              // The pipeline already executes shovill with spades, skesa and megahit, so please, do not use it with shovill's ``--assembler`` parameter.

  skip_unicycler = false                      // Hybrid and shortreads only assemblies
  unicycler_additional_parameters = null      // Must be given as shown in Unicycler manual. E.g. " --mode conservative --no_correct "

  skip_megahit = false                        // Shortreads only assemblies
  megahit_additional_parameters = null        // Must be given as shown in Megahit manual. E.g. " --presets meta-large "

  skip_haslr = false                          // Hybrid assemblies
  haslr_additional_parameters = null          // Must be given as shown in Haslr manual. E.g. " --cov-lr 30 "

  skip_canu = false                           // Longreads only assemblies
  canu_additional_parameters = null           // Must be given as shown in Canu manual. E.g. " correctedErrorRate=0.075 corOutCoverage=200 "

  skip_flye = false                           // Longreads only assemblies
  flye_additional_parameters = null           // Must be given as shown in Flye manual. E.g. " --meta --iterations 4 "

  skip_raven = false                          // Longreads only assemblies
  raven_additional_parameters = null          // Must be given as shown in Raven manual. E.g. " --polishing-rounds 4 "

  skip_wtdbg2 = false                         // Longreads only assemblies
  wtdbg2_additional_parameters = null         // Must be given as shown in wtdbg2 manual. E.g. " --tidy-reads 5000 "

  skip_shasta = false                         // Nanopore longreads only assemblies
  shasta_additional_parameters = null         // Must be given as shown in shasta manual. E.g. " --Reads.minReadLength 5000 "

  skip_hifiasm = false                        // Longreads only assemblies
  hifiasm_additional_parameters = null        // Must be given as shown in Hifiasm manual. E.g. " --ul ul.fq.gz "

  skip_pilon = false                          // Skip pilon polisher when performing hybrid assembly strategy 2
  skip_polypolish = false                     // Skip polypolish polisher when performing hybrid assembly strategy 2

            /*
             * Resources controlling parameters
             *
             * Here are some parameters that allow the user to better tune the resources used by the pipeline.
             *
             * The start_asm_{mem,cpus} parameters tell the pipeline how much memory and how many cpus the
             * assembly modules and quast should request in the first try. This is essential for bigger genomes
             * in order to avoid failing the first try due to lack of memory and then running again (automatically)
             * using all the max values allowed with the max_memory and max_cpus parameters.
             *
             * The max_memory and max_cpus parameters tell the pipeline the maximum amount of
             * these items that is allowed per job. The pipeline starts by requesting less mem&cpus than
             * what is defined by these params, and, in case the first try fails, it then maxes out the job
             * to use the maximum number you allowed.
             *
             * The max_time parameter defines how long a single job is allowed to run.
             */

  // starting values for the assembly jobs (and quast) to ask for in the very first try
  start_asm_mem              = 20.GB
  start_asm_cpus             = 6

  // maximum values to be used on automatic second try in case of lack of memory (all jobs)
  max_memory                 = 40.GB
  max_cpus                   = 10
  // NOTE(review): max_time is a quoted string while the memory values above are unquoted — confirm this is intended.
  max_time                   = '40.h'

}