Newer
Older

Helene Rimbert
committed
##### QUERY related files/parameters (refseqv2.1)

Helene Rimbert
committed
# GFF annotation to transfer

Helene Rimbert
committed
annotationQuery: 'data/IWGSC_refseqv2.1/IWGSC_refseqv2.1_annotation.gff3'

Helene Rimbert
committed
# feature type used for anchoring on target genome

Helene Rimbert
committed
# FASTA of the query (used to check the sequences after the coordinates are calculated on the target genome)

Helene Rimbert
committed
queryFasta: 'data/IWGSC_refseqv2.1/IWGSC_refseqv2.1_genome.fasta'

Helene Rimbert
committed
# BLAST database of all mRNAs, used to rescue genes that failed to transfer with the targeted approach

Helene Rimbert
committed
blastdb: 'data/IWGSC_refseqv2.1/IWGSC_refseqv2.1_mrna'
# map of all chromosome ids --> NEEDS TO BE UPDATED in a future version WITH ONE ARRAY FOR THE QUERY AND ONE ARRAY FOR THE TARGET GENOME ASSEMBLY

Helene Rimbert
committed
# Chromosome identifiers, one list per naming scheme (22 entries each).
# NOTE(review): the two lists are ordered differently -- 'chromosomes' runs
# through all 'A' entries, then 'B', then 'D', whereas 'refChrom' runs
# 1A, 1B, 1D, 2A, ... -- confirm that no consumer pairs them by index.
chromosomes:
  - '1A'
  - '2A'
  - '3A'
  - '4A'
  - '5A'
  - '6A'
  - '7A'
  - '1B'
  - '2B'
  - '3B'
  - '4B'
  - '5B'
  - '6B'
  - '7B'
  - '1D'
  - '2D'
  - '3D'
  - '4D'
  - '5D'
  - '6D'
  - '7D'
  - 'U'
refChrom:
  - 'chr1A'
  - 'chr1B'
  - 'chr1D'
  - 'chr2A'
  - 'chr2B'
  - 'chr2D'
  - 'chr3A'
  - 'chr3B'
  - 'chr3D'
  - 'chr4A'
  - 'chr4B'
  - 'chr4D'
  - 'chr5A'
  - 'chr5B'
  - 'chr5D'
  - 'chr6A'
  - 'chr6B'
  - 'chr6D'
  - 'chr7A'
  - 'chr7B'
  - 'chr7D'
  - 'chrUn'

Helene Rimbert
committed

Helene Rimbert
committed
##### TARGET related files/parameters (julius)
targetFasta: 'data/TraesJulius_pseudo.fasta'

Helene Rimbert
committed
##### ISBP/markers related config and parameters

Helene Rimbert
committed
# BAM file of markers/ISBPs mapped on the target genome (Julius)
isbpBam: 'data/TraesJulius_pseudo.isbps.bam'
# BED file of coordinates on the query genome (REFSEQ v2.1)
isbpBed: 'data/IWGSC_refseqv2.1/IWGSC_refseqv2.1_ISBPs.bed'

Helene Rimbert
committed
# minimum mapping quality of markers on the target genome
mapq: 30
# max mismatches per ISBP/marker

Helene Rimbert
committed
mismatches: 2

Helene Rimbert
committed
##### OUTPUT directory

Helene Rimbert
committed
results: 'results'
finalPrefix: 'TaeJulius_magatt_20november'
# This file contains two columns: the first is the chromosome name as it appears in the genome.fasta of the new reference,
# and the second is the chromosome name as it will appear in the new gene names.
chromMapID: 'data/chromosomeMappingID.csv'
##### Nomenclature for final gene IDs
# used in rule renameGeneIds (rules/geneAnchoring.smk)

Helene Rimbert
committed
gff_prefix: 'TraesJU'
gff_version: '01G'
gff_source: 'MAGATT-IWGSCCSv2.1'