diff --git a/.gitignore b/.gitignore
index 91c1711040a5616a1e7f8c3081c8ad9bcefc9251..f7f2ef469c92ae120744e09ed58b10c34579b3a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,9 +48,6 @@
 #!*.txt
 !*.yml
 !*.yaml
-!Snakefile
-!*.smk
-!slurm_logs/
 
 # 3) add a pattern to track the file patterns of section2 even if they are in
     # subdirectories
diff --git a/bed2vcf.py b/bed2vcf.py
index 3f76d2c0630a1b2a292bafbd4a6309d9be77dd69..13d59ad5cff4c0ca0c5a0a9a241425fdabcc94a2 100644
--- a/bed2vcf.py
+++ b/bed2vcf.py
@@ -35,24 +35,14 @@ def set_ref_alt(ref, alt, row, vcf_df):
 	vcf_df.at[row, "ALT"] = alt
 
 def get_random_len(svtype):
-	# INV
-	if svtype == "INV":
-		rng = np.random.default_rng()
-		r = rng.random()
-		if r > 0.17:
-			s = np.random.uniform(250, 6000,1).round()
-		else:
-			s = np.random.uniform(6000, 45000,1).round()
-	# INS, DEL, DUP, CNV
-	else:
-		df = pd.read_csv("sv_distributions/size_distrib" + svtype + ".tsv", sep="\t")
-		pb = df["pb"].tolist()
-		li = np.random.multinomial(1, pb)
-		for i in range(len(li)):	
-			if li[i] != 0:
-				interval = df["size_interval"].iloc[i]
-				interval = json.loads(interval)
-				s = np.random.uniform(interval[0], interval[1], 1).round()
+	# INS, DEL, DUP, CNV, INV
+	df = pd.read_csv("sv_distributions/size_distrib" + svtype + ".tsv", sep="\t")
+	pb = df["pb"].tolist()
+	li = np.random.multinomial(1, pb)
+	i = np.argmax(li)
+	interval = df["size_interval"].iloc[i]
+	interval = json.loads(interval)
+	s = np.random.uniform(interval[0], interval[1], 1).round()
 	return(int(s[0]))
 
 # get the sequence of each variants in BED
@@ -75,7 +65,7 @@ def get_seq(vcf_df, bed_df, fa_dict, output_file):
 		start = start-1 # pour ajuster à l'index python
 		sv_type = sv_info[3]
 
-		fasta_seq = fa_dict[chr_name]
+		fasta_seq = fa_dict[chr_name].upper()
 
 		if sv_type == "deletion":
 			end = start + get_random_len("DEL")
@@ -121,7 +111,7 @@ def get_seq(vcf_df, bed_df, fa_dict, output_file):
 			# translocation réciproque
 			if sv_type == "reciprocal translocation":
 				ref = str(fasta_seq.seq[start:end])
-				fasta_seq_trans = fa_dict[trans_chr]
+				fasta_seq_trans = fa_dict[trans_chr].upper()
 				ref2 = str(fasta_seq_trans.seq[trans_start:trans_end])
 				
 				if infos[3] == "reverse":
@@ -166,15 +156,8 @@ def get_seq(vcf_df, bed_df, fa_dict, output_file):
 
 	# remove unnecessary columns for VCF
 	vcf_df["ID"] = "."
-
-	# adjust variant start position to include reference
-	# vcf_df["POS"] = vcf_df["POS"] - 1 ### Attention aux translocation avec le nouveau start ?
-
+	vcf_df["QUAL"] = 99
 	# output VCF
 	if not os.path.exists("results"):
 		os.mkdir("results")
-	vcf_df.to_csv("results/" + output_file, sep="\t", header=False, index=False)
-
-
-
-# vcf_header="##source=VISOR BED to VCF\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"
\ No newline at end of file
+	vcf_df.to_csv("results/" + output_file + ".vcf", sep="\t", header=False, index=False)
\ No newline at end of file
diff --git a/fusion.py b/fusion.py
index fbd2de9dffbe7540f1f1a2070f420668283ae420..ea0696c006791743574da20139f30d3ae4f0d7d4 100644
--- a/fusion.py
+++ b/fusion.py
@@ -1,13 +1,21 @@
 # fusionner le VCF de msprime (position, génotype) et le BED de VISOR
 # pour obtenir un BED avec tous les variants
 # récupérer les variants et les génotypes msprime
-import io
+import io, os, shutil
 import pandas as pd
 
 def read_vcf(input_file):
 	with open(input_file, "r") as txt:
 		t = txt.read()
 	txt.close()
+
+	# write header for later
+	if not os.path.exists("results"):
+		os.mkdir("results")
+	f = open("results/vcf_header.txt", "w")
+	f.write(''.join(t.splitlines(keepends=True)[:6]))
+	f.close()
+
 	# remove VCF header for dataframe
 	t = ''.join(t.splitlines(keepends=True)[5:])
 	df = pd.read_table(io.StringIO(t))
@@ -25,6 +33,10 @@ def replace_bed_col(input_BED, input_VCF):
 	if len(bed) == len(vcf):
 		bed["chr"] = vcf["CHROM"]
 		bed["start"] = vcf["POS"]
-		# piocher dans une distribution de taille de SV --> modéliser sur la distrib humain, etc
-		# bed["end"] = vcf["POS"] + len_SV
-	return(bed)
\ No newline at end of file
+	return(bed)
+
+def merge_final_vcf(header_file, content_file, merged_file):
+    with open(merged_file, 'wb') as outfile:
+        for filename in [header_file, content_file]:
+            with open(filename, 'rb') as infile:
+                shutil.copyfileobj(infile, outfile)
\ No newline at end of file
diff --git a/main.py b/main.py
index 0b47c0fea1021a91451eaf4a72a345b695ba2a37..82e271348e4121250f47ced98e3dad4f616c9573 100644
--- a/main.py
+++ b/main.py
@@ -15,11 +15,15 @@ def main(bed, vcf, fasta, output_file):
 bed = "test/mini_random.bed"
 vcf = "test/output.vcf"
 fasta = "/home/sukanya/tests/02_data/hackathon_Ztritici/CHR8/ztIPO323.chr8.fasta"
-# n = 200
 
-output_file="60_tritici.vcf"
+output_file="tritici_test"
 main(bed, vcf, fasta, output_file)
 
+header_file="results/vcf_header.txt"
+content_file = "results/" + output_file + ".vcf"
+merged_file="results/" + output_file + "_final.vcf"
+merge_final_vcf(header_file, content_file, merged_file)
+
 ### generate msprime population VCF
 # fai = "ztIPO323.chr8.fasta.fai"
 
diff --git a/random_bed.sh b/random_bed.sh
index bb9cf618a5ba77298e99e621db062c782ba61541..be169a3957a89d2e002e6de2f53b46465ff68c05 100644
--- a/random_bed.sh
+++ b/random_bed.sh
@@ -1,23 +1,6 @@
 ### create random BED
 
-# refFAI="ztIPO323.chr8.fasta.fai"
+refFAI="ztIPO323.chr8.fasta.fai"
 
-# cut -f1,2 $refFAI > chrom.dim.tsv
-# Rscript randomregion.r -d chrom.dim.tsv -n 2638 -v 'deletion,inversion,inverted tandem duplication,translocation copy-paste,reciprocal translocation' -r '40:20:20:10:10' | sortBed > HACk.random.bed
-
-
-### vg construct
-IMG="/home/sukanya/singularity/builddir/vg_v1.51.0.sif"
-REF="/home/sukanya/tests/02_data/hackathon_Ztritici/CHR8/ztIPO323.chr8.fasta"
-VCF="/home/sukanya/repos/00_to_forgemia/msprime_VISOR_fusionVCF/results/60_tritici.vcf"
-
-# $IMG vg construct -r $REF -v $VCF -S -f -m 250000000 -a >test/test.vg
-$IMG vg construct -r $REF -v $VCF -S -f -m 250000000 >test/test.vg
-
-# # $IMG vg convert
-# $IMG vg paths -F -v test/test.vg >test/test.fasta
-# $IMG vg paths -E -x test/test.vg
-
-$IMG vg convert -f test/test.vg > test/test.gfa
-
-# $IMG vg deconstruct -p chr_8 test/test.gfa > test/test.vcf
\ No newline at end of file
+cut -f1,2 $refFAI > chrom.dim.tsv
+Rscript randomregion.r -d chrom.dim.tsv -n 2638 -v 'deletion,inversion,inverted tandem duplication,translocation copy-paste,reciprocal translocation' -r '40:20:20:10:10' | sortBed > HACk.random.bed
\ No newline at end of file
diff --git a/sv_distributions/size_distribINV.tsv b/sv_distributions/size_distribINV.tsv
new file mode 100644
index 0000000000000000000000000000000000000000..4f5ccb9f230409a111603b55b16c8f53f8e46f8c
--- /dev/null
+++ b/sv_distributions/size_distribINV.tsv
@@ -0,0 +1,3 @@
+size_interval	pb
+[250.0, 6000.0]	0.83
+[6000.0, 50200.0]	0.17
\ No newline at end of file
diff --git a/visor_sv_type.yaml b/visor_sv_type.yaml
index 42a807560c0f26f52d743f597a5467876c3e876b..5aafc79c73da30d758fb338b1b5efd1b9373524e 100644
--- a/visor_sv_type.yaml
+++ b/visor_sv_type.yaml
@@ -14,8 +14,8 @@ translocation copy-paste: 0
 translocation cut-paste : 0
 reciprocal translocation: 0
 SNP: 0
-MNP: 0
-tandem repeat contraction: 0
-tandem repeat expansion: 0
-perfect tandem repetition: 0
-approximate tandem repetition: 0
\ No newline at end of file
+# MNP: 0
+# tandem repeat contraction: 0
+# tandem repeat expansion: 0
+# perfect tandem repetition: 0
+# approximate tandem repetition: 0
\ No newline at end of file