From fa39507bf3893ad19e81667199f0a2d3a8887a2f Mon Sep 17 00:00:00 2001 From: Celia Michotey <celia.michotey@inra.fr> Date: Thu, 23 Feb 2023 11:13:50 +0100 Subject: [PATCH 1/3] Refactor code, improve log and add test for PG connection. --- etl_gnpis-core/extract-gnpis-core-brapi.sh | 174 +++++++++--------- .../gnpis-pg-to-json/count_extracted_data.sql | 5 +- .../max_id_by_document_type.sql | 47 +++-- 3 files changed, 111 insertions(+), 115 deletions(-) mode change 100755 => 100644 etl_gnpis-core/extract-gnpis-core-brapi.sh diff --git a/etl_gnpis-core/extract-gnpis-core-brapi.sh b/etl_gnpis-core/extract-gnpis-core-brapi.sh old mode 100755 new mode 100644 index 0bc1031..1f16f6c --- a/etl_gnpis-core/extract-gnpis-core-brapi.sh +++ b/etl_gnpis-core/extract-gnpis-core-brapi.sh @@ -1,148 +1,146 @@ #!/usr/bin/env bash + RED='\033[0;31m' GREEN='\033[0;32m' ORANGE='\033[0;33m' BOLD='\033[1m' RED_BOLD="${RED}${BOLD}" NC='\033[0m' # No format +export RED GREEN ORANGE BOLD RED_BOLD NC + +# shellcheck disable=SC2120,SC2162 +colorize() { + # -- #!/bin/bash#colorize by JohnnyB - boeroboy@gmail.com + RED='\033[0;31m' + NC='\033[0m' # No format + + while read line + do + echo -e "${RED}${line}${NC}" + done < "${1:-/dev/stdin}" +} -############### URGI BrAPI Extractor############## +############### URGI BrAPI Extractor ############### # URGI Internal Use # GNU philosophy: do a single step, but do it well. -# This does extraction only, no transformation, -# fully handled by plant-brapi-etl-faidare and its -# avatars. -# ################################################ +# Extraction only, no transformation +# (fully handled by plant-brapi-etl-faidare). 
+#################################################### -###### sources +### Sources #https://stackoverflow.com/questions/71825711/how-to-use-jq-to-format-array-of-objects-to-separated-list-of-key-values #https://stackoverflow.com/questions/54087481/assigning-an-array-parsed-with-jq-to-bash-script-array #https://lzone.de/cheat-sheet/jq -# Check jq installed ? - # load configuration source gnpis_params.cfg -#extract_page_size=10000 extract_page_size=10000 -# get MAX ID for all table +export sqlDir="./gnpis-pg-to-json/" +sqlMaxidFile="$sqlDir/max_id_by_document_type.sql" +sqlCountFile="$sqlDir/count_extracted_data.sql" -sqlMaxidFile="./gnpis-pg-to-json/max_id_by_document_type.sql" -sqlCountFile="./gnpis-pg-to-json/count_extracted_data.sql" -export extractFolderPaged="./data/json-page" -export extractFolder="./data/json" -#transformFolder="./data/json-bulk" -mkdir -p $extractFolder/INRAE-URGI -mkdir -p $extractFolderPaged/INRAE-URGI -rm -f $extractFolder/INRAE-URGI/* -rm -f $extractFolderPaged/INRAE-URGI/* -echo "Extracting data to ${extractFolder}" +export dataSource="INRAE-URGI" +export extractFolder="./data/json/$dataSource" +export extractFolderPaged="./data/json-page/$dataSource" +[ -d $extractFolder ] && rm -f $extractFolder/* || mkdir -p $extractFolder +[ -d $extractFolderPaged ] && rm -f $extractFolderPaged/* || mkdir -p $extractFolderPaged -# ############################ -# EXTRACT INRAE-URGI to INRAE-URGI-E -# ############################ + +############## +# EXTRACTION # +############## do_psql_extract() { declare -i page=$1 local docType=$2 - declare -i docCount=$3 - declare -i e_page_size=$4 - declare -i upper_limit=$((page + e_page_size -1)) + declare -i docMaxId=$3 + declare -i extract_page_size=$4 + declare -i upper_limit=$((page + extract_page_size -1)) source gnpis_params.cfg # this is VOODOO ! why is this needed ? 
#sleep $((RANDOM % 3)) - declare -i page_number=$((page / e_page_size )) - sqlFile="./gnpis-pg-to-json/${docType}.sql" - echo "psql from $page to $upper_limit : $docType $docCount " - psql_cmd="psql -f ${sqlFile} \ - -At --host ${host} -p ${port} \ - -U ${user} -d ${db} \ + declare -i page_number=$((page / extract_page_size )) + + #echo "psql from $page to $upper_limit / $docMaxId " + sqlFile="$sqlDir/${docType}.sql" + psql_cmd="psql -f ${sqlFile} -At \ + -h ${host} -p ${port} -U ${user} -d ${db} \ -v faidareURL=${faidareURL} \ -v gnpisBaseURL=${gnpisBaseURL} \ -v trialId=${trialId} \ -v startPageId=${page} \ -v endPageId=${upper_limit} \ -v FETCH_COUNT=${fetchCount} \ - -o ${extractFolderPaged}/INRAE-URGI/${docType}-${page_number}.json" + -o ${extractFolderPaged}/${docType}-${page_number}.json" #echo $psql_cmd $psql_cmd - } - export -f do_psql_extract -urgi_extract() { - local docType=$1 - local docCount=$2 - - echo " Extracting $docCount $docType " - echo - seq -f'%.0f' 0 $extract_page_size $(($docCount + 1)) |\ - parallel --link --bar do_psql_extract {} $docType $docCount $extract_page_size :::: - - echo "Extraction done for ${docType}" -} -export -f urgi_extract -echo " extracting MAXIDs using ${sqlMaxidFile} " -mockMaxIdCmd="echo 100" +# Test PG connection +eval "psql -h ${host} -p ${port} -d ${db} -U ${user} -c '\conninfo'" 2> >(colorize) +CODE=$? +[ $CODE -gt 0 ] && { echo -e "${RED_BOLD}Error when trying to connect to ${DB_NAME} DB. Check that your passfile is correclty filled. 
Exiting.${NC}" ; exit $CODE ; } +echo -e "${BOLD}Extracting data to ${extractFolder}.${NC}" for documentType in ${documentTypes[@]}; do - - # ### Count maxId ### - maxIdCmd=" psql -f ${sqlMaxidFile} \ - -At --host ${host} -p ${port} \ - -U ${user} -d ${db} \ - -v type=${documentType} " - docDbCount=$($maxIdCmd) + echo -e "\nManage $documentType" + + ### Get max ID ### + echo "* Get max ID" + maxIdCmd=" psql -f ${sqlMaxidFile} -At \ + -h ${host} -p ${port} -U ${user} -d ${db} \ + -v type=${documentType} " + docMaxId=$($maxIdCmd) #echo $maxIdCmd - # ### Extract ### - urgi_extract $documentType $docDbCount + ### Extract data ### + echo "* Extract data" + seq -f'%.0f' 0 $extract_page_size $(($docMaxId + 1)) |\ + parallel --link --bar do_psql_extract {} $documentType $docMaxId $extract_page_size :::: - - # ### Concat paginated output ### - cat ${extractFolderPaged}/INRAE-URGI/${documentType}-*.json > ${extractFolder}/INRAE-URGI/${documentType}.json + ### Concat paginated output ### + echo "* Concat paginated output" + cat ${extractFolderPaged}/${documentType}-*.json > ${extractFolder}/${documentType}.json done -#echo "Extracting URGI data for $docTypesMaxCount" - -#echo $docTypesMaxCount | jq -r 'to_entries[] |.key as $id | .value | to_entries[] | [ .key, .value ] |@sh' - -# ############################ -# VALIDATION -# ############################ +############## +# VALIDATION # +############## -# ############################ -# Count document in DB - -# DB vs E -# ------- +echo -e "\n${BOLD}Count data for validation.${NC}" for documentType in ${documentTypes[@]}; do - countCmd="psql -f ${sqlCountFile} \ - -At --host ${host} -p ${port} \ - -U ${user} -d ${db} \ + + ### Get DB count ### + countCmd="psql -f ${sqlCountFile} -At \ + -h ${host} -p ${port} -U ${user} -d ${db} \ -v type=${documentType} -v trialId=${trialId}" docTypeDbCount=$($countCmd) - docTypeFileCount=$(wc -l ${extractFolder}/INRAE-URGI/${documentType}.json | tr -d "[:alpha:][:blank:][:punct:]") - echo 
${docTypeDbCount} ${docTypeFileCount} - if [ ${docTypeDbCount} -eq ${docTypeFileCount} ]; - then - echo "Extraction validated for ${documentType} with database: ${docTypeDbCount} and file: ${docTypeFileCount} " + #echo "docTypeDbCount = ${docTypeDbCount}" + + ### Get file count ### + docTypeFileCount=$(wc -l ${extractFolder}/${documentType}.json | tr -d "[:alpha:][:blank:][:punct:]") + #echo "docTypeFileCount = ${docTypeFileCount}" + + if [ ${docTypeDbCount} -eq ${docTypeFileCount} ]; then + echo -e "${GREEN}Extraction validated for ${documentType} (${docTypeFileCount} documents)${NC}" else - echo -e "${RED_BOLD} ERROR ${NC} Extraction of ${documentType} with database: ${docTypeDbCount} and file: ${docTypeFileCount} " + echo -e "${RED_BOLD}ERROR: Extraction failed for ${documentType} (database = ${docTypeDbCount} and file = ${docTypeFileCount})${NC}" fi - #KO test - docTypeFileCount=$((docTypeFileCount + 1)) - if [ ${docTypeDbCount} -eq ${docTypeFileCount} ]; - then - echo -e "${RED_BOLD} ERROR ${NC} in the testing code " + + ### KO test ### + docTypeFileCount=$((docTypeFileCount + 1)) + if [ ${docTypeDbCount} -eq ${docTypeFileCount} ]; then + echo -e "${RED_BOLD}ERROR in the testing code${NC}" else - echo "TEST PROCEDURE OK " + echo -e "${GREEN}TEST PROCEDURE OK${NC}" fi -done -# E vs ET +done exit 1 + diff --git a/etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql b/etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql index a1f21e2..6595049 100644 --- a/etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql +++ b/etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql @@ -4,7 +4,7 @@ select CASE WHEN :'type' = 'germplasm' THEN (select count(distinct a.accession_id) from accession_t a) - WHEN :'type' = 'germplasmMcpd' THEN + WHEN :'type' = 'germplasmMcpd' THEN (select count(distinct a.accession_id) from accession_t a) WHEN :'type' = 'germplasmAttribute' THEN (select count(distinct a.accession_id) from accession_descriptor_t a) @@ -32,5 +32,4 @@ select 
CASE ) as studies) WHEN :'type' = 'trial' THEN (select count(distinct trial_set_id) from trial_set_t) - END -as count; +END as count; diff --git a/etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql b/etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql index 5f0ab81..1173293 100644 --- a/etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql +++ b/etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql @@ -3,35 +3,34 @@ select CASE WHEN :'type' = 'germplasm' THEN - (select max( a.accession_id) from accession_t a) - WHEN :'type' = 'germplasmMcpd' THEN - (select max( a.accession_id) from accession_t a) + (select max(a.accession_id) from accession_t a) + WHEN :'type' = 'germplasmMcpd' THEN + (select max(a.accession_id) from accession_t a) WHEN :'type' = 'germplasmAttribute' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max( germplasm.accession_id) - -- FROM accession_t germplasm - -- WHERE exists (select 1 from accession_descriptor_t ad where germplasm.accession_id = ad.accession_id)) + (select 10) --default min page size, no pagination implemented for that type + --(select max(a.accession_id) from accession_descriptor_t a) WHEN :'type' = 'germplasmPedigree' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max(genealogy_id) from genealogy_t ) + (select 10) --default min page size, no pagination implemented for that type + --(select max(genealogy_id) from genealogy_t) WHEN :'type' = 'germplasmProgeny' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max( a.accession_id) from accession_t a - --WHERE exists (select 1 from genealogy_t ge where ge.first_parent_id = a.accession_id or ge.second_parent_id = a.accession_id)) + (select 10) --default min page size, no pagination implemented for that type + --(select max(a.accession_id) from 
accession_t a + --where exists (select 1 from genealogy_t g where g.first_parent_id = a.accession_id or g.second_parent_id = a.accession_id)) WHEN :'type' = 'location' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max( site_id) from site_t) + (select 10) --default min page size, no pagination implemented for that type + --(select max(site_id) from site_t) WHEN :'type' = 'program' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max( project_id) from project_t) + (select 10) --default min page size, no pagination implemented for that type + --(select max(project_id) from project_t) WHEN :'type' = 'observationUnit' THEN - (select max(study_subject_id) from study_subject_t ) + (select max(study_subject_id) from study_subject_t) WHEN :'type' = 'study' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max(id) from (select max(trial_id) as id from trial_t - -- UNION select max(genotyping_experiment_id) as id from genotyping_experiment_t) as MAXIT) + (select 10) --default min page size, no pagination implemented for that type + --(select max(id) from ( + --select max(trial_id) as id from trial_t + --union select max(genotyping_experiment_id) as id from genotyping_experiment_t + --) as MAXID) WHEN :'type' = 'trial' THEN - (select 10 from ontology_t limit 1) --default min page size, no pagination implemented for that type - --(select max( trial_set_id) from trial_set_t) - END -as count; + (select 10) --default min page size, no pagination implemented for that type + --(select max(trial_set_id) from trial_set_t) +END as maxID; -- GitLab From 20c559dace205f011b1babfca665bbf9cfc3c49d Mon Sep 17 00:00:00 2001 From: Celia Michotey <celia.michotey@inra.fr> Date: Thu, 13 Apr 2023 13:49:25 +0200 Subject: [PATCH 2/3] Import GnpIS-core ETL for DataDiscovery, rename 
GnpIS-core ETL for BrAPI. --- .../extract-gnpis-core-brapi.sh | 0 .../gnpis-pg-to-json/count_extracted_data.sql | 0 .../gnpis-pg-to-json/germplasm.sql | 0 .../gnpis-pg-to-json/germplasmAttribute.sql | 0 .../gnpis-pg-to-json/germplasmMcpd.sql | 0 .../gnpis-pg-to-json/germplasmPedigree.sql | 0 .../gnpis-pg-to-json/germplasmProgeny.sql | 0 .../gnpis-pg-to-json/location.sql | 0 .../max_id_by_document_type.sql | 0 .../gnpis-pg-to-json/observationUnit.sql | 0 .../gnpis-pg-to-json/program.sql | 0 .../gnpis-pg-to-json/study.sql | 0 .../gnpis-pg-to-json/trial.sql | 0 .../gnpis_params.cfg | 6 +- etl_gnpis-core_dd/convert_to_json.sh | 75 ++++ etl_gnpis-core_dd/csv2json.jq | 63 +++ etl_gnpis-core_dd/csv_manipulator.py | 48 +++ etl_gnpis-core_dd/extract_gnpis-core.sh | 380 ++++++++++++++++++ .../extract_observation_variables.jq | 29 ++ etl_gnpis-core_dd/map_values_to_json.jq | 28 ++ .../sql/count_extracted_data.sql | 83 ++++ ...ransplant_gnpis_association_extraction.sql | 85 ++++ ...ant_gnpis_genetic_resources_extraction.sql | 146 +++++++ ...transplant_gnpis_genotyping_extraction.sql | 103 +++++ .../transplant_gnpis_mapping_extraction.sql | 277 +++++++++++++ ...ransplant_gnpis_phenotyping_extraction.sql | 89 ++++ .../transplant_gnpis_sequences_extraction.sql | 102 +++++ .../transplant_gnpis_synteny_extraction.sql | 149 +++++++ ...nsplant_gnpis_transcriptome_extraction.sql | 163 ++++++++ etl_gnpis-core_dd/variables_enrichment.sh | 151 +++++++ 30 files changed, 1974 insertions(+), 3 deletions(-) rename {etl_gnpis-core => etl_gnpis-core_brapi}/extract-gnpis-core-brapi.sh (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/count_extracted_data.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/germplasm.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/germplasmAttribute.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/germplasmMcpd.sql (100%) rename {etl_gnpis-core => 
etl_gnpis-core_brapi}/gnpis-pg-to-json/germplasmPedigree.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/germplasmProgeny.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/location.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/max_id_by_document_type.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/observationUnit.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/program.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/study.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis-pg-to-json/trial.sql (100%) rename {etl_gnpis-core => etl_gnpis-core_brapi}/gnpis_params.cfg (56%) create mode 100755 etl_gnpis-core_dd/convert_to_json.sh create mode 100644 etl_gnpis-core_dd/csv2json.jq create mode 100644 etl_gnpis-core_dd/csv_manipulator.py create mode 100755 etl_gnpis-core_dd/extract_gnpis-core.sh create mode 100644 etl_gnpis-core_dd/extract_observation_variables.jq create mode 100644 etl_gnpis-core_dd/map_values_to_json.jq create mode 100644 etl_gnpis-core_dd/sql/count_extracted_data.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_association_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_genetic_resources_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_genotyping_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_mapping_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_phenotyping_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_sequences_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_synteny_extraction.sql create mode 100644 etl_gnpis-core_dd/sql/transplant_gnpis_transcriptome_extraction.sql create mode 100755 etl_gnpis-core_dd/variables_enrichment.sh diff --git a/etl_gnpis-core/extract-gnpis-core-brapi.sh 
b/etl_gnpis-core_brapi/extract-gnpis-core-brapi.sh similarity index 100% rename from etl_gnpis-core/extract-gnpis-core-brapi.sh rename to etl_gnpis-core_brapi/extract-gnpis-core-brapi.sh diff --git a/etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/count_extracted_data.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/count_extracted_data.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/count_extracted_data.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/germplasm.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/germplasm.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/germplasm.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/germplasmAttribute.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/germplasmAttribute.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmAttribute.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/germplasmMcpd.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/germplasmMcpd.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmMcpd.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/germplasmPedigree.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/germplasmPedigree.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmPedigree.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/germplasmProgeny.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/germplasmProgeny.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/germplasmProgeny.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/location.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql similarity 
index 100% rename from etl_gnpis-core/gnpis-pg-to-json/location.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/location.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/max_id_by_document_type.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/max_id_by_document_type.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/max_id_by_document_type.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/observationUnit.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/observationUnit.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/observationUnit.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/program.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/program.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/program.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/study.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/study.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/study.sql diff --git a/etl_gnpis-core/gnpis-pg-to-json/trial.sql b/etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql similarity index 100% rename from etl_gnpis-core/gnpis-pg-to-json/trial.sql rename to etl_gnpis-core_brapi/gnpis-pg-to-json/trial.sql diff --git a/etl_gnpis-core/gnpis_params.cfg b/etl_gnpis-core_brapi/gnpis_params.cfg similarity index 56% rename from etl_gnpis-core/gnpis_params.cfg rename to etl_gnpis-core_brapi/gnpis_params.cfg index 79a90bf..70670f6 100644 --- a/etl_gnpis-core/gnpis_params.cfg +++ b/etl_gnpis-core_brapi/gnpis_params.cfg @@ -1,14 +1,14 @@ host='localhost' db='aster' -port=54322 +port=5432 user='aster' fetchCount=1000 # production -documentTypes=("germplasm" "observationUnit" "location" "germplasmAttribute" "germplasmPedigree" "germplasmProgeny" "program" "study" 
"trial") +documentTypes=("germplasm" "germplasmAttribute" "germplasmPedigree" "germplasmProgeny" "location" "program" "study" "trial" "observationUnit") # tests -#documentTypes=("germplasm" "location" "germplasmMcpd" "observationUnit" "germplasmAttribute" "germplasmPedigree" "germplasmProgeny" "program" "study" "trial") +#documentTypes=("germplasm" "germplasmMcpd" "germplasmAttribute" "germplasmPedigree" "germplasmProgeny" "location" "program" "study" "trial" "observationUnit") #documentTypes=("trial") # Restrict the generated documents to be linked to a specific GnpIS trial aka MIAPPE study diff --git a/etl_gnpis-core_dd/convert_to_json.sh b/etl_gnpis-core_dd/convert_to_json.sh new file mode 100755 index 0000000..075fd09 --- /dev/null +++ b/etl_gnpis-core_dd/convert_to_json.sh @@ -0,0 +1,75 @@ +#!/bin/bash + + +RED='\033[0;31m' +GREEN='\033[0;32m' +ORANGE='\033[0;33m' +BOLD='\033[1m' +RED_BOLD="${RED}${BOLD}" +NC='\033[0m' # No format +export RED GREEN ORANGE BOLD RED_BOLD NC + +# shellcheck disable=SC2120,SC2162 +colorize() { + # -- #!/bin/bash#colorize by JohnnyB - boeroboy@gmail.com + RED='\033[0;31m' + NC='\033[0m' # No format + + while read line + do + echo -e "${RED}${line}${NC}" + done < "${1:-/dev/stdin}" +} +export -f colorize + +[ -z $VERBOSE ] && VERBOSE=0 + +SCRIPT_DIR=$(readlink -f "$(dirname "$0")") + +help() { + echo "Usages (using command line options, or using environment variables):" + echo "$0 -node DD_NODE -data DATA_DIR [-verbose {1,2}]" + echo "DD_NODE=<node_name> DATA_DIR=</path_to_data_dir> VERBOSE=1 $0" +} + +# get params +while [ -n "$1" ]; do + case $1 in + -node) DD_NODE=$2;shift 2;; + -data) DATA_DIR=$(readlink -f "${2}");shift 2;; + -verbose) VERBOSE=$2;shift 2;; + -h) help; exit 0;; + --help) help; exit 0;; + --) shift;break;; + -*) echo -e "${RED}ERROR: Unknown option: $1${NC}" && echo && help && echo;exit 1;; + *) echo -e "${RED}ERROR: Number or arguments unexpected." 
&& echo && help && echo;exit 1;; + esac +done + +[ -z "$DD_NODE" ] && { echo -e "${RED}ERROR: missing -node argument or DD_NODE env variable.${NC}" ; help ; exit 1; } +[ -z "$DATA_DIR" ] && { echo -e "${RED}ERROR: missing -data argument or DATA_DIR env variable.${NC}" ; help ; exit 1; } + +[ ! -d "$DATA_DIR" ] && { echo -e "${RED}ERROR: given argument is not a directory: $DATA_DIR${NC}" ; exit 1; } +[ ! -r "$DATA_DIR" ] && { echo -e "${RED}ERROR: given directory is not readable: $DATA_DIR${NC}" ; exit 1; } +[ $VERBOSE -ge 0 ] 2>/dev/null || { echo -e "${RED}ERROR: -verbose option must be a positive integer, not: $VERBOSE${NC}" ; exit 1 ; } + +# (readlink -f ${DATA_DIR}) +for FILE in $DATA_DIR/*.csv; do + [ $VERBOSE -ge 2 ] && echo "Matching file: ${FILE}" ; + [ -f $FILE ] && FOUND_FILE=TRUE +done; +[ "$FOUND_FILE" == "TRUE" ] || { echo "ERROR: no valid csv file found in $DATA_DIR" ; exit 4 ; } + +[ -z $SEPARATOR ] && SEPARATOR='\t' # using tabulation as default separator + +[ -z $HEADER ] && HEADER="entryType${SEPARATOR}databaseName${SEPARATOR}identifier${SEPARATOR}name${SEPARATOR}description${SEPARATOR}url${SEPARATOR}species" +[ $VERBOSE -ge 1 ] && echo -e "Using header:\n$HEADER" +export DD_NODE + +for CSV_FILE in $DATA_DIR/*.csv; do + FILE=$(basename $CSV_FILE .csv) + [ $VERBOSE -ge 1 ] && echo "Processing $FILE from $CSV_FILE" + parallel --pipe-part --block 10M "sed '1 i$HEADER' | jq -Rr -s -f ${SCRIPT_DIR}/csv2json.jq > $DATA_DIR/${DD_NODE}_${FILE}_{#}_all_species.json 2> >(colorize)" :::: $CSV_FILE +done + +[ $VERBOSE -ge 0 ] && echo "JSON files generated into $DATA_DIR" diff --git a/etl_gnpis-core_dd/csv2json.jq b/etl_gnpis-core_dd/csv2json.jq new file mode 100644 index 0000000..d8f91a4 --- /dev/null +++ b/etl_gnpis-core_dd/csv2json.jq @@ -0,0 +1,63 @@ +# USAGE: +# $ jq -Rr -s -f csv2json.jq $CSV > $JSON +# Requires jq 1.6+ + +# replaces the leading and trailing blank only once, only for strings +def trimq: + if type == "string" + then (.|sub("^ +";"") | 
sub(" +$";"")) + else . + end +; + +# to_array_if_needed/1 splits the string on comma separator +# only if header = species +def to_array_if_needed(header): + if type == "string" + and (.|index(",") != null) + and header == "species" + then ( . | [split(",")[]| trimq ] ) + else . + end + ; + +# objectify/1 takes an array of string values as inputs, converts +# numeric values to numbers, and packages the results into an object +# with keys specified by the "headers" array +def objectify(headers): + def tonumberq: tonumber? // .; + def tonullq: if . == "" then null else . end; + + . as $in + | reduce range(0; headers|length) as $i ( + {}; headers[$i] as $header + | .[headers[$i]] = ( + $in[$i] + | to_array_if_needed($header) + | tonumberq + | trimq + | tonullq + ) + ) + ; + +def csv2table: + def trim: sub("^ +";"") | sub(" +$";""); # remove all leading and trailing spaces + split("\n") + | map( + split("\t") + | map(trim) + ); + +def csv2json: + csv2table + | .[0] as $headers + | reduce (.[1:][] | select(length > 0) ) as $row ( + []; . + [ $row|objectify($headers) + | .node = if $ENV.DD_NODE == null then "[ERROR]: the environment variable DD_NODE is missing to specify the name of the data provider (ie. INRAE-URGI, EBI or else).\n" | halt_error(1) else $ENV.DD_NODE end + | .name = if (.name == null) then .identifier? else .name end + | del(.dbVersion,.dbId,.xref,.featureType,.sequenceId,.sequenceVersion,.startPosition,.endPosition,.map,.mapPosition,.authority,.trait,.traitId,.environment,.environmentId,.statistic,.unit,.genotype,.experimentType,.linkedResourcesID) | select(keys | length > 1)] + ) + ; + +csv2json | if length == 0 then empty else . 
end diff --git a/etl_gnpis-core_dd/csv_manipulator.py b/etl_gnpis-core_dd/csv_manipulator.py new file mode 100644 index 0000000..a015c2a --- /dev/null +++ b/etl_gnpis-core_dd/csv_manipulator.py @@ -0,0 +1,48 @@ +#!/usr/bin/python + +############################################################################## +# Script used to manipulate csv and replace value in this file related to key +# stored in dictionnary. +# +# +# Check '<script>.py -h' for usage help +# +# +# Author: F. PHILIPPE +# +# Copyright INRA-URGI 2017 +############################################################################## + +import sys,csv,re + +csv.field_size_limit(sys.maxsize) + +def createDict_from_csv(csvfile): + dicoID={} + with open(csvfile) as csvfile: + reader = csv.DictReader(csvfile) + for row in reader: + if row['key'] not in dicoID: + dicoID[row['key']]=row["value"] + return(dicoID) + + +def _main(): + dicoID=createDict_from_csv(sys.argv[1]) + with open(sys.argv[3],"w") as out: + with open(sys.argv[2], "r") as file: + reader = csv.reader(file, delimiter=',') + for row in reader: + # To handle elasticsearch and solr indexation, it is necessary to know the columns' number in order to localize description field. + if len(row)<25: + string = row[4] + else: + string = row[6] + sequence=string + for cle in dicoID.keys(): + if(cle in string): + sequence=sequence.replace(cle, dicoID[cle]) + out.write("\""+"\",\"".join(row).replace(string,sequence)+"\"\n") + +if __name__ == "__main__": + _main() diff --git a/etl_gnpis-core_dd/extract_gnpis-core.sh b/etl_gnpis-core_dd/extract_gnpis-core.sh new file mode 100755 index 0000000..81f0bb6 --- /dev/null +++ b/etl_gnpis-core_dd/extract_gnpis-core.sh @@ -0,0 +1,380 @@ +#!/bin/bash +# +# extract_gnpis-core.sh +# +# Script used to extract a thematic data from GnpIS-core. +# +# Author: E. Kimmel, R. Flores, C. Michotey, D. 
Charruaud +# + + +### colorization +RED='\033[0;31m' +GREEN='\033[0;32m' +ORANGE='\033[0;33m' +BOLD='\033[1m' +RED_BOLD="${RED}${BOLD}" +NC='\033[0m' # No format +export RED GREEN ORANGE BOLD RED_BOLD NC + +# shellcheck disable=SC2120,SC2162 +colorize() { + # -- #!/bin/bash#colorize by JohnnyB - boeroboy@gmail.com + RED='\033[0;31m' + NC='\033[0m' # No format + + while read line + do + echo -e "${RED}${line}${NC}" + done < "${1:-/dev/stdin}" +} +export -f colorize + + +CURRENT_DIR=$(readlink -f "$(dirname "$0")") +PASSFILE="${HOME}/.pgpass" +SQL_FILE="${CURRENT_DIR}/sql/count_extracted_data.sql" +export CURRENT_DIR PASSFILE SQL_FILE + +### default values +DATABASE="aster" +DB_HOST="shelob.versailles.inrae.fr" +DB_PORT="9121" +DB_USER="aster" + +GNPIS_LEGACY_URL="https://urgi.versailles.inrae.fr" +FAIDARE_URL="https://urgi.versailles.inrae.fr/faidare" + +SOURCE_NAME="GnpIS" +DD_NODE="INRAE-URGI" # DD_NODE is used by tabulated/csv2json.jq for setting correctly the node name + +OUTPUT="${CURRENT_DIR}/output" +VERBOSE=0 +EXTRACT="TRUE" +THEMATIC="" + +DEFAULT_THEMATICS=" \ +association \ +sequences \ +mapping \ +synteny \ +transcriptome \ +genetic_resources \ +genotyping \ +phenotyping \ +static" + +# genetic_resources, phenotyping and genotyping are now managed by FAIDARE ETL but are used for XREF +DD_THEMATICS=$(echo $DEFAULT_THEMATICS | sed -e "s/genetic_resources genotyping phenotyping //") +XREF_THEMATICS=$(echo $DEFAULT_THEMATICS | sed -e "s/mapping synteny transcriptome //") +export DEFAULT_THEMATICS DD_THEMATICS XREF_THEMATICS + + +help4me () { + cat <<EOF + +USAGE: + $0 [-thematic <see below...>] [-static <static_dir>] [-database <database>] [-db_user <database_user>] [-db_host <database_host>] [-db_port <database_port>] [-gnpis_legacy_url <GNPIS_LEGACY_URL>] [-faidare_url <FAIDARE_URL>] [-v[v]] + +DESCRIPTION: + Script used to extract data from GnpIS-core database. 
+ +PARAMS: + -thematic thematics to process, by default all thematics are processed + -static static directory containing files following "gnpis_static_[source]_*.csv" name pattern, if you want to manage GnpIS static data (XXX@GnpIS) + -database the database to extract data from (DEFAULT: ${DATABASE}) + -db_user the user to connect to the database (DEFAULT: ${DB_USER}) + -db_host the host of the database to index (DEFAULT: ${DB_HOST}) + -db_port the port of the database to index (DEFAULT: ${DB_PORT}) + -gnpis_legacy_url prefix URL of the GnpIS legacy applications to point to (DEFAULT: ${GNPIS_LEGACY_URL}) + -faidare_url url of the FAIDARE application to point to (DEFAULT: ${FAIDARE_URL}) + --transform_only does not extract data, use data already present into output directory (${OUTPUT}) + -v display verbose informations + -vv display very verbose informations + -h or --help print this help + +WARNING: + If not given, the credentials of the user MUST be referenced in ${PASSFILE} using the database name as a suffix. + +AVAILABLE THEMATICS ARE: +${DEFAULT_THEMATICS} +EOF + exit 1 +} + +check_error() { + CODE=$1 + if [[ $CODE -ne 0 ]];then + echo -e "${RED_BOLD}Error $CODE occured: should check that passfile is given and correclty filled (user, password for given database, etc.): ${PASSFILE}${NC}" + echo -e "${RED_BOLD}Exiting.${NC}" + exit 1 + fi +} +export -f check_error + +echo_and_eval_cmd() { + local CMD=$1 + if [ -z "$CMD" ]; then + echo -e "${RED_BOLD}Missing command to eval. Exiting.${NC}" + exit 1 + fi + [ $VERBOSE -ge 2 ] && echo -e "Going to exec command: \n\t${CMD}" + eval $CMD 2> >(colorize) + check_error $? 
+} +export -f echo_and_eval_cmd + +### get params +while [ -n "$1" ]; do + case $1 in + -h) help4me;shift 1;; + --help) help4me;shift 1;; + -thematic) THEMATICS=$(echo "$2" | tr ',' ' '); if [ -z "$2" ]; then shift 1; else shift 2; fi;; + -static) STATIC_DIR=$2;shift 2;; + -database) DATABASE=$2;shift 2;; + -db_user) DB_USER=$2; shift 2;; + -db_host) DB_HOST=$2;shift 2;; + -db_port) DB_PORT=$2;shift 2;; + -gnpis_legacy_url) GNPIS_LEGACY_URL="$2"; shift 2;; + -faidare_url) FAIDARE_URL="$2"; shift 2;; + --transform_only) EXTRACT="FALSE"; shift 1;; + -v) VERBOSE=1 ; VERBOSE_OPTION="-v" ; PARALLEL_VERBOSE="--bar" ; shift 1;; + -vv) VERBOSE=2 ; VERBOSE_OPTION="-vv" PARALLEL_VERBOSE_2="--bar" ; shift 1;; + --) shift;break;; + -*) echo && echo -e "${RED_BOLD}Unknown option: $1${NC}" && echo;exit 1;; + *) break;; + esac +done + +### check options +if [ "${EXTRACT}" == "TRUE" ] ; then + [ -z "$DATABASE" ] && echo -e "${RED_BOLD}-database is required.${NC}" && help4me && exit 1 + + # check password file + if [ ! -f "$PASSFILE" ]; then + echo -e "${RED_BOLD}The password file ($PASSFILE) does not exists.${NC}" + exit 2 + fi +else + [[ $(find "${OUTPUT}" -type f -name "*.csv" -ls | wc -l) -eq 0 ]] && echo -e "${RED_BOLD}No CSV found in ${OUTPUT}. Please provide a not empty directory." && help4me && exit 4 +fi + +if [ -z "$THEMATICS" ] || [ "$THEMATICS" == "all" ]; then + echo -e "${ORANGE}No thematics specified, using default list.${NC}" + THEMATICS=$DEFAULT_THEMATICS +else + for THEMATIC in ${THEMATICS}; do + if [ -z "$(echo ${THEMATICS} | grep ${THEMATIC})" ]; then + echo -e "${ORANGE}Unknown thematic ${THEMATIC} to extract. Ignoring it.${NC}" + fi + done + if [ "$(echo ${THEMATICS} | grep 'static')" ]; then + if [ -z "$STATIC_DIR" ]; then + echo -e "${RED_BOLD}Static directory is mandatory if you want to manage static data.${NC}" + exit 2 + elif [ ! 
-d "${STATIC_DIR}" ] || [ $(find ${STATIC_DIR} -type f -name "gnpis_static_*.csv" -ls | wc -l) -eq 0 ] ; then + echo -e "${RED_BOLD}You want to manage static data but static directory does not exist or it contains no files to manage ('gnpis_static_[source]_*.csv').${NC}" + exit 2 + fi + fi +fi + +[ ! -d "$OUTPUT" ] && mkdir "$OUTPUT" + +PG_CONNECT="-U ${DB_USER} -p ${DB_PORT} -h ${DB_HOST} -d ${DATABASE}" +echo_and_eval_cmd "psql -w ${PG_CONNECT} -c '\conninfo'" # .pgpass file must be correct such as: host:port:database:user:password + +# Need to export variables and functions for them to be available inside parallel command +export EXTRACT VERBOSE VERBOSE_OPTION PARALLEL_VERBOSE PARALLEL_VERBOSE_2 +export PG_CONNECT GNPIS_LEGACY_URL FAIDARE_URL SOURCE_NAME DD_NODE STATIC_DIR OUTPUT + + +extract_thematic(){ + local LOCAL_THEMATIC="$1" + local SQL_SCRIPT="sql/transplant_gnpis_${LOCAL_THEMATIC}_extraction.sql" + + local APPLICATION_URL="${GNPIS_LEGACY_URL}" + if [ "${LOCAL_THEMATIC}" == "genetic_resources" ]; then + APPLICATION_URL="${FAIDARE_URL}" + fi + + # check sql file + if [ ! 
-f "${SQL_SCRIPT}" ]; then + echo -e "${RED_BOLD}The SQL script (${SQL_SCRIPT}) does not exists.${NC}" + exit 2 + fi + + [ $VERBOSE -ge 2 ] && echo "Sql script is : ${SQL_SCRIPT}" + # Execute command to extract data + echo_and_eval_cmd "psql -q -w ${PG_CONNECT} -v source_name=${SOURCE_NAME} -v thematic=${LOCAL_THEMATIC} -v application_url=${APPLICATION_URL} -f ${SQL_SCRIPT}" +} +export -f extract_thematic + +check_extracted_data() { + local LOCAL_THEMATIC="$1" + local COUNT_STATUS=0 + + if [ -n "$(ls -1 -- *.csv)" ]; then + for FILE in "gnpis_${LOCAL_THEMATIC}"*.csv; do + TYPE=$(echo "$FILE" | sed -r 's/^gnpis_('"${LOCAL_THEMATIC}"'_.+)\.csv$/\1/' 2> >(colorize)) + + COUNT_DATA_DB=$(psql -w ${PG_CONNECT} -tA -v type="${TYPE}" -f "${SQL_FILE}" 2> >(colorize)) + COUNT_DATA_FILE=$(wc -l "$FILE" 2> >(colorize) | cut -d ' ' -f1 2> >(colorize)) + + if [ "$COUNT_DATA_DB" = "" ]; then + echo -e "${ORANGE}Warning: can not check data count for ${TYPE} (SQL query missing in count_extracted_data.sql script)${NC}" + elif [ "$COUNT_DATA_DB" != "$COUNT_DATA_FILE" ]; then + echo -e "${RED_BOLD}Error: expected ${COUNT_DATA_DB} data but got ${COUNT_DATA_FILE} extracted data for ${TYPE}${NC}" + ((COUNT_STATUS++)) + fi + + if [ "$COUNT_DATA_DB" = "0" ]; then + rm "$FILE" 2> >(colorize) + else + mv "$FILE" "${OUTPUT}"/ 2> >(colorize) + fi + done + if [[ $COUNT_STATUS -ne 0 ]] ; then + echo -e "${RED_BOLD}Errors detected, aborting extraction process.${NC}" + #exit 1 + fi + else + [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No csv files found...${NC}" + fi +} +export -f check_extracted_data + +enrich_csv(){ + CSV_FILE="$1" + CURRENT_THEMATIC="$2" + + if [ "${CURRENT_THEMATIC}" == "phenotyping" ]; then + [ $VERBOSE -ge 1 ] && echo "Enrich variables..." + [ $VERBOSE -ge 2 ] && echo "processing with file : ${CSV_FILE}..." 
+ "${CURRENT_DIR}"/variables_enrichment.sh -f "${CSV_FILE}" "${VERBOSE_OPTION}" + fi +} +export -f enrich_csv + +transform_private_url(){ + # change URL to their private form when first field does not start by a zero (2 pass: 1 => gnpis legacy ; 2 => faidare) + # transformed files are written into `${OUTPUT}/privatised` sub-directory + + CSV_FILE="$1" + if [[ ! "${FAIDARE_URL}" =~ "https://urgi.versailles.inrae.fr/faidare-" ]] ; then # we have a production URL + sed 's/\t/ /g ; s/^"//g ; s/","/\t/g ; s/"$//g' "${CSV_FILE}" | sed -r "s#^([^0].*)(https://urgi.versailles.inrae.fr)(.*)#\1\2/private\3#g ; s#^([^0].*)(https://urgi.versailles.inrae.fr/private/faidare)(.*)#\1https://urgi.versailles.inrae.fr/faidare-private\3#g" > "${OUTPUT}/privatised/$(basename "$CSV_FILE")" + else + sed 's/\t/ /g ; s/^"//g ; s/","/\t/g ; s/"$//g' "${CSV_FILE}" > "${OUTPUT}/privatised/$(basename "$CSV_FILE")" + fi + # FAIDARE public/private is only for production env because faidare-int handles the groups via Apache. + # In case of int or staging env, the URL given in script parameter should be already handled by Apache withotu further modification. +} +export -f transform_private_url + +dd_convert_csv_to_json() { + [ ! -d "${OUTPUT}/data_discovery" ] && mkdir "${OUTPUT}/data_discovery" + [ $VERBOSE -ge 1 ] && echo "Transform CSV to DataDiscovery JSON..." + + for LOCAL_THEMATIC in $THEMATICS; do + if [ -z "$(echo ${DD_THEMATICS} | grep ${LOCAL_THEMATIC})" ]; then + echo -e "${ORANGE}Thematic ${LOCAL_THEMATIC} cannot be transformed in data-discovery format. 
Ignoring it.${NC}" + continue + fi + + if [ -n "$(ls -1 ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv)" ]; then + cp ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv ${OUTPUT}/data_discovery/ + parallel ${PARALLEL_VERBOSE_2} enrich_csv "{}" "${LOCAL_THEMATIC}" ::: "${OUTPUT}/data_discovery/gnpis_${LOCAL_THEMATIC}*.csv" + else + [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No CSV file matching gnpis_${LOCAL_THEMATIC}*.csv found...${NC}" + continue + fi + done + + export HEADER="groupId\tentryType\tdatabaseName\tidentifier\tname\tdescription\turl\tspecies" + export VERBOSE + [ $VERBOSE -ge 2 ] && echo "Running convert_to_json.sh" + "${CURRENT_DIR}"/convert_to_json.sh -data "${OUTPUT}/data_discovery" + + [ $? -ne 0 ] || echo -e "${GREEN}Transformation finished, DD JSON available into ${OUTPUT}/data_discovery!\n${NC}" +} +export -f dd_convert_csv_to_json + +xref_convert_csv_to_json() { + local LOCAL_THEMATIC="$1" + local FILENAME PREFIX CSV_LENGTH SPLIT_VALUE + + [ ! -d "${OUTPUT}/xref" ] && mkdir "${OUTPUT}/xref" + [ $VERBOSE -ge 1 ] && echo "Generate XREF JSON for ${LOCAL_THEMATIC} thematic..." + + if [ -z "$(echo ${XREF_THEMATICS} | grep ${LOCAL_THEMATIC})" ]; then + echo -e "${ORANGE}Thematic ${LOCAL_THEMATIC} cannot be transformed in xref format. Ignoring it.${NC}" + return 1 + fi + + if [ ! -n "$(ls -1 ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv)" ]; then + [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No CSV file matching gnpis_${LOCAL_THEMATIC}*.csv found...${NC}" + return 1 + fi + + for CSV_FILE in "${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}"*.csv; do + [ $VERBOSE -ge 2 ] && echo "processing with file : ${CSV_FILE}..." + PREFIX="xref-${LOCAL_THEMATIC}-" + CSV_LENGTH=$(wc -l < "$CSV_FILE") + SPLIT_VALUE=10000 + if [ "${CSV_LENGTH}" -gt "$SPLIT_VALUE" ];then + [ $VERBOSE -ge 2 ] && echo "Splitting processus..." 
+ split -d -l $SPLIT_VALUE "${CSV_FILE}" "${PREFIX}" + for SPLIT_FILE in ${PREFIX}*; + do + CMD_JQ="jq --slurp --raw-input --raw-output --compact-output -f ${CURRENT_DIR}/map_values_to_json.jq '${SPLIT_FILE}' > ${OUTPUT}/xref/${SPLIT_FILE}.json" + echo_and_eval_cmd "${CMD_JQ}" + done + rm -v ${CURRENT_DIR}/${PREFIX}* + else + [ $VERBOSE -ge 2 ] && echo "No splitting processus needed..." + CMD_JQ="jq --slurp --raw-input --raw-output --compact-output -f ${CURRENT_DIR}/map_values_to_json.jq '${CSV_FILE}' > '${OUTPUT}/xref/${PREFIX}1.json'" + echo_and_eval_cmd "${CMD_JQ}" + fi + done + + [ $? -ne 0 ] || echo -e "${GREEN}Generation finished, ${LOCAL_THEMATIC} XREF JSON available into ${OUTPUT}/xref!\n${NC}" +} +export -f xref_convert_csv_to_json + +process_thematic() { + local LOCAL_THEMATIC="$1" + + if [ "${EXTRACT}" == "TRUE" ] && [ "${LOCAL_THEMATIC}" != "static" ]; then + [ $VERBOSE -ge 1 ] && echo "Process ${LOCAL_THEMATIC} thematic..." + extract_thematic "${LOCAL_THEMATIC}" + [ $VERBOSE -ge 1 ] && echo "Check extracted data..." + check_extracted_data "${LOCAL_THEMATIC}" + elif [ "${LOCAL_THEMATIC}" = "static" ]; then + [ $VERBOSE -ge 1 ] && echo "Process static files..." + cp ${STATIC_DIR}/gnpis_*.csv ${OUTPUT} + else + return 1 + fi + + echo -e "${GREEN}Extraction finished for ${LOCAL_THEMATIC} thematic!\n${NC}" +} +export -f process_thematic + +echo -e "\n${BOLD}Extract data...${NC}" +parallel -j4 ${PARALLEL_VERBOSE} process_thematic ::: ${THEMATICS} + +echo -e "\n${BOLD}Manage private data...${NC}" +[ ! -d "${OUTPUT}/privatised" ] && mkdir "${OUTPUT}/privatised" +if [ -n "$(ls -1 ${OUTPUT}/gnpis_*.csv)" ]; then + [ $VERBOSE -ge 1 ] && echo "Transform private URL..." 
+ parallel ${PARALLEL_VERBOSE_2} transform_private_url ::: "${OUTPUT}/gnpis_"*.csv +else + [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No CSV file matching gnpis_*.csv found...${NC}" +fi +echo -e "${GREEN}Privatisation finished!${NC}" + +echo -e "\n${BOLD}Convert CSV to JSON...${NC}" +dd_convert_csv_to_json +parallel -j4 ${PARALLEL_VERBOSE} xref_convert_csv_to_json ::: ${THEMATICS} + +exit diff --git a/etl_gnpis-core_dd/extract_observation_variables.jq b/etl_gnpis-core_dd/extract_observation_variables.jq new file mode 100644 index 0000000..4b5c0d4 --- /dev/null +++ b/etl_gnpis-core_dd/extract_observation_variables.jq @@ -0,0 +1,29 @@ +#!/usr/bin/env jq -Mf +# Produces: +# [ +# { "BFF:0000001": "BFF:0000001 CH_cm (Canopy Height - Hauteur de canop�e)"}, +# { "BFF:0000005": "BFF:0000005 HMax_cm (Plant maximum height - Hauteur maximale plante)"} +# ] +# +# Then transform it to CSV format: +# BFF:0000001,BFF:0000001 CH_cm (Canopy Height - Hauteur de canop�e) +# BFF:0000005,BFF:0000005 HMax_cm (Plant maximum height - Hauteur maximale plante) +[ + (. | group_by(.observationVariableDbId|tostring) + |.[] + | + { (.[0].observationVariableDbId|tostring ) : + (.[0].observationVariableDbId + " " + .[0].name + # use first observationVariableDbId and name as they are the same for each entries grouped by observationVariableDbId + " (" + + ( + [ .[] | .synonyms? 
] # create an array of all synonyms if present + | add # merge all synonyms in a unique array + | select (length > 0) # continue only if at least have 1 synonym + | join(" - ") # concat synonyms with a dash + ) + + ")" + ) + } + )| to_entries[] +] +| .[] | .key + "," + .value # transform to CSV format \ No newline at end of file diff --git a/etl_gnpis-core_dd/map_values_to_json.jq b/etl_gnpis-core_dd/map_values_to_json.jq new file mode 100644 index 0000000..f457189 --- /dev/null +++ b/etl_gnpis-core_dd/map_values_to_json.jq @@ -0,0 +1,28 @@ +#!/usr/bin/env jq -Mf +[ +split("\n") | .[] + | select(length>0) # ignore empty lines + | split("\t") # produces an array by line with + | + { + "groupId": .[0]|tonumber, + "entryType": .[1], + "databaseName": .[2], + "identifier": .[3], + "name": .[4], + "description": .[5], + "url": .[6], + "species": .[7]|tostring|split("%2C "), # TODO: check that this split is done correctly, I doubt... + "linkedResourcesID": + ([ + foreach (.[8]? | tostring | split(", ")[]) as $pui + ([[],[]]; + if ($pui != null and $pui != "" and $pui != "null") then + ($pui | @base64) + else + empty + end + ) + ]) + } +] diff --git a/etl_gnpis-core_dd/sql/count_extracted_data.sql b/etl_gnpis-core_dd/sql/count_extracted_data.sql new file mode 100644 index 0000000..bfe6e9e --- /dev/null +++ b/etl_gnpis-core_dd/sql/count_extracted_data.sql @@ -0,0 +1,83 @@ +-- Example of usage: +-- psql -h shelob.versailles.inrae.fr -p 9122 -U scratchy -d scratchy -tA -v type=germplasm -v trialId=NULL -f count_extracted_data.sql + +select CASE + -- + -- genetique ressources + WHEN :'type' = 'genetic_resources_accessions' THEN + (select count(distinct accession_id) + from accession) + -- + -- association + WHEN :'type' = 'association_analyses' THEN + (select count(distinct association_analysis_id) + from association_analysis) + -- + -- phenotyping + WHEN :'type' = 'phenotyping_trials' THEN + (select count(distinct trial_id) + from trial) + -- + -- genotyping/polymorphism + 
WHEN :'type' = 'genotyping_experiments' THEN + (select count(distinct genotyping_experiment_id) + from genotyping_experiment) + -- + -- cartography + WHEN :'type' = 'mapping_maps' THEN + (select count(distinct map_id) + from map) + WHEN :'type' = 'mapping_mapped_markers' THEN + (select count(distinct m.marker_id) + from marker m + inner join locus l on l.marker_id = m.marker_id) + WHEN :'type' = 'mapping_not_mapped_markers' THEN + (select count(distinct m.marker_id) + from marker m + left join locus l on l.marker_id = m.marker_id + where l.marker_id is null) + WHEN :'type' = 'mapping_qtls' THEN + (select count(distinct mappable_elemt_id) + from qtl) + WHEN :'type' = 'mapping_metaqtls' THEN + (select count(distinct mappable_elemt_id) + from meta_qtl) + -- + -- sequences + WHEN :'type' = 'sequences_ngs_experiments' THEN + (select count(distinct experiment_id) + from ngs_experiment) + WHEN :'type' = 'sequences_ngs_analyses' THEN + (select count(distinct analysis_id) + from ngs_analysis) + -- + -- synteny + --WHEN :'type' = 'synteny_genes' THEN + --(select count(distinct ga.GENE_ASSIGNMENT_ID) + -- --COUNT(distinct 'SYNTENY_DS_' || d.dataset_id || '_AC_' || ac.ANCESTRAL_CHROMOSOME_NAME || '_' || g.gene_name ) + -- --distinct ('SYNTENY_DS_' || d.dataset_id || '_AC_' || ac.ANCESTRAL_CHROMOSOME_NAME || '_' || g.gene_name ) + --FROM GENE_ASSIGNMENT ga + --JOIN GENE g on ga.GENE_ID = g.GENE_ID + --JOIN GENE_HOMOLOGY_GROUP ghg ON ghg.GENE_ID = g.GENE_ID + --JOIN HOMOLOGY_GROUP hg ON hg.HOMOLOGY_GROUP_ID = ghg.HOMOLOGY_GROUP_ID + --JOIN DATASET d ON d.DATASET_ID = hg.DATASET_ID + --JOIN ANCESTRAL_GENE ag on hg.ancestral_gene_id=ag.ancestral_gene_id + --JOIN ANCESTRAL_CHROMOSOME ac on ac.ANCESTRAL_CHROMOSOME_ID=ag.ANCESTRAL_CHROMOSOME_ID + --WHERE d.IS_CURRENT_VERSION='true' + --AND d.DATASET_TYPE_ID=450) + -- --AND d.DATASET_ID = 6 + -- + -- transcriptome + WHEN :'type' = 'transcriptome_experiments' THEN + (select count(distinct experiment_id) + from experiment) + WHEN 
:'type' = 'transcriptome_genes' THEN + (select count(distinct g.gene_id) + from gene g + join gene_gene_list ggl on ggl.gene_id = g.gene_id + join gene_list gl on gl.gene_list_id = ggl.gene_list_id) + WHEN :'type' = 'transcriptome_gene_lists' THEN + (select count(distinct gene_list_id) + from gene_list) + END +as count; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_association_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_association_extraction.sql new file mode 100644 index 0000000..03357bf --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_association_extraction.sql @@ -0,0 +1,85 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, R. Flores, D. Charruaud +-- Created on 2014/07/22 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. +-- #################################################################### + +-- ################################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: association +-- ################################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + + +-- extract association analyses + +\o gnpis_'':thematic''_analyses.csv + +SELECT DISTINCT + '"' || CAST(A.group_id as VARCHAR(3)) || '"' AS group_id, + '"GWAS analysis"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + '"' || CONCAT('GWAS_ANALYSIS_' , A.ASSOCIATION_ANALYSIS_ID) || '_' || A.ANALYSIS_NAME || '"' AS identifier, + '"' || REPLACE(A.ANALYSIS_NAME, '"', '''') ||'"' AS name, + '"' || + CONCAT(REPLACE(A.ANALYSIS_NAME, '"', ''''), ' is a GWAS analysis', + CASE WHEN OT.NAME_EN IS NOT NULL AND OT.NAME_EN != '' THEN + ' related to ' || OT.NAME_EN + END, + CASE WHEN OT.DEFINITION_EN 
IS NOT NULL AND OT.DEFINITION_EN != '' THEN + ' (' || OT.DEFINITION_EN || ')' + END, + ' involving ' , DNA.PANEL_NAME, ' panel, in the scope of ', GE.EXPERIMENT_NAME, ' experiment', + -- ' involving ' , P.PANEL_NAME, ' panel, in the scope of ', GE.EXPERIMENT_NAME, ' experiment', + CASE WHEN GE.DESCRIPTION IS NOT NULL AND GE.DESCRIPTION != '' THEN + ' which is described as: ''' || GE.DESCRIPTION || '''' END, + CASE WHEN G.GENOME_NAME IS NOT NULL AND G.GENOME_NAME != '' THEN + ' on genome ' || G.GENOME_NAME || ' (' || T.SCIENTIFIC_NAME || ')' END + , '. Phenotyping campaign: ' , PC.NAME , + ' markers :', string_agg(DISTINCT(DNA.MARKER_NAME), ' , '), + ' and traits: ', string_agg(DISTINCT(DNA.TRAIT_NAME), ' , ') + ) || '."' AS description, + '"' || CONCAT(:'application_url', '/association/association/viewer.do#results/analysisIds=', A.ASSOCIATION_ANALYSIS_ID) || '"' AS url, + '"' || CASE WHEN taxons IS NULL THEN '' ELSE taxons END || '"' AS species, + '"' || nullif(concat_ws(', ', + CASE WHEN encoded_puids IS NOT NULL THEN encoded_puids END, + CASE WHEN tr.trial_number IS NOT NULL THEN ('urn:URGI/study/'||tr.trial_number)::text END, + CASE WHEN tr.site_id IS NOT NULL THEN ('urn:URGI/location/'||tr.site_id)::text END + ), '') || '"' AS linkedRessourcesID +FROM ASSOCIATION_ANALYSIS A + JOIN OBSERVATION_VARIABLE OV ON OV.OBSERVATION_VARIABLE_ID = A.VARIABLE_ID + LEFT JOIN ONTOLOGY_TERM OT ON OT.ONTOLOGY_TERM_ID = OV.DESCRIPTOR_ID + LEFT JOIN PHENOTYPING_CAMPAIGN PC ON PC.PHENOTYPING_CAMPAIGN_ID = A.PHENOTYPING_CAMPAIGN_ID + LEFT JOIN TRIAL TR ON TR.TRIAL_ID = PC.TRIAL_ID + JOIN GWAS_EXPERIMENT GE ON GE.GWAS_EXPERIMENT_ID = A.GWAS_EXPERIMENT_ID + -- LEFT JOIN PANEL P ON P.PANEL_ID = GE.PANEL_ID + LEFT JOIN TREATMENT_FACTOR TF ON TF.TREATMENT_FACTOR_ID = A.TREATMENT_FACTOR_ID + LEFT JOIN GENOME G ON G.GENOME_ID = A.GENOME_VERSION_ID + LEFT JOIN TAXON T ON T.TAXON_ID = G.TAXON_ID + LEFT JOIN DN_ASSOCIATION DNA ON DNA.ASSOCIATION_ANALYSIS_ID = A.ASSOCIATION_ANALYSIS_ID + 
LEFT JOIN( + SELECT DISTINCT GE.GWAS_EXPERIMENT_ID AS ge_id, + string_agg( + distinct( + CASE WHEN a.puid like 'gnpis_pui%' then + 'urn:URGI/' ||(replace(a.puid, ':', '%3A')) + ELSE + a.puid + END + ) + , ', ' + ) AS encoded_puids, + string_agg(distinct(t.SCIENTIFIC_NAME), ', ') AS taxons + FROM GWAS_EXPERIMENT GE + LEFT JOIN PANEL_LOT pl ON pl.PANEL_ID = GE.PANEL_ID + LEFT JOIN LOT l ON l.LOT_ID = pl.LOT_ID + LEFT JOIN ACCESSION a ON a.ACCESSION_ID = l.ACCESSION_ID + JOIN TAXON t on t.TAXON_ID = a.TAXON_ID + GROUP BY GE.GWAS_EXPERIMENT_ID) AS PUID_ACCESSIONS ON PUID_ACCESSIONS.ge_id = GE.GWAS_EXPERIMENT_ID + group by GE.GWAS_EXPERIMENT_ID, GE.EXPERIMENT_NAME, A.GROUP_ID, PUID_ACCESSIONS.ENCODED_PUIDS, A.ANALYSIS_NAME, A.ASSOCIATION_ANALYSIS_ID, OT.NAME_EN, OT.DEFINITION_EN, DNA.PANEL_NAME, G.GENOME_NAME, GE.DESCRIPTION, PC.NAME, TR.SITE_ID, TR.TRIAL_NUMBER, T.SCIENTIFIC_NAME, PUID_ACCESSIONS.TAXONS +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_genetic_resources_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_genetic_resources_extraction.sql new file mode 100644 index 0000000..6488536 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_genetic_resources_extraction.sql @@ -0,0 +1,146 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): R. Flores, D. Charruaud, E. Kimmel +-- Created on 2014/12/08 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. 
+-- #################################################################### + +-- ######################################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: genetic resources +-- ######################################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extract ACCESSION + +\o gnpis_'':thematic''_accessions.csv + +SELECT DISTINCT + '"' || CAST(A.group_id as VARCHAR(3)) || '"' AS group_id, + '"Germplasm"' AS entry_type, + '"' || :'source_name' || '"' AS database_name, + '"' || a.puid || '"' AS identifier, + '"' || replace(a.accession_name, '"', '''') || ' (' || a.accession_number || ')"' AS name, + '"' || CONCAT( + replace(a.accession_name, '"', '''') , + ' is a ' || t.scientific_name , + ' accession (number: ' || accession_number || ')', + CASE WHEN status.name_en IS NOT NULL THEN + ' (status: ' || status.name_en || ')' END , + CASE WHEN grc.grc_code IS NOT NULL THEN + ' maintained by the ' || lower(grc.grc_code) || ' BRC (managed by ' || grci.organization || ')' END , + CASE WHEN hi.organization IS NOT NULL THEN + ', held by ' || hi.organization END , + CASE WHEN di.organization IS NOT NULL THEN + ', given by ' || di.organization || + CASE WHEN a.donation_date IS NOT NULL THEN + ' on ' || a.donation_date END + END , + CASE WHEN ci.organization IS NOT NULL THEN + ', collected by ' || ci.organization || + CASE WHEN a.collecting_date IS NOT NULL THEN + ' on ' || a.collecting_date END + END , + CASE WHEN acc_coll.collections IS NOT NULL THEN + '. This accession is part of collection(s): ' || acc_coll.collections END , + CASE WHEN agg_taxon_common_names.taxon_synonym_names IS NOT NULL THEN + '. Its taxon is also known as: ' || agg_taxon_common_names.taxon_synonym_names END , + CASE WHEN agg_taxon_synonym.taxon_synonym_names IS NOT NULL THEN + '. 
This taxon has also some synonym(s): ' || agg_taxon_synonym.taxon_synonym_names END , + CASE WHEN synonym_names IS NOT NULL THEN + '. This accession has also some synonym(s): ' || synonym_names END + ) ||'"' AS description, + '"' || CONCAT(:'application_url', '/germplasm?pui=', a.puid) || '"' AS url, + '"' || t.scientific_name || '"' AS species, + '"' || concat_ws(', ', + CASE WHEN acc_sites.encoded_sites IS NOT NULL AND acc_sites.encoded_sites != '' THEN acc_sites.encoded_sites END, + CASE WHEN acc_trial.encoded_trials_sites IS NOT NULL AND acc_trial.encoded_trials_sites != '' THEN acc_trial.encoded_trials_sites END, + CASE WHEN acc_trial.encoded_trials IS NOT NULL AND acc_trial.encoded_trials != '' THEN acc_trial.encoded_trials END, + CASE WHEN acc_geno.encoded_genotypings IS NOT NULL AND acc_geno.encoded_genotypings != '' THEN acc_geno.encoded_genotypings END + ) || '"' AS linkedRessourcesID +FROM accession a +JOIN taxon t ON a.taxon_id = t.taxon_id +-- aggregates sites in one line +LEFT JOIN + (select accession_id as aid, + concat_ws(', ', + ('urn:URGI/location/'||site_id)::text, + ('urn:URGI/location/'||origin_site_id)::text + ) AS encoded_sites + from accession + ) as acc_sites on acc_sites.aid = a.accession_id +LEFT JOIN institution hi ON a.holding_institution_id = hi.institution_id +LEFT JOIN institution bi ON a.breeder_institution_id = bi.institution_id +LEFT JOIN institution di ON a.donor_institution_id = di.institution_id +LEFT JOIN institution ci ON a.collector_institution_id = ci.institution_id +LEFT JOIN ontology_term status ON a.presence_status_id = status.ontology_term_id +LEFT JOIN grc grc ON grc.grc_id = a.grc_id +LEFT JOIN institution grci ON grc.managing_institution_id = grci.institution_id +-- aggregates accession's collections in one line +LEFT JOIN ( + SELECT + a.accession_id AS aid, string_agg(distinct(tr.translated_name), ', ') AS collections + FROM accession a + LEFT JOIN accession_collection ac ON ac.accession_id = a.accession_id + LEFT 
JOIN collections coll ON coll.collection_id = ac.collection_id + LEFT JOIN translations tr ON tr.named_collection_id = coll.collection_id + WHERE tr.language_id = (SELECT language_id FROM languages WHERE language_code = 'en') + GROUP BY a.accession_id + ORDER BY a.accession_id +) AS acc_coll ON acc_coll.aid = a.accession_id +-- aggregates accession's synonyms in one line +LEFT JOIN ( + SELECT acc.accession_id AS aids, string_agg(distinct(accsyn.accession_synonym_name), ', ') AS synonym_names + FROM accession acc + JOIN accession_synonym accsyn on accsyn.accession_id = acc.accession_id + GROUP BY acc.accession_id) AS acc_synonyms ON a.accession_id = acc_synonyms.aids +-- aggregates taxon's common names in one line +LEFT JOIN + (select taxon_id as t_id, string_agg(distinct(ta_synonym_name), ', ') as taxon_synonym_names + from ( + select distinct ta.taxon_id as taxon_id, ts.taxon_synonym_name as ta_synonym_name + from taxon ta + join taxon_synonym_taxon tst on tst.taxons_id = ta.taxon_id + join taxon_synonym ts on ts.taxon_synonym_id = tst.taxon_synonyms_id + join ontology_term ot ON ot.ontology_term_id = ts.name_type_id + where ot.textual_code != 'SCIENTIFIC') as t_id_t_synonym + group by t_id_t_synonym.taxon_id + ) as agg_taxon_common_names on agg_taxon_common_names.t_id = t.taxon_id +-- aggregates taxon's synonyms in one line +LEFT JOIN + (select taxon_id as t_id, string_agg(distinct(ta_synonym_name), ', ') as taxon_synonym_names + from ( + select distinct ta.taxon_id as taxon_id, ts.taxon_synonym_name as ta_synonym_name + from taxon ta + join taxon_synonym_taxon tst on tst.taxons_id = ta.taxon_id + join taxon_synonym ts on ts.taxon_synonym_id = tst.taxon_synonyms_id + join ontology_term ot ON ot.ontology_term_id = ts.name_type_id + where ot.textual_code = 'SCIENTIFIC') as t_id_t_synonym + group by t_id_t_synonym.taxon_id + ) as agg_taxon_synonym on agg_taxon_synonym.t_id = t.taxon_id +-- aggregates trials in one line +LEFT JOIN ( + SELECT a.accession_id AS aid, 
string_agg(distinct(('urn:URGI/study/'||trial.trial_number)::text), ', ') AS encoded_trials, + string_agg(distinct(('urn:URGI/location/'||site.site_id)::text), ', ') AS encoded_trials_sites + FROM accession a + LEFT JOIN lot l ON l.accession_id = a.accession_id + LEFT JOIN trial_lot tl ON tl.lots_id = l.lot_id + LEFT JOIN trial trial ON trial.trial_id = tl.trials_id + LEFT JOIN site site ON site.site_id = trial.site_id + GROUP BY a.accession_id +) AS acc_trial ON acc_trial.aid = a.accession_id +-- aggregates genotypings in one line +LEFT JOIN ( + SELECT a.accession_id AS aid, string_agg(distinct(('urn:URGI/study/'||ge.genotyping_experiment_id)::text), ', ') AS encoded_genotypings + FROM accession a + LEFT JOIN lot l ON l.accession_id = a.accession_id + LEFT JOIN genotyping_exp_lot gel ON gel.lot_id = l.lot_id + LEFT JOIN genotyping_experiment ge ON ge.genotyping_experiment_id = gel.genotyping_experiment_id + GROUP BY a.accession_id +) AS acc_geno ON acc_geno.aid = a.accession_id + +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_genotyping_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_genotyping_extraction.sql new file mode 100644 index 0000000..f29d332 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_genotyping_extraction.sql @@ -0,0 +1,103 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, D. Charruaud +-- Created on 2014/12/05 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. 
+-- #################################################################### + +-- ################################################################################## +-- SQL script used to extract data for transPLANT indices, gnpis thematic: genotyping +-- ################################################################################## + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extracting GENOTYPING_EXPERIMENT + +\o gnpis_'':thematic''_experiments.csv + +SELECT DISTINCT + '"' || CAST(ge.group_id as VARCHAR(3)) || '"' AS group_id, + '"Genotyping Study"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + '"' || ge.GENOTYPING_EXPERIMENT_NAME || ge.GENOTYPING_EXPERIMENT_ID || '"' AS identifier, + '"' || REPLACE( + ge.GENOTYPING_EXPERIMENT_NAME, '"', '''' + ) ||'"' AS name, + '"' || + CONCAT( + replace(ge.GENOTYPING_EXPERIMENT_NAME, '"', '''') , + ' is an experiment (type: ' , bt.name, ')' , + CASE WHEN scientific_names IS NULL THEN ', ' + ELSE ' ran using samples of ' || scientific_names || ', ' END , + ' based on the marker set ', ms.MARKER_SET_NAME, '.', + CASE WHEN ge.PANEL_ID IS NULL THEN '' + ELSE ' The panel used is ' || p.PANEL_NAME || '.' + END , + CASE WHEN ge.PROJECT_ID IS NULL THEN '' + ELSE ' This experiment belongs to the scientific project ' || pr.PROJECT_CODE || '' + END, + CASE WHEN h.HARDWARE_ID IS NULL OR h.HARDWARE_NAME = 'unknown' THEN '.' + ELSE ' and use the hardware ' || h.HARDWARE_NAME || ', model ' || h.MODEL || '.' + END, + CASE WHEN acc_names IS NULL THEN '' + ELSE ' Accession names: ' || acc_names || '.' + END, + CASE WHEN acc_numbers IS NULL THEN '' + ELSE ' Accession number: ' || acc_numbers || '.' + END, + CASE WHEN acc_synonyms IS NULL THEN '' + ELSE ' Accession synonyms: ' || acc_synonyms || '.' 
+ END + ) ||'"' AS description, + '"' || CONCAT(:'application_url', '/GnpSNP/snp/genotyping/form.do#results/experimentIds=', ge.GENOTYPING_EXPERIMENT_ID) || '"' AS url, + '"' || CASE WHEN scientific_names IS NULL THEN '' ELSE scientific_names END || '"' AS species, + '"' || CASE WHEN encoded_puids IS NULL THEN '' ELSE encoded_puids END || '"' AS linkedRessourcesID +FROM + GENOTYPING_EXPERIMENT ge + LEFT JOIN PANEL p ON p.PANEL_ID = ge.PANEL_ID + JOIN MARKER_SET ms ON ms.MARKER_SET_ID = ge.MARKER_SET_ID + LEFT JOIN PROJECT pr ON pr.PROJECT_ID = ge.PROJECT_ID + LEFT JOIN PROTOCOL pro ON pro.PROTOCOL_ID = ge.PROTOCOL_ID + LEFT JOIN HARDWARE h ON h.HARDWARE_ID = pro.HARDWARE_ID + JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = ge.GENOTYPING_TYPE_ID + LEFT JOIN GENOTYPING_EXP_LOT gel ON gel.GENOTYPING_EXPERIMENT_ID = ge.GENOTYPING_EXPERIMENT_ID + LEFT JOIN LOT l ON l.LOT_ID = gel.LOT_ID + LEFT JOIN ACCESSION acc ON acc.ACCESSION_ID = l.ACCESSION_ID + LEFT JOIN TAXON tax ON tax.TAXON_ID = acc.TAXON_ID + INNER JOIN + (SELECT + GENOTYPING_EXPERIMENT_ID AS ge_id, + string_agg(distinct(acc_name), ', ') AS acc_names, + string_agg(distinct(acc_number), ', ') AS acc_numbers, + string_agg(distinct(acc_synonym), ', ') AS acc_synonyms, + string_agg(distinct(scientific_name), ', ') AS scientific_names, + string_agg( + distinct( + CASE WHEN acc_puid like 'gnpis_pui%' then + 'urn:URGI/' ||(replace(acc_puid, ':', '%3A')) + ELSE + acc_puid + END + ) + , ', ' + ) AS encoded_puids + FROM + (SELECT DISTINCT + ge.GENOTYPING_EXPERIMENT_ID AS GENOTYPING_EXPERIMENT_ID, + a.ACCESSION_NAME AS acc_name, + a.ACCESSION_NUMBER AS acc_number, + acs.ACCESSION_SYNONYM_NAME AS acc_synonym, + tax.SCIENTIFIC_NAME AS scientific_name, + a.PUID AS acc_puid + FROM GENOTYPING_EXPERIMENT ge + LEFT JOIN GENOTYPING_EXP_LOT gel ON gel.GENOTYPING_EXPERIMENT_ID = ge.GENOTYPING_EXPERIMENT_ID + LEFT JOIN LOT l ON l.LOT_ID = gel.LOT_ID + LEFT JOIN ACCESSION a ON a.ACCESSION_ID = l.ACCESSION_ID + LEFT JOIN ACCESSION_SYNONYM acs 
ON acs.ACCESSION_ID = a.ACCESSION_ID + LEFT JOIN TAXON tax ON tax.TAXON_ID = a.TAXON_ID) AS GEXP_ID_W_ACCESSIONS + GROUP BY GEXP_ID_W_ACCESSIONS.GENOTYPING_EXPERIMENT_ID) AS DISTINCT_AGG_ACCESSIONS ON DISTINCT_AGG_ACCESSIONS.ge_id = ge.GENOTYPING_EXPERIMENT_ID +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_mapping_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_mapping_extraction.sql new file mode 100644 index 0000000..a1a00a9 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_mapping_extraction.sql @@ -0,0 +1,277 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, R. Flores, D. Charruaud +-- Created on 2014/07/22 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. +-- #################################################################### + +-- ############################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: mapping +-- ############################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extract MAPS + +\o gnpis_'':thematic''_maps.csv + +select distinct + '"' || CAST(m.group_id as VARCHAR(3)) || '"' AS group_id, + '"Genetic map"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('GENETIC_MAP_' , MAP_ID,'_',MAP_NAME)) AS identifier, + '"' || replace(m.map_name, '"', '''') || '"' AS name, + QUOTE_IDENT( + CONCAT( + MAP_NAME , ' is a ' , + CASE (m.IS_CONSENSUS) + WHEN 0 THEN '' + WHEN 1 THEN 'consensus ' + END , + CASE WHEN t.SCIENTIFIC_NAME IS NULL THEN '' + ELSE t.SCIENTIFIC_NAME || ' ' END , + CASE WHEN bt.NAME IS NULL THEN ' map' + ELSE bt.NAME || ' map' END , + ' created on ', CAST(m.map_date AS DATE), + ' 
involving population ' , pop.POPULATION_NAME , + CASE WHEN pop.POPULATION_AUTHOR IS NULL THEN '' + ELSE ' (authored by ' || pop.POPULATION_AUTHOR || ')' END, + CASE WHEN m.UNIT IS NULL OR m.UNIT = '' THEN '' + ELSE '. Its unit is: ' || m.UNIT END , + '. Map contact is ' , c.FIRST_NAME, ' ' , c.LAST_NAME , ' from ' , i.INSTITUTION_NAME , + CASE WHEN i.ORGANIZATION IS NULL OR i.ORGANIZATION = '' THEN '.' + ELSE ', ' || i.ORGANIZATION || '.' END + ) + ) AS DESCRIPTION, + QUOTE_IDENT(CONCAT(:'application_url','/GnpMap/mapping/id.do?action=MAP&id=', MAP_ID)) AS url, + QUOTE_IDENT(t.SCIENTIFIC_NAME) AS species, + '""' AS linkedRessourcesID +FROM map m + JOIN taxon t ON m.taxon_id = t.taxon_id + JOIN bio_type bt ON bt.BIO_TYPE_ID = m.BIO_TYPE_ID + JOIN population pop ON m.POPULATION_ID = pop.POPULATION_ID + JOIN contact c ON c.CONTACT_ID = m.CONTACT_ID + JOIN institution i ON i.INSTITUTION_ID = c.INSTITUTION_ID +ORDER BY identifier; + +-- extract QTL + +\o gnpis_'':thematic''_qtls.csv + +SELECT DISTINCT + '"' || CAST(q.group_id as VARCHAR(3)) || '"' AS group_id, + '"QTL"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('QTL_', q.MAPPABLE_ELEMT_ID,'_', q.QTL_NAME)) AS identifier, + '"' || replace(q.QTL_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT( + CONCAT( + q.QTL_NAME , ' is a ', t.trait_name, ' QTL which has been detected on ', qd.QTL_DETEC_DATE , + CASE WHEN qd.METHOD IS NOT NULL AND qd.METHOD != '' THEN + ', using method ' || qd.METHOD || + CASE (lower(qd.PARAMETER)) WHEN 'unknown' THEN '' + ELSE ' with parameter(s) ' || qd.PARAMETER END + END, + '. This QTL is mapped on ', map_names, ' map(s) (', taxon_names, ').' 
+ ) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url','/GnpMap/mapping/id.do?action=QTL&id=',q.MAPPABLE_ELEMT_ID)) AS url, + QUOTE_IDENT(CASE WHEN taxon_names IS NOT NULL THEN taxon_names ELSE '' END) AS species, + '""' AS linkedRessourcesID +FROM QTL q +JOIN QTL_DETECTION qd ON q.QTL_DETEC_ID = qd.QTL_DETEC_ID +JOIN ASSIGNMENT a ON a.MAPPABLE_ELEMT_ID = q.MAPPABLE_ELEMT_ID +JOIN MEASURE mea ON mea.MEASURE_ID = qd.MEASURE_ID +JOIN TRAIT t ON mea.TRAIT_ID = t.TRAIT_ID +left join +(select qtl.MAPPABLE_ELEMT_ID as qtl_id, + string_agg(distinct(map.MAP_NAME), ', ') as map_names, + string_agg(distinct(tax.SCIENTIFIC_NAME), ',') as taxon_names + from qtl qtl + JOIN assignment ass ON ass.MAPPABLE_ELEMT_ID = qtl.MAPPABLE_ELEMT_ID + JOIN map map on map.MAP_ID = ass.MAP_ID + JOIN taxon tax on tax.TAXON_ID = map.TAXON_ID + group by qtl.MAPPABLE_ELEMT_ID) as qtl_map on qtl_map.QTL_ID = q.MAPPABLE_ELEMT_ID +WHERE a.IS_QTL = 'yes' +ORDER BY identifier; + +-- extract MetaQTL + +\o gnpis_'':thematic''_metaqtls.csv + +SELECT DISTINCT + '"' || CAST(mq.group_id as VARCHAR(3)) || '"' AS group_id, + '"QTL"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('META_QTL_', mq.MAPPABLE_ELEMT_ID,'_', mq.META_QTL_NAME)) AS identifier, + '"' || replace(mq.META_QTL_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT( + mq.META_QTL_NAME, ' is a ', t.TRAIT_NAME, + ' MetaQTL found from the meta-analysis: ', ma.META_ANALYSIS_NAME, + ' with the ', ma.META_ANALYSIS_METHOD, ' method.', + ' This MetaQTL is mapped on ', tax.SCIENTIFIC_NAME, ' ', m.MAP_NAME, ' map.') + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url','/GnpMap/mapping/card.do?&dbName=mapping&className=metaqtl.MetaQtlImpl&id=', mq.MAPPABLE_ELEMT_ID)) AS url, + QUOTE_IDENT(tax.SCIENTIFIC_NAME) AS species, + '""' AS linkedRessourcesID +FROM META_QTL mq +JOIN META_ANALYSIS ma ON mq.META_ANALYSIS_ID = ma.META_ANALYSIS_ID +JOIN ASSIGNMENT a ON a.MAPPABLE_ELEMT_ID = mq.MAPPABLE_ELEMT_ID +JOIN 
MAP m ON m.MAP_ID = a.MAP_ID +JOIN TAXON tax ON tax.TAXON_ID = m.TAXON_ID +JOIN TRAIT t ON mq.TRAIT_ID = t.TRAIT_ID +WHERE a.IS_META_QTL = 'yes' +ORDER BY identifier; + +-- extract MAPPED MARKERS + +\o gnpis_'':thematic''_mapped_markers.csv + +SELECT DISTINCT + '"' || CAST(m.group_id as VARCHAR(3)) || '"' AS group_id, + '"Marker"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('MARKER_', m.MARKER_ID,'_', m.MARKER_NAME)) AS identifier, + '"' || replace(m.MARKER_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT( + CONCAT( + MARKER_NAME , ' is a' , + CASE (lower(bt.NAME)) WHEN 'unknown' THEN '' + ELSE ' ' || bt.NAME END , + ' mapped marker from taxon ', t.SCIENTIFIC_NAME, + CASE WHEN agg_marker_synonym.MARKER_SYNONYM_NAMES IS NOT NULL THEN + '. This marker has some synonyms: ' || agg_marker_synonym.MARKER_SYNONYM_NAMES END , + '. Its locus is/are: ', locus_names, + CASE WHEN positions IS NOT NULL THEN + ' at positions ' || positions END , + CASE WHEN map_names IS NOT NULL THEN + ' on map(s): ' || map_names END , + CASE WHEN m.GENE_FUNCTION IS NULL OR m.GENE_FUNCTION = '' THEN '' + ELSE + CASE (lower(m.GENE_FUNCTION)) WHEN 'unknown' THEN '' + ELSE '. Its gene function is: ' || m.GENE_FUNCTION + END + END , + CASE WHEN m.CONTIG_NAME IS NULL OR m.CONTIG_NAME = '' THEN '' + ELSE '. Its contig name is: ' || m.CONTIG_NAME END , + CASE WHEN m.INSERT_LENGTH IS NULL THEN '' + ELSE '. Its insert length is: ' || m.INSERT_LENGTH END , + CASE WHEN m.REVERSE_PRIMER IS NULL OR m.REVERSE_PRIMER = '' THEN '' + ELSE '. Reverse primer: ' || m.REVERSE_PRIMER END , + CASE WHEN m.FORWARD_PRIMER IS NULL OR m.FORWARD_PRIMER = '' THEN '' + ELSE '. Forward primer: ' || m.FORWARD_PRIMER END , + -- gestion des marqueurs Kaspar (Don't remove for FD, Don't uncomment for URGI) + -- CASE WHEN sequence_names IS NOT NULL OR sequence_names != '' THEN + -- '. 
Sequence name(s): ' || sequence_names END , + CASE WHEN m.SHORT_REMARK IS NULL OR m.SHORT_REMARK = '' THEN '' + -- remove all quotes in user's remarks + ELSE '. Short remark linked: ''' || REPLACE(m.SHORT_REMARK, '"', '') || '''' END + ) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url','/GnpMap/mapping/id.do?dbName=mapping&action=MARKER&className=MarkerImpl&id=', m.MARKER_ID)) AS url, + QUOTE_IDENT(t.SCIENTIFIC_NAME) AS species, + '""' AS linkedRessourcesID +FROM marker m +JOIN taxon t ON m.TAXON_ID = t.TAXON_ID +JOIN bio_type bt ON bt.BIO_TYPE_ID = m.BIO_TYPE_ID +INNER JOIN locus l ON l.MARKER_ID = m.MARKER_ID +LEFT JOIN +(select marker_id as m_id, string_agg(distinct(ma_synonym_name), ', ') as marker_synonym_names +from ( +select distinct ma.MARKER_ID as marker_id, ms.MARKER_SYNONYM_NAME as ma_synonym_name +from marker ma +join marker_synonym_marker msm on msm.MARKER_ID = ma.MARKER_ID +join marker_synonym ms on ms.MARKER_SYNONYM_ID = msm.MARKER_SYNONYM_ID) as m_id_m_synonym +group by m_id_m_synonym.MARKER_ID) as agg_marker_synonym on agg_marker_synonym.M_ID = m.MARKER_ID +-- gestion des marqueurs Kaspar (Don't remove for FD, Don't uncomment for URGI) +-- LEFT JOIN +-- (select marker_id as mid, string_agg(distinct(sequence_name), ', ') as sequence_names +-- from ( +-- select distinct ma.marker_id as marker_id, mseq.sequence_name as sequence_name +-- from marker ma +-- join marker_sequence mseq on ma.MARKER_ID = mseq.MARKER_ID) as seq_m_id +-- group by seq_m_id.marker_id) as marker_seq on marker_seq.mid = m.marker_id +left join +(select ma.MARKER_ID as ma_id, + string_agg(distinct(map.MAP_NAME), ', ') as map_names, + string_agg(distinct(l.LOCUS_NAME), ', ') as locus_names, + string_agg(CAST(pa.ABS_DISTANCE AS VARCHAR(6)), ', ') as positions + from marker ma + INNER JOIN locus l ON l.MARKER_ID = ma.MARKER_ID + JOIN mappable_element me ON me.MAPPABLE_ELEMT_ID = l.MAPPABLE_ELEMT_ID + LEFT JOIN assignment ass ON ass.MAPPABLE_ELEMT_ID = me.MAPPABLE_ELEMT_ID + 
LEFT JOIN point_assignment pa ON pa.ASSIGNMENT_ID = ass.ASSIGNMENT_ID + LEFT JOIN map map on map.MAP_ID = ass.MAP_ID + group by ma.MARKER_ID) AS marker_map ON marker_map.ma_id = m.MARKER_ID +ORDER BY identifier; + + +-- extract NOT MAPPED MARKERS + +\o gnpis_'':thematic''_not_mapped_markers.csv + +SELECT DISTINCT + '"' || CAST(m.group_id as VARCHAR(3)) || '"' AS group_id, + '"Marker"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('MARKER_', m.MARKER_ID,'_NOT_MAPPED_', m.MARKER_NAME)) AS identifier, + '"' || replace(m.MARKER_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT( + CONCAT( + MARKER_NAME , ' is a' , + CASE (lower(bt.NAME)) WHEN 'unknown' THEN '' + ELSE ' ' || bt.NAME END , + ' marker from taxon ' , t.SCIENTIFIC_NAME, + CASE WHEN agg_marker_synonym.MARKER_SYNONYM_NAMES IS NOT NULL THEN + '. This marker has some synonyms: ' || agg_marker_synonym.MARKER_SYNONYM_NAMES END , + CASE WHEN l.LOCUS_NAME IS NULL THEN '' + ELSE '. Its locus is: ' || l.LOCUS_NAME END , + CASE WHEN m.GENE_FUNCTION IS NULL OR m.GENE_FUNCTION = '' THEN '' + ELSE + CASE (lower(m.GENE_FUNCTION)) WHEN 'unknown' THEN '' + ELSE '. Its gene function is: ' || m.GENE_FUNCTION + END + END , + CASE WHEN m.CONTIG_NAME IS NULL OR m.CONTIG_NAME = '' THEN '' + ELSE '. Its contig name is: ' || m.CONTIG_NAME END , + CASE WHEN m.INSERT_LENGTH IS NULL THEN '' + ELSE '. Its insert length is: ' || m.INSERT_LENGTH END , + CASE WHEN m.REVERSE_PRIMER IS NULL OR m.REVERSE_PRIMER = '' THEN '' + ELSE '. Reverse primer: ' || m.REVERSE_PRIMER END , + CASE WHEN m.FORWARD_PRIMER IS NULL OR m.FORWARD_PRIMER = '' THEN '' + ELSE '. Forward primer: ' || m.FORWARD_PRIMER END , + -- gestion des marqueurs Kaspar + CASE WHEN sequence_names IS NOT NULL OR sequence_names != '' THEN + '. Sequence name: ' || sequence_names END , + CASE WHEN m.SHORT_REMARK IS NULL OR m.SHORT_REMARK = '' THEN '' + ELSE '. 
Short remark linked: ''' || REPLACE(m.SHORT_REMARK, '"', '') || '''' END + ) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url','/GnpMap/mapping/id.do?dbName=mapping&action=MARKER&className=MarkerImpl&id=', m.MARKER_ID)) AS url, + QUOTE_IDENT(t.SCIENTIFIC_NAME) AS species, + '""' AS linkedRessourcesID +FROM marker m +JOIN taxon t ON m.TAXON_ID = t.TAXON_ID +JOIN bio_type bt ON bt.BIO_TYPE_ID = m.BIO_TYPE_ID +LEFT JOIN locus l ON l.MARKER_ID = m.MARKER_ID +LEFT JOIN +(select marker_id as m_id, string_agg(distinct(ma_synonym_name), ', ') as marker_synonym_names +from ( +select distinct ma.MARKER_ID as marker_id, ms.MARKER_SYNONYM_NAME as ma_synonym_name +from marker ma +join marker_synonym_marker msm on msm.MARKER_ID = ma.MARKER_ID +join marker_synonym ms on ms.marker_synonym_id = msm.MARKER_SYNONYM_ID) as m_id_m_synonym +group by m_id_m_synonym.MARKER_ID) as agg_marker_synonym on agg_marker_synonym.M_ID = m.MARKER_ID +LEFT JOIN +(select marker_id as mid, string_agg(distinct(sequence_name), ', ') as sequence_names +from ( +select distinct ma.MARKER_ID as marker_id, mseq.SEQUENCE_NAME as sequence_name +from marker ma +join marker_sequence mseq on ma.MARKER_ID = mseq.MARKER_ID) as seq_m_id +group by seq_m_id.MARKER_ID) as marker_seq on marker_seq.MID = m.MARKER_ID +WHERE l.MARKER_ID is null +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_phenotyping_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_phenotyping_extraction.sql new file mode 100644 index 0000000..4d1c95d --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_phenotyping_extraction.sql @@ -0,0 +1,89 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): R. Flores, D. Charruaud, E. Kimmel +-- Created on 2014/12/08 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. 
+-- #################################################################### + +-- ################################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: phenotyping +-- ################################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extract TRIALS + +\o gnpis_'':thematic''_trials.csv + +SELECT DISTINCT + '"' || CAST(t.group_id as VARCHAR(3)) || '"' AS group_id, + '"Phenotyping study"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + CONCAT('"TRIAL_', t.trial_id,'_',t.trial_number, '"') AS identifier, + '"' || replace(t.name, '"', '''') ||'"' AS name, + '"' || CONCAT( + REPLACE(t.trial_number, '"', ''''), ' is a trial lead at site: ', s.site_name, + CASE WHEN s.latitude IS NOT NULL AND s.longitude IS NOT NULL THEN + ' (lat/long: ' || CAST(s.latitude AS VARCHAR(6)) || '/' || CAST(s.longitude AS VARCHAR(6)) || ')' END, + CASE WHEN t.name != t.trial_number THEN + ', described as ''' || t.name || '''' END, + CASE WHEN pa.panel_name IS NOT NULL THEN + ', involving panel ' || pa.panel_name END, + CASE WHEN t.trial_design IS NOT NULL THEN + ', designed as follows: ''' || t.trial_design || '''' END, + CASE WHEN t.comments IS NOT NULL THEN + ', which comment is: ''' || t.comments || '''' END, + CASE WHEN observation_names IS NOT NULL THEN + '. Observation variables: ' || observation_names END, + CASE WHEN t.date_begin IS NOT NULL AND t.date_end IS NOT NULL THEN + '. This trial started on ' || CAST(t.date_begin AS DATE) || ' and finished on ' || CAST(t.date_end AS DATE) END, + CASE WHEN p.project_code IS NOT NULL THEN + ', in the frame of project: ''' || p.project_code || '''' END, + CASE WHEN accession_names IS NOT NULL THEN + '. 
Accession names: ' || accession_names || ' from taxon(s) ' || taxon_names END + ) ||'"' AS description, + '"' || CONCAT(:'application_url', '/ephesis/ephesis/viewer.do#trialCard/trialId=', t.trial_id) || '"' AS url, + '"' || CASE WHEN taxon_names IS NULL THEN '' ELSE taxon_names END || '"' AS species, + '"' || nullif(concat_ws(', ', + CASE WHEN encoded_puids IS NOT NULL THEN encoded_puids END, + CASE WHEN s.site_id IS NOT NULL THEN ('urn:URGI/location/'||s.site_id)::text END + ), '') || '"' AS linkedRessourcesID +FROM trial t +LEFT JOIN trial_lot tl ON tl.trials_id = t.trial_id +LEFT JOIN lot l ON l.lot_ID = tl.lots_id +LEFT JOIN accession a on a.accession_id = l.accession_id +JOIN site s ON s.site_id = t.site_id +LEFT JOIN panel pa ON pa.panel_id = t.panel_id +LEFT JOIN project p ON p.project_id = t.project_id +LEFT JOIN + (select tr.trial_id as tid, + string_agg(distinct(a.accession_name), ', ') AS accession_names, + string_agg(distinct(t.scientific_name), ', ') AS taxon_names, + string_agg( + distinct( + CASE WHEN a.puid like 'gnpis_pui%' then + 'urn:URGI/' ||(replace(a.puid, ':', '%3A')) + ELSE + a.puid + END + ) + , ', ' + ) AS encoded_puids + from trial tr + join trial_lot tl on tl.trials_id = tr.trial_id + join lot l on l.lot_id = tl.lots_id + join accession a on a.accession_id = l.accession_id + join taxon t on t.taxon_id = a.taxon_id + group by tr.trial_id) as trial_acc_tax on trial_acc_tax.tid = t.trial_id +LEFT JOIN + (select tr.trial_id as tid, + string_agg(distinct(CONCAT(ov.term_identifier,' ',ov.variable_specific_name)), ', ') AS observation_names + from trial tr + join trial_observation_variable tov on tov.trials_id = tr.trial_id + join observation_variable ov on tov.observation_variables_id = ov.observation_variable_id + group by tr.trial_id) as trial_ov on trial_ov.tid = t.trial_id +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_sequences_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_sequences_extraction.sql new 
file mode 100644 index 0000000..441ea68 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_sequences_extraction.sql @@ -0,0 +1,102 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, D. Charruaud +-- Created on 2014/12/05 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. +-- #################################################################### + +-- ################################################################################# +-- SQL script used to extract data for transPLANT indices, gnpis thematic: sequences +-- ################################################################################# + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extracting ngs experiment + +\o gnpis_'':thematic''_ngs_experiments.csv + +SELECT DISTINCT + '"' || CAST(ne.group_id as VARCHAR(3)) || '"' AS group_id, + '"Sequencing experiment"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('NGS_EXPERIMENT_' , ne.experiment_id,'_',ne.experiment_name)) AS identifier, + '"' || replace(ne.experiment_name, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT(ne.experiment_name , + ' is an experiment (type: ' , btstudy.name , ')', + CASE WHEN sample_names IS NOT NULL THEN + ' involving sample(s) ' || sample_names END, + CASE WHEN accession_names IS NOT NULL THEN + ' and accession(s) ' || accession_names || ' (' || taxon_names || ')' END, + CASE WHEN subrun_names IS NOT NULL THEN + ' in subrun(s) ' || subrun_names END, '.', + ' Sequencing type is ', btseqtype.name, '.', + ' The project is ', p.project_name, '.', + CASE WHEN ne.description IS NOT NULL OR ne.description != '' THEN + ' The description is: ' || ne.description END) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url', 
'/sequence/sequence/card/experiment.do?dbName=sequence&className=ngs.NgsExperimentImpl&id=' , ne.experiment_id)) AS url, + QUOTE_IDENT(CASE WHEN taxon_names IS NOT NULL THEN taxon_names ELSE '' END) AS species, + '"' || CASE WHEN encoded_puids IS NULL THEN '' ELSE encoded_puids END || '"' AS linkedRessourcesID +FROM NGS_EXPERIMENT ne +JOIN BIO_TYPE btstudy on btstudy.BIO_TYPE_ID = ne.study_type_id +JOIN BIO_TYPE btseqtype on btseqtype.BIO_TYPE_ID = ne.sequencing_type_id +JOIN PROJECT p on p.project_id = ne.project_id +LEFT JOIN +(select ep.experiment_id as eid, + string_agg(distinct(s.subrun_name), ', ') as subrun_names, + string_agg(distinct(ngss.sample_name), ', ') as sample_names, + string_agg(distinct(acc.accession_name), ', ') as accession_names, + string_agg( + distinct( + CASE WHEN acc.puid like 'gnpis_pui%' then + 'urn:URGI/' ||(replace(acc.puid, ':', '%3A')) + ELSE + acc.puid + END + ) + , ', ' + ) AS encoded_puids, + string_agg(distinct(t.scientific_name), ', ') as taxon_names +from experiment_pool ep +join subrun s on s.subrun_id = ep.subrun_id +join ngs_sample ngss on ngss.sample_id = ep.sample_id +left join accession acc on acc.accession_id = ngss.accession_id +join taxon t on t.taxon_id = acc.taxon_id +group by ep.experiment_id) as exp_subrun_sample on exp_subrun_sample.eid = ne.experiment_id +ORDER BY identifier; + +-- extracting ngs analysis + +\o gnpis_'':thematic''_ngs_analyses.csv + +SELECT DISTINCT + '"' || CAST(na.group_id as VARCHAR(3)) || '"' AS group_id, + '"Sequences analysis"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('NGS_ANALYSIS_' , na.analysis_id,'_', na.analysis_name)) AS identifier, + '"' || replace(na.analysis_name, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT(na.analysis_name, + ' is an analysis (type: ', bt.name, ') realized with the software ', s.software_name, '.', + CASE WHEN g.genome_name IS NOT NULL THEN + ' The ref. 
genome used is ' || g.genome_name END, + CASE WHEN t.scientific_name IS NOT NULL THEN + ' whose taxon is ' || t.scientific_name || '.' END, + CASE WHEN na.comments IS NOT NULL THEN + ' Whose comments are: ' || na.comments || '.' END, + CASE WHEN p.project_name IS NOT NULL THEN + ' The project is ' || p.project_name || '.' END + )) AS description, + QUOTE_IDENT(CONCAT(:'application_url', '/sequence/sequence/card/analysis.do?dbName=sequence&className=ngs.NgsAnalysisImpl&id=' , na.analysis_id)) AS url, + QUOTE_IDENT(CASE WHEN t.SCIENTIFIC_NAME IS NOT NULL THEN t.SCIENTIFIC_NAME ELSE '' END) AS species, + '""' AS linkedRessourcesID +FROM NGS_ANALYSIS na +JOIN BIO_TYPE bt on bt.BIO_TYPE_ID = na.analysis_type_id +JOIN SOFTWARE s on s.software_id = na.software_id +LEFT JOIN PROJECT p on p.project_id = na.project_id +LEFT JOIN GENOME g on g.genome_id = na.genome_id +LEFT JOIN TAXON t on t.taxon_id = g.taxon_id +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_synteny_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_synteny_extraction.sql new file mode 100644 index 0000000..28365d7 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_synteny_extraction.sql @@ -0,0 +1,149 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, D. Charruaud +-- Created on 2014/12/09 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. 
+-- #################################################################### + +-- ############################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: synteny +-- ############################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extracting GENE + +\o gnpis_'':thematic''_genes.csv + +-- SELECT count (DISTINCT CONCAT('SYNTENY_' , g.GENE_NAME , '_DS_' , d.DATASET_ID) ) +SELECT DISTINCT + '"' || CAST(g.group_id as VARCHAR(3)) || '"' AS group_id, + '"Gene annotation"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT(g.GENE_NAME, '_in_' , d.DATASET_NAME || ' ' || d.VERSION, '_AC_', ac.ANCESTRAL_CHROMOSOME_NAME)) AS identifier, + '"' || replace(g.GENE_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT( + CONCAT( + g.GENE_NAME , + ' is a syntenic gene from dataset: ''' , d.DATASET_NAME , '_version_', d.VERSION , ''', located on chromosome ''' , c.CHROMOSOME_NAME , + ''' of ''' , tax.scientific_name , '''' , + CASE WHEN ga.START_POSITION IS NULL THEN '.' + ELSE ' between positions ' || ga.START_POSITION || ' and ' || ga.STOP_POSITION || '.' + END, + ' It is linked to ancestral chromosome ''' , ac.ANCESTRAL_CHROMOSOME_NAME , '''.' , + CASE WHEN homolog_gene_names IS NOT NULL THEN ' It belongs to an homology group with: ' || homolog_gene_names || '.' END, + CASE WHEN qtl_names IS NOT NULL THEN + ' This gene is positioned on QTL: ' || qtl_names || '''.' + END + , + ' It is also linked to MetaQTL ' || MQTL_INFO.mqtl_trait_name || ' ' || MQTL_INFO.mqtl_name, + ' identified from following QTLs: ' || MQTL_INFO.AGG_QTL_INFOS || '.' 
+        )
+    ) AS description,
+    QUOTE_IDENT(
+        CONCAT(
+            :'application_url', '/synteny/synteny/viewer.do#results/',
+            'datasetId=' , d.DATASET_ID ,
+            '&geneName=' , g.GENE_NAME,
+            '&ancestralChromosomeId=', ac.ANCESTRAL_CHROMOSOME_ID
+        )
+    ) AS url,
+    QUOTE_IDENT(
+        CASE WHEN d.DATASET_TYPE_ID=450 AND d.IS_CURRENT_VERSION='true' AND d.DATASET_NAME = 'Wheat' THEN -- trick for the Wheat V2 dataset (dataset_id 6), which stores the modern Wheat genome in the ancestral_gene table
+            concat(tax.SCIENTIFIC_NAME, ',', DISTINCT_AGG_GENE_ID.HOMOLOG_TAXON_NAMES, ',Triticum aestivum')
+        ELSE
+            concat(tax.SCIENTIFIC_NAME, ',', DISTINCT_AGG_GENE_ID.HOMOLOG_TAXON_NAMES)
+        END
+    ) AS species,
+    '""' AS linkedRessourcesID
+FROM (
+    SELECT
+        GENE_ID AS G_ID, DS_ID,
+        STRING_AGG(DISTINCT GENE_NAME, ', ') AS homolog_gene_names,
+        STRING_AGG(DISTINCT QTL_NAME || '(trait: '|| TRAIT_NAME || ' - ' || TRAIT_DESCRIPTION || ')', ', ') AS qtl_names,
+        STRING_AGG(DISTINCT HOMOLOG_SCIENTIFIC_NAME, ',') AS HOMOLOG_TAXON_NAMES
+    FROM (
+        SELECT DISTINCT
+            g1.GENE_ID AS GENE_ID,
+            hg.DATASET_ID AS DS_ID,
+            g2.GENE_NAME AS GENE_NAME, -- gene name of g1's homologs
+            q.QTL_NAME AS QTL_NAME,
+            t.TRAIT_NAME AS TRAIT_NAME,
+            t.DESCRIPTION AS TRAIT_DESCRIPTION,
+            homolog_taxons.SCIENTIFIC_NAME AS HOMOLOG_SCIENTIFIC_NAME
+        FROM GENE g1
+        JOIN GENE_HOMOLOGY_GROUP ghg ON ghg.GENE_ID = g1.GENE_ID
+        JOIN HOMOLOGY_GROUP hg ON hg.HOMOLOGY_GROUP_ID= ghg.HOMOLOGY_GROUP_ID
+        JOIN DATASET d ON d.DATASET_ID=HG.DATASET_ID
+        LEFT JOIN GENE_HOMOLOGY_GROUP ghg2 ON ghg2.HOMOLOGY_GROUP_ID = hg.HOMOLOGY_GROUP_ID -- some genes are alone in their group => left join!
+ LEFT JOIN GENE g2 ON g2.GENE_ID = ghg2.GENE_ID AND g1.GENE_ID <> g2.GENE_ID + LEFT JOIN GENE_ASSIGNMENT ga2 ON ga2.GENE_ID = g2.GENE_ID AND ga2.DATASET_ID = hg.DATASET_ID + LEFT JOIN REF_SEQ rf ON rf.ref_seq_id=ga2.ref_seq_id + LEFT JOIN CHROMOSOME c ON c.CHROMOSOME_ID = rf.CHROMOSOME_ID + LEFT JOIN TAXON homolog_taxons ON homolog_taxons.TAXON_ID = c.TAXON_ID + JOIN GENE_ASSIGNMENT ga ON ga.GENE_ID = g1.GENE_ID AND ga.DATASET_ID = hg.DATASET_ID + LEFT JOIN GENE_QTL gq ON gq.GENE_ASSIGNMENT_ID = ga.GENE_ASSIGNMENT_ID + LEFT JOIN QTL q ON q.MAPPABLE_ELEMT_id = gq.MAPPABLE_ELEMT_id + LEFT JOIN QTL_DETECTION qd ON q.QTL_DETEC_ID = qd.QTL_DETEC_ID + LEFT JOIN MEASURE m ON qd.MEASURE_ID = m.MEASURE_ID + LEFT JOIN TRAIT t ON m.TRAIT_ID = t.TRAIT_ID + -- WHERE d.DATASET_ID=6 -- use restriction for test purpose only + WHERE d.IS_CURRENT_VERSION='true' + AND d.DATASET_TYPE_ID=450 + ORDER BY GENE_NAME + ) AS DISTINCT_GENE_ID + GROUP BY DISTINCT_GENE_ID.GENE_ID, DISTINCT_GENE_ID.DS_ID +) AS DISTINCT_AGG_GENE_ID + JOIN GENE g ON g.GENE_ID = DISTINCT_AGG_GENE_ID.G_ID + JOIN GENE_ASSIGNMENT ga ON ga.GENE_ID = g.GENE_ID and ga.DATASET_ID = DISTINCT_AGG_GENE_ID.DS_ID + JOIN REF_SEQ rf ON rf.ref_seq_id=ga.ref_seq_id + JOIN CHROMOSOME c ON c.CHROMOSOME_ID = rf.CHROMOSOME_ID + JOIN TAXON tax ON tax.TAXON_ID = c.TAXON_ID + LEFT JOIN GENE_HOMOLOGY_GROUP ghg ON ghg.GENE_ID = g.GENE_ID + JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = ghg.BIO_TYPE_ID + LEFT JOIN HOMOLOGY_GROUP hg ON hg.HOMOLOGY_GROUP_ID = ghg.HOMOLOGY_GROUP_ID and hg.DATASET_ID = DISTINCT_AGG_GENE_ID.DS_ID + JOIN DATASET d ON d.DATASET_ID = hg.DATASET_ID + LEFT JOIN ANCESTRAL_GENE ag ON ag.ANCESTRAL_GENE_ID = hg.ANCESTRAL_GENE_ID + LEFT JOIN ANCESTRAL_CHROMOSOME ac ON ac.ANCESTRAL_CHROMOSOME_ID = ag.ANCESTRAL_CHROMOSOME_ID + LEFT JOIN ( + SELECT DISTINCT + mqtl.ANCESTRAL_CHROMOSOME_ID AS ANCESTRAL_CHROMOSOME_ID, + syntenome_left.RELATIVE_POSITION AS SYNTENOME_LEFT_POSITION, + syntenome_right.RELATIVE_POSITION AS 
SYNTENOME_RIGHT_POSITION, + mqtl.MQTL_NAME AS MQTL_NAME, + mqtl.MQTL_TRAIT_NAME AS MQTL_TRAIT_NAME, + QTL_INFOS_GROUPED.QTL_INFOS_AGGREGATED AS AGG_QTL_INFOS + FROM mqtl + JOIN ancestral_gene syntenome_left ON syntenome_left.ANCESTRAL_GENE_ID = mqtl.SYNTENOME_LEFT_ID + JOIN ancestral_gene syntenome_right ON syntenome_right.ANCESTRAL_GENE_ID = mqtl.SYNTENOME_RIGHT_ID + JOIN ( + SELECT + STRING_AGG(A_QTL_INFOS,', ') AS QTL_INFOS_AGGREGATED, + MQTL_ID AS MQTL_ID + FROM ( + SELECT DISTINCT + qi.MQTL_ID AS MQTL_ID, + CONCAT ( + qi.QTL_NAME, + ' is a QTL for trait ', qi.TRAIT_NAME, + ' referenced in publication ''', qi.PUBLICATION, + ''' from research station ', qi.RESEARCH_STATION,' (',qi.COUNTRY,') in ', qi.YEAR, + ' found from a ', qi.POPULATION_TYPE, ' population involving ', qi.P1,' and ', qi.P2 + ) AS A_QTL_INFOS + FROM QTL_INFOS qi + ) AS QTL_INFOS_AGGREGATION + GROUP BY QTL_INFOS_AGGREGATION.MQTL_ID + ) AS QTL_INFOS_GROUPED ON QTL_INFOS_GROUPED.MQTL_ID = mqtl.MQTL_ID + ) AS MQTL_INFO on + MQTL_INFO.SYNTENOME_LEFT_POSITION >= ag.RELATIVE_POSITION + AND MQTL_INFO.SYNTENOME_RIGHT_POSITION <= ag.RELATIVE_POSITION + AND MQTL_INFO.ANCESTRAL_CHROMOSOME_ID = ag.ANCESTRAL_CHROMOSOME_ID +-- WHERE d.DATASET_ID=6 -- use restriction for test purpose only +WHERE d.IS_CURRENT_VERSION='true' +AND d.DATASET_TYPE_ID=450 +ORDER BY identifier +; diff --git a/etl_gnpis-core_dd/sql/transplant_gnpis_transcriptome_extraction.sql b/etl_gnpis-core_dd/sql/transplant_gnpis_transcriptome_extraction.sql new file mode 100644 index 0000000..b1bc442 --- /dev/null +++ b/etl_gnpis-core_dd/sql/transplant_gnpis_transcriptome_extraction.sql @@ -0,0 +1,163 @@ +-- #################################################################### +-- Copyright (C) 2014 INRA-URGI +-- Author(s): E. Kimmel, D. 
Charruaud +-- Created on 2014/12/09 +-- Contact: urgi-contact@versailles.inrae.fr +-- It is strictly forbidden to transfer, use or re-use this code +-- or part of it without explicit written authorization from INRA-URGI. +-- #################################################################### + +-- ##################################################################################### +-- SQL script used to extract data for transPLANT indices, gnpis thematic: transcriptome +-- ##################################################################################### + +\pset format unaligned +\pset tuples_only +\pset fieldsep , + +-- extracting EXPERIMENT + +\o gnpis_'':thematic''_experiments.csv + +SELECT DISTINCT + '"' || CAST(exp.group_id as VARCHAR(3)) || '"' AS group_id, + '"Transcriptomic experiment"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('TRANSCRIPTOMIC_EXPERIMENT_' , exp.EXPERIMENT_ID,'_1')) AS identifier, + '"' || replace(exp.EXPERIMENT_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT(exp.EXPERIMENT_NAME, + ' is an experiment (type: ' , bt.NAME , ')' , + ' using samples ', SAMPLE_NAMES, ' of species ' , scientific_names , '.' , + ' This experiment belongs to the scientific project ', pr.PROJECT_CODE, '. 
', + exp.DESCRIPTION) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url', '/GnpArray/transcriptome/id.do?action=EXPERIMENT&id=' , exp.EXPERIMENT_ID )) AS url, + QUOTE_IDENT(scientific_names) AS species, + '""' AS linkedRessourcesID +FROM ( + SELECT EXPERIMENT_ID AS e_id, + STRING_AGG(distinct(SCIENTIFIC_NAME), ',') AS scientific_names, + string_agg(distinct(SAMPLE_NAME), ', ') AS SAMPLE_NAMES + FROM ( + SELECT DISTINCT exp.EXPERIMENT_ID AS EXPERIMENT_ID, + tax.SCIENTIFIC_NAME AS SCIENTIFIC_NAME, + s.SAMPLE_NAME AS SAMPLE_NAME + FROM EXPERIMENT exp + JOIN EXP_HYBR eh ON eh.EXPERIMENT_ID = exp.EXPERIMENT_ID + JOIN HYBRIDIZATION h ON h.HYBRIDIZATION_ID = eh.HYBRIDIZATION_ID + JOIN HYBR_LABELED_EXTRACT hle ON hle.HYBRIDIZATION_ID = h.HYBRIDIZATION_ID + JOIN LABELED_EXTRACT le ON le.LABELED_EXTRACT_ID = hle.LABELED_EXTRACT_ID + JOIN EXTRACT_LABELED_EXTRACT ele ON ele.LABELED_EXTRACT_ID = le.LABELED_EXTRACT_ID + JOIN EXTRACT e ON e.EXTRACT_ID = ele.EXTRACT_ID + JOIN SAMPLE_EXTRACT se ON se.EXTRACT_ID = e.EXTRACT_ID + JOIN SAMPLE s ON s.SAMPLE_ID = se.SAMPLE_ID + JOIN SAMPLE_SOURCE ss ON ss.SAMPLE_SOURCE_ID = s.SAMPLE_SOURCE_ID + JOIN TAXON tax ON tax.TAXON_ID = ss.TAXON_ID) AS DISTINCT_EXPERIMENT_ID + GROUP BY DISTINCT_EXPERIMENT_ID.EXPERIMENT_ID) AS DISTINCT_AGG_EXPERIMENT_ID +JOIN EXPERIMENT exp ON exp.EXPERIMENT_ID = DISTINCT_AGG_EXPERIMENT_ID.e_id +LEFT JOIN PROJECT pr ON pr.PROJECT_ID = exp.PROJECT_ID +JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = exp.BIO_TYPE_ID +ORDER BY identifier; + + +-- extracting GENE + +\o gnpis_'':thematic''_genes.csv + +SELECT DISTINCT + '"' || CAST(g.group_id as VARCHAR(3)) || '"' AS group_id, + '"Gene annotation"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('TRANSCRIPTOMIC_GENE_' , g.GENE_ID,'_1')) AS identifier, + '"' || replace(g.GENE_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT(g.GENE_NAME , + ' is a gene involved in a transcriptomic experiments having expression level ''' , REGULATIONS , 
'''' , + ' in the gene lists ''' , GENE_LISTS , '''.' , + ' This experiment used samples ', SAMPLE_NAMES, ' of species ' , scientific_names , '.' , + ' This gene list belongs to the scientific project ''', PROJECTS, '''.') + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url', '/GnpArray/transcriptome/card.do?&dbName=common&className=GeneImpl&id=' , g.GENE_ID)) AS url, + QUOTE_IDENT(scientific_names) AS species, + '""' AS linkedRessourcesID +FROM ( + SELECT GENE_ID AS g_id, + STRING_AGG(distinct(SCIENTIFIC_NAME), ',') AS scientific_names, + string_agg(distinct(SAMPLE_NAME), ', ') AS SAMPLE_NAMES, + string_agg(distinct(GENE_LIST_NAME), ' , ') AS GENE_LISTS, + string_agg(distinct(PROJECTS), ' , ') AS PROJECTS, + string_agg(distinct(REGULATION), ' , ') AS REGULATIONS + FROM ( + SELECT DISTINCT g.GENE_ID AS GENE_ID, + tax.SCIENTIFIC_NAME AS SCIENTIFIC_NAME, + s.SAMPLE_NAME AS SAMPLE_NAME, + gl.GENE_LIST_NAME AS GENE_LIST_NAME, + p.PROJECT_CODE AS PROJECTS, + bt.name AS REGULATION + FROM GENE g + JOIN GENE_GENE_LIST ggl ON ggl.GENE_ID = g.GENE_ID + JOIN GENE_LIST gl ON gl.GENE_LIST_ID = ggl.GENE_LIST_ID + JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = gl.BIO_TYPE_ID + JOIN PROJECT p ON p.PROJECT_ID = gl.PROJECT_ID + JOIN EXPERIMENT exp ON exp.PROJECT_ID = p.PROJECT_ID + JOIN EXP_HYBR eh ON eh.EXPERIMENT_ID = exp.EXPERIMENT_ID + JOIN HYBRIDIZATION h ON h.HYBRIDIZATION_ID = eh.HYBRIDIZATION_ID + JOIN HYBR_LABELED_EXTRACT hle ON hle.HYBRIDIZATION_ID = h.HYBRIDIZATION_ID + JOIN LABELED_EXTRACT le ON le.LABELED_EXTRACT_ID = hle.LABELED_EXTRACT_ID + JOIN EXTRACT_LABELED_EXTRACT ele ON ele.LABELED_EXTRACT_ID = le.LABELED_EXTRACT_ID + JOIN EXTRACT e ON e.EXTRACT_ID = ele.EXTRACT_ID + JOIN SAMPLE_EXTRACT se ON se.EXTRACT_ID = e.EXTRACT_ID + JOIN SAMPLE s ON s.SAMPLE_ID = se.SAMPLE_ID + JOIN SAMPLE_SOURCE ss ON ss.SAMPLE_SOURCE_ID = s.SAMPLE_SOURCE_ID + JOIN TAXON tax ON tax.TAXON_ID = ss.TAXON_ID) AS DISTINCT_GENE_ID + GROUP BY DISTINCT_GENE_ID.GENE_ID) AS DISTINCT_AGG_GENE_ID +JOIN 
GENE g ON g.GENE_ID = DISTINCT_AGG_GENE_ID.g_id +JOIN GENE_GENE_LIST ggl ON ggl.GENE_ID = g.GENE_ID +JOIN GENE_LIST gl ON gl.GENE_LIST_ID = ggl.GENE_LIST_ID +JOIN PROJECT p ON p.PROJECT_ID = gl.PROJECT_ID +JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = gl.BIO_TYPE_ID +ORDER BY identifier; + + +-- extracting GENE_LIST + +\o gnpis_'':thematic''_gene_lists.csv + +select distinct + '"' || CAST(gl.group_id as VARCHAR(3)) || '"' AS group_id, + '"Transcriptomic gene list"' AS entry_type, + '"' ||:'source_name'|| '"' AS database_name, + QUOTE_IDENT(CONCAT('TRANSCRIPTOMIC_GENE_LIST_' , gl.GENE_LIST_ID,'_1')) AS identifier, + '"' || replace(gl.GENE_LIST_NAME, '"', '''') ||'"' AS name, + QUOTE_IDENT(CONCAT(gl.GENE_LIST_NAME , + ' is a gene list produced in a transcriptomic experiment and for which the expression level of the genes is ''' , bt.name , '''. ' , + CASE WHEN p.PROJECT_ID IS NULL THEN '' + ELSE ' This gene list belongs to the scientific project ''' || p.PROJECT_CODE || '''. ' END , + gl.DESCRIPTION) + ) AS description, + QUOTE_IDENT(CONCAT(:'application_url', '/GnpArray/transcriptome/geneListAction.do?content=all&method=details&geneListId=' , gl.GENE_LIST_ID)) AS url, + QUOTE_IDENT(scientific_names) AS species, + '""' AS linkedRessourcesID +FROM ( + SELECT GENE_LIST_ID AS gl_id, + STRING_AGG(distinct(SCIENTIFIC_NAME), ',') AS scientific_names + FROM ( + SELECT DISTINCT gl.GENE_LIST_ID AS GENE_LIST_ID, + tax.SCIENTIFIC_NAME AS SCIENTIFIC_NAME + FROM GENE_LIST gl + JOIN PROJECT p ON p.PROJECT_ID = gl.PROJECT_ID + JOIN EXPERIMENT exp ON exp.PROJECT_ID = p.PROJECT_ID + JOIN EXP_HYBR eh ON eh.EXPERIMENT_ID = exp.EXPERIMENT_ID + JOIN HYBRIDIZATION h ON h.HYBRIDIZATION_ID = eh.HYBRIDIZATION_ID + JOIN HYBR_LABELED_EXTRACT hle ON hle.HYBRIDIZATION_ID = h.HYBRIDIZATION_ID + JOIN LABELED_EXTRACT le ON le.LABELED_EXTRACT_ID = hle.LABELED_EXTRACT_ID + JOIN EXTRACT_LABELED_EXTRACT ele ON ele.LABELED_EXTRACT_ID = le.LABELED_EXTRACT_ID + JOIN EXTRACT e ON e.EXTRACT_ID = ele.EXTRACT_ID + 
JOIN SAMPLE_EXTRACT se ON se.EXTRACT_ID = e.EXTRACT_ID + JOIN SAMPLE s ON s.SAMPLE_ID = se.SAMPLE_ID + JOIN SAMPLE_SOURCE ss ON ss.SAMPLE_SOURCE_ID = s.SAMPLE_SOURCE_ID + JOIN TAXON tax ON tax.TAXON_ID = ss.TAXON_ID) AS DISTINCT_GENE_LIST_ID + GROUP BY DISTINCT_GENE_LIST_ID.GENE_LIST_ID) AS DISTINCT_AGG_GENE_LIST_ID +JOIN GENE_LIST gl ON gl.GENE_LIST_ID = DISTINCT_AGG_GENE_LIST_ID.gl_id +JOIN PROJECT p ON p.PROJECT_ID = gl.PROJECT_ID +JOIN BIO_TYPE bt ON bt.BIO_TYPE_ID = gl.BIO_TYPE_ID +ORDER BY identifier; diff --git a/etl_gnpis-core_dd/variables_enrichment.sh b/etl_gnpis-core_dd/variables_enrichment.sh new file mode 100755 index 0000000..8c0cc0d --- /dev/null +++ b/etl_gnpis-core_dd/variables_enrichment.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# +# variables_enrichment.sh +# +# Author: F. PHILIPPE, R. FLORES, C. MICHOTEY +# +# Copyright INRAE-URGI 2017-2021 +# + +check_jq() { + which jq > /dev/null + [ $? -ne 0 ] && echo "jq is not installed on this server, or not specified in PATH: " && echo "$PATH" && echo "Please install it via package manager or via tarball: https://stedolan.github.io/jq/download/. Exiting." && exit 1 + + JQ_VERSION=$(jq --version) + { [ "${JQ_VERSION}" != "jq-1.5" ] && [ "${JQ_VERSION}" != "jq-1.6" ] ; } && echo "jq version expected is jq-1.5 or above. Please use these versions only. Exiting." && exit 1 +} + +check_jq + +TMP_DIR=$(mktemp -d) +SCRIPT_DIR=$(readlink -f $(dirname $0)) +DEBUG=0 +VERBOSE=0 +PAGE_SIZE=1000 +WS_BASE_URL="https://urgi.versailles.inrae.fr/ws/webresources" +## Help display +############### + +usage() { + cat <<EOF + +Script used to enrich any ontology term with its name and synonym from input file. + +USAGE: ${0} -f <CSV_FILE> \ +[-v|-vv] \ +[-h|--help] + +PARAMS: + -f the input file to enrich + -ws URL base of BrAPI webservices. 
Default: ${WS_BASE_URL} + -v display verbose informations + -vv display very verbose informations + --debug do not remove intermediate files for debuging purpose + -h or --help print this help + +EXAMPLES: + + +EOF + exit 1 +} + +# [ $# -eq 0 ] && usage && exit 0 # exit after displaying help if no argument given + +## Get commandline params +######################### + +# get params +while [ -n "$1" ]; do + case $1 in + -h) usage;shift 1;; + --help) usage;shift 1;; + -f) CSV_FILE="$2";shift 2;; + -ws) WS_BASE_URL="$2";shift 2;; + -v) VERBOSE=1;shift 1;; + -vv) VERBOSE=2;shift 1;; + --debug) DEBUG=1;shift 1;; + --) shift;break;; + -*) echo && echo "Unknown option: $1" && echo;exit 1;; + *) break;; + esac +done + +[ $VERBOSE -ge 1 ] && echo "Using temp dir: $TMP_DIR" +[ $VERBOSE -ge 2 ] && PARALLEL_VERBOSE="--bar" + +[ -z "${CSV_FILE}" ] && echo "ERROR: missing input file. Exiting." && usage && exit 1 +TMP_OUTPUT_FILE="$(basename "${CSV_FILE}" .csv)_enriched.csv" + +export TMP_DIR + +fetch_observation_variables(){ + local currentPage=0 + [ -n "$1" ] && currentPage=$1 # $1 is optional parameter only used in recursion + [ $VERBOSE -ge 1 ] && [ $currentPage == 0 ] && echo "Fetching observation variables..." + [ $VERBOSE -ge 2 ] && echo "Process page $currentPage with size $PAGE_SIZE " + local CURL_CMD="curl -Ss -XGET '${WS_BASE_URL}/brapi/v1/variables?page=${currentPage}&pageSize=$PAGE_SIZE' | jq '.' > ${TMP_DIR}/observation_variables_list_${currentPage}.json" + [ $VERBOSE -ge 2 ] && echo "Executing cmd: $CURL_CMD" + eval "${CURL_CMD}" + totalPages=$(jq --raw-output '.metadata.pagination.totalPages' "${TMP_DIR}/observation_variables_list_${currentPage}.json" ) + local nextPage=$((1 + currentPage)) + if [ ${nextPage} -lt $totalPages ]; then + fetch_observation_variables $nextPage + fi + # once all page are fetched, merge data: + if [ ${currentPage} -eq 0 ]; then + jq '.result.data[] ' "${TMP_DIR}"/observation_variables_list_*.json | jq -s . 
> "${TMP_DIR}/observation_variables_list.json" + fi +} + +generate_key_value_csv(){ + [ $VERBOSE -ge 1 ] && echo "Generate key-value CSV file..." + jq --raw-output -f "${SCRIPT_DIR}/extract_observation_variables.jq" "${TMP_DIR}/observation_variables_list.json" > "${TMP_DIR}/value_list.csv" +} + +emit_sed_command(){ + line="$1" + key=$(echo $line | cut -f1 -d,) + value=$(echo $line | cut -f2- -d,) + printf '%s' "s#$key#$value#;" +} +export -f emit_sed_command + +create_sed_command() { + [ $VERBOSE -ge 1 ] && echo "Create sed command..." + parallel ${PARALLEL_VERBOSE} -k emit_sed_command :::: "${TMP_DIR}"/value_list.csv | cat - > "${TMP_DIR}"/enrich.sed +} + +process_line() { + line="$1" + echo "$line" | sed -f "${TMP_DIR}"/enrich.sed +} +export -f process_line + +enrich_csv(){ + [ $VERBOSE -ge 1 ] && echo "Enriching data" + # TODO: use ${CSV_FILE} from data_disscovery dir to avoid XREF oversize provoking a "Command line too long" error, because of an input line too long: + # 57% 1437:1055=13s "0","Phenotyping study","GnpIS","TRIAL_2322_0989010000","OrtetsCormiers","0989010000 is a trial lead at site: Paris - 0989010000 (lat/long: 48.853/2.3486), described as 'OrtetsCormiers'. Observation variables: CO_357:0000019 parallel: Error: Command line too long (408206 >= 131049) at input -1041: "0","Phenotyping study","GnpIS","TRIAL_2323_G5_25"... 
+ parallel ${PARALLEL_VERBOSE} -k process_line :::: "${CSV_FILE}" > "${TMP_DIR}/${TMP_OUTPUT_FILE}" +} + +clean() { + [ $VERBOSE -ge 1 ] && echo "Cleaning temporary files" + [ $VERBOSE -ge 2 ] && RM_OPTION="-v" + rm -rf "${RM_OPTION}" "${TMP_DIR}" +} + +main(){ + fetch_observation_variables + generate_key_value_csv + create_sed_command + enrich_csv + mv -f "${TMP_DIR}/${TMP_OUTPUT_FILE}" "${CSV_FILE}" + echo "Enriched file is located at: ${CSV_FILE}" + if [ $DEBUG -eq 0 ]; then + clean + fi +} + +main +exit 0 -- GitLab From 46649119bda56c402dea37a4daa61bacfccb2b95 Mon Sep 17 00:00:00 2001 From: Celia Michotey <celia.michotey@inra.fr> Date: Thu, 13 Apr 2023 17:28:16 +0200 Subject: [PATCH 3/3] Remove linkedResourcesID and group 0 filter for DD, correct bugs --- etl_gnpis-core_dd/extract_gnpis-core.sh | 23 ++++++++++++++--------- etl_gnpis-core_dd/map_values_to_json.jq | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/etl_gnpis-core_dd/extract_gnpis-core.sh b/etl_gnpis-core_dd/extract_gnpis-core.sh index 81f0bb6..3d48308 100755 --- a/etl_gnpis-core_dd/extract_gnpis-core.sh +++ b/etl_gnpis-core_dd/extract_gnpis-core.sh @@ -244,24 +244,30 @@ check_extracted_data() { } export -f check_extracted_data -enrich_csv(){ +cut_enrich_csv(){ CSV_FILE="$1" + NEW_CSV_FILE="${OUTPUT}/data_discovery/$(basename "$CSV_FILE")" CURRENT_THEMATIC="$2" + [ $VERBOSE -ge 2 ] && echo "Cut CSV file..." + [ $VERBOSE -ge 2 ] && echo "processing with file : ${CSV_FILE}..." + # keeping DD columns with groupId but removing linkedResourcesID + cut -f 1-8 "${CSV_FILE}" > "${NEW_CSV_FILE}" + if [ "${CURRENT_THEMATIC}" == "phenotyping" ]; then - [ $VERBOSE -ge 1 ] && echo "Enrich variables..." - [ $VERBOSE -ge 2 ] && echo "processing with file : ${CSV_FILE}..." - "${CURRENT_DIR}"/variables_enrichment.sh -f "${CSV_FILE}" "${VERBOSE_OPTION}" + [ $VERBOSE -ge 2 ] && echo "Enrich variables..." + [ $VERBOSE -ge 2 ] && echo "processing with file : ${NEW_CSV_FILE}..." 
+ "${CURRENT_DIR}"/variables_enrichment.sh -f "${NEW_CSV_FILE}" "${VERBOSE_OPTION}" fi } -export -f enrich_csv +export -f cut_enrich_csv transform_private_url(){ # change URL to their private form when first field does not start by a zero (2 pass: 1 => gnpis legacy ; 2 => faidare) # transformed files are written into `${OUTPUT}/privatised` sub-directory CSV_FILE="$1" - if [[ ! "${FAIDARE_URL}" =~ "https://urgi.versailles.inrae.fr/faidare-" ]] ; then # we have a production URL + if [[ "${FAIDARE_URL}" = "https://urgi.versailles.inrae.fr/faidare" ]] ; then # we have a production URL sed 's/\t/ /g ; s/^"//g ; s/","/\t/g ; s/"$//g' "${CSV_FILE}" | sed -r "s#^([^0].*)(https://urgi.versailles.inrae.fr)(.*)#\1\2/private\3#g ; s#^([^0].*)(https://urgi.versailles.inrae.fr/private/faidare)(.*)#\1https://urgi.versailles.inrae.fr/faidare-private\3#g" > "${OUTPUT}/privatised/$(basename "$CSV_FILE")" else sed 's/\t/ /g ; s/^"//g ; s/","/\t/g ; s/"$//g' "${CSV_FILE}" > "${OUTPUT}/privatised/$(basename "$CSV_FILE")" @@ -282,8 +288,7 @@ dd_convert_csv_to_json() { fi if [ -n "$(ls -1 ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv)" ]; then - cp ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv ${OUTPUT}/data_discovery/ - parallel ${PARALLEL_VERBOSE_2} enrich_csv "{}" "${LOCAL_THEMATIC}" ::: "${OUTPUT}/data_discovery/gnpis_${LOCAL_THEMATIC}*.csv" + parallel ${PARALLEL_VERBOSE_2} cut_enrich_csv "{}" "${LOCAL_THEMATIC}" ::: ${OUTPUT}/privatised/gnpis_${LOCAL_THEMATIC}*.csv else [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No CSV file matching gnpis_${LOCAL_THEMATIC}*.csv found...${NC}" continue @@ -367,7 +372,7 @@ echo -e "\n${BOLD}Manage private data...${NC}" [ ! -d "${OUTPUT}/privatised" ] && mkdir "${OUTPUT}/privatised" if [ -n "$(ls -1 ${OUTPUT}/gnpis_*.csv)" ]; then [ $VERBOSE -ge 1 ] && echo "Transform private URL..." 
- parallel ${PARALLEL_VERBOSE_2} transform_private_url ::: "${OUTPUT}/gnpis_"*.csv + parallel ${PARALLEL_VERBOSE_2} transform_private_url ::: ${OUTPUT}/gnpis_*.csv else [ $VERBOSE -ge 1 ] && echo -e "${ORANGE}No CSV file matching gnpis_*.csv found...${NC}" fi diff --git a/etl_gnpis-core_dd/map_values_to_json.jq b/etl_gnpis-core_dd/map_values_to_json.jq index f457189..eeb800e 100644 --- a/etl_gnpis-core_dd/map_values_to_json.jq +++ b/etl_gnpis-core_dd/map_values_to_json.jq @@ -12,7 +12,7 @@ split("\n") | .[] "name": .[4], "description": .[5], "url": .[6], - "species": .[7]|tostring|split("%2C "), # TODO: check that this split is done correctly, I doubt... + "species": .[7]|tostring|split(", "), # TODO: check that this split is done correctly, I doubt... "linkedResourcesID": ([ foreach (.[8]? | tostring | split(", ")[]) as $pui -- GitLab