From 0d32d4e28e10bf158e4911d588efd4455efdc7c0 Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Fri, 4 Aug 2023 10:58:10 +0200 Subject: [PATCH 1/9] ENH: add gdal vsi prefixes for file within archive + fix tar.gz --- pyotb/core.py | 14 +++++++++----- tests/test_core.py | 16 ++++++++++------ 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index 71a032c..e424534 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -2,6 +2,7 @@ """This module is the core of pyotb.""" from __future__ import annotations +import re from abc import ABC, abstractmethod from ast import literal_eval from pathlib import Path @@ -1580,7 +1581,7 @@ class Output(OTBObject): return str(self.filepath) -def add_vsi_prefix(filepath: str | Path) -> str: +def add_vsi_prefix(filepath: str | Path | Any) -> str: """Append vsi prefixes to file URL or path if needed. Args: @@ -1599,16 +1600,19 @@ def add_vsi_prefix(filepath: str | Path) -> str: # Compressed file prefixes = { ".tar": "vsitar", + ".tar.gz": "vsitar", ".tgz": "vsitar", ".gz": "vsigzip", ".7z": "vsi7z", ".zip": "vsizip", ".rar": "vsirar", } - basename = filepath.split("?")[0] - ext = Path(basename).suffix - if ext in prefixes: - filepath = f"/{prefixes[ext]}/{filepath}" + expr = r"(.*?)(\.7z|\.zip|\.rar|\.tar\.gz|\.tgz|\.tar|\.gz)(.*)" + parts = re.match(expr, filepath) + if parts: + file, ext = parts.group(1), parts.group(2) + if not Path(file + ext).is_dir(): + filepath = f"/{prefixes[ext]}/{filepath}" return filepath diff --git a/tests/test_core.py b/tests/test_core.py index 82173cd..970e244 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -49,17 +49,23 @@ def test_app_properties(): def test_app_input_vsi(): + # Ensure old way is still working: ExtractROI will raise RuntimeError if a path is malformed + pyotb.Input("/vsicurl/" + SPOT_IMG_URL) # Simple remote file info = pyotb.ReadImageInfo("https://fake.com/image.tif", frozen=True) assert info.app.GetParameterValue("in") == "/vsicurl/https://fake.com/image.tif" assert info.parameters["in"] == "https://fake.com/image.tif" - # Compressed remote file - info = pyotb.ReadImageInfo("https://fake.com/image.tif.zip", frozen=True) + # Compressed single file archive + info = pyotb.ReadImageInfo("image.tif.zip", frozen=True) + assert info.app.GetParameterValue("in") == "/vsizip/image.tif.zip" + assert info.parameters["in"] == "image.tif.zip" + # File within compressed remote archive + info = pyotb.ReadImageInfo("https://fake.com/archive.zip/image.tif", frozen=True) assert ( info.app.GetParameterValue("in") - == "/vsizip//vsicurl/https://fake.com/image.tif.zip" + == "/vsizip//vsicurl/https://fake.com/archive.zip/image.tif" ) - assert info.parameters["in"] == "https://fake.com/image.tif.zip" + assert info.parameters["in"] == "https://fake.com/archive.zip/image.tif" # Piped curl --> zip --> tiff ziped_tif_urls = ( "https://github.com/OSGeo/gdal/raw/master" @@ -70,8 +76,6 @@ def test_app_input_vsi(): for ziped_tif_url in ziped_tif_urls: info = pyotb.ReadImageInfo(ziped_tif_url) assert info["sizex"] == 20 - # Ensure old way is still working: ExtractROI will raise RuntimeError if a path is malformed - pyotb.Input("/vsicurl/" + SPOT_IMG_URL) def test_img_properties(): -- GitLab From 2a244a3cc623d5f98e8576febc178e10396465b8 Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Fri, 4 Aug 2023 11:00:35 +0200 Subject: [PATCH 2/9] ADD: test input vsi for tar.gz file --- tests/test_core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 970e244..3bfc5dd 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -60,12 +60,12 @@ def test_app_input_vsi(): assert info.app.GetParameterValue("in") == "/vsizip/image.tif.zip" assert info.parameters["in"] == "image.tif.zip" # File within compressed remote archive - info = pyotb.ReadImageInfo("https://fake.com/archive.zip/image.tif", frozen=True) + info = pyotb.ReadImageInfo("https://fake.com/archive.tar.gz/image.tif", frozen=True) assert ( info.app.GetParameterValue("in") - == "/vsizip//vsicurl/https://fake.com/archive.zip/image.tif" + == "/vsitar//vsicurl/https://fake.com/archive.tar.gz/image.tif" ) - assert info.parameters["in"] == "https://fake.com/archive.zip/image.tif" + assert info.parameters["in"] == "https://fake.com/archive.tar.gz/image.tif" # Piped curl --> zip --> tiff ziped_tif_urls = ( "https://github.com/OSGeo/gdal/raw/master" -- GitLab From c5b749ddc1b9c51fc272582bbbe26347c9e886f6 Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Fri, 4 Aug 2023 14:51:23 +0000 Subject: [PATCH 3/9] Apply 1 suggestion(s) to 1 file(s) --- pyotb/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyotb/core.py b/pyotb/core.py index e424534..f6c28f8 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -1581,7 +1581,7 @@ class Output(OTBObject): return str(self.filepath) -def add_vsi_prefix(filepath: str | Path | Any) -> str: +def add_vsi_prefix(filepath: str | Path | OTBObject) -> str | OTBObject: """Append vsi prefixes to file URL or path if needed. Args: -- GitLab From e9aab191189d319fcfb6a53f8b34e0901564c097 Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Fri, 4 Aug 2023 18:16:48 +0200 Subject: [PATCH 4/9] ENH: do not pass OTBObjects to add_vsi_prefix() --- pyotb/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index f6c28f8..3fdc012 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -731,7 +731,7 @@ class App(OTBObject): key, ) try: - if self.is_input(key): + if self.is_input(key) and isinstance(obj, (str, Path)): if self.is_key_images_list(key): self.__set_param(key, [add_vsi_prefix(p) for p in obj]) else: @@ -1581,7 +1581,7 @@ class Output(OTBObject): return str(self.filepath) -def add_vsi_prefix(filepath: str | Path | OTBObject) -> str | OTBObject: +def add_vsi_prefix(filepath: str | Path) -> str: """Append vsi prefixes to file URL or path if needed. Args: -- GitLab From afbf936128fd59d899b8e0842ad8052528bea37a Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Fri, 4 Aug 2023 19:09:46 +0200 Subject: [PATCH 5/9] Revert "ENH: do not pass OTBObjects to add_vsi_prefix()" This reverts commit e9aab191189d319fcfb6a53f8b34e0901564c097. --- pyotb/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index 3fdc012..f6c28f8 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -731,7 +731,7 @@ class App(OTBObject): key, ) try: - if self.is_input(key) and isinstance(obj, (str, Path)): + if self.is_input(key): if self.is_key_images_list(key): self.__set_param(key, [add_vsi_prefix(p) for p in obj]) else: @@ -1581,7 +1581,7 @@ class Output(OTBObject): return str(self.filepath) -def add_vsi_prefix(filepath: str | Path) -> str: +def add_vsi_prefix(filepath: str | Path | OTBObject) -> str | OTBObject: """Append vsi prefixes to file URL or path if needed. Args: -- GitLab From cf57407cae3526d12227c7533d3c2f14cb14f6d1 Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Sat, 5 Aug 2023 00:43:26 +0200 Subject: [PATCH 6/9] ENH: remove add_vsi_prefix, add App.__check_input_param --- pyotb/core.py | 83 +++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index f6c28f8..2a1336a 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -732,10 +732,7 @@ class App(OTBObject): ) try: if self.is_input(key): - if self.is_key_images_list(key): - self.__set_param(key, [add_vsi_prefix(p) for p in obj]) - else: - self.__set_param(key, add_vsi_prefix(obj)) + self.__set_param(key, self.__check_input_param(obj)) else: self.__set_param(key, obj) except (RuntimeError, TypeError, ValueError, KeyError) as e: @@ -1007,6 +1004,49 @@ class App(OTBObject): else: self.app.SetParameterValue(key, obj) + def __check_input_param(self, obj: OTBObject | str | Path) -> OTBObject | str: + """Check the type and value of an input param. + + If obj is a string, add vsi prefixes to file URL or path if needed. + + Args: + filepath: file path or URL + + Returns: + string with new /vsi prefix(es) + + """ + if isinstance(obj, list): + return [self.__check_input_param(o) for o in obj] + # May be we could add some checks here + if isinstance(obj, OTBObject): + return obj + if isinstance(obj, Path): + obj = str(obj) + if isinstance(obj, str): + if not obj.startswith("/vsi"): + # Remote file. TODO: add support for S3 / GS / AZ + if obj.startswith(("https://", "http://", "ftp://")): + obj = "/vsicurl/" + obj + # Compressed file + prefixes = { + ".tar": "vsitar", + ".tar.gz": "vsitar", + ".tgz": "vsitar", + ".gz": "vsigzip", + ".7z": "vsi7z", + ".zip": "vsizip", + ".rar": "vsirar", + } + expr = r"(.*?)(\.7z|\.zip|\.rar|\.tar\.gz|\.tgz|\.tar|\.gz)(.*)" + parts = re.match(expr, obj) + if parts: + file, ext = parts.group(1), parts.group(2) + if not Path(file + ext).is_dir(): + obj = f"/{prefixes[ext]}/{obj}" + return obj + raise TypeError(f"{self.name}: wrong input parameter type ({type(obj)})") + def __sync_parameters(self): """Save app parameters in _auto_parameters or data dict. @@ -1581,41 +1621,6 @@ class Output(OTBObject): return str(self.filepath) -def add_vsi_prefix(filepath: str | Path | OTBObject) -> str | OTBObject: - """Append vsi prefixes to file URL or path if needed. - - Args: - filepath: file path or URL - - Returns: - string with new /vsi prefix(es) - - """ - if isinstance(filepath, Path): - filepath = str(filepath) - if isinstance(filepath, str) and not filepath.startswith("/vsi"): - # Remote file. TODO: add support for S3 / GS / AZ - if filepath.startswith(("https://", "http://", "ftp://")): - filepath = "/vsicurl/" + filepath - # Compressed file - prefixes = { - ".tar": "vsitar", - ".tar.gz": "vsitar", - ".tgz": "vsitar", - ".gz": "vsigzip", - ".7z": "vsi7z", - ".zip": "vsizip", - ".rar": "vsirar", - } - expr = r"(.*?)(\.7z|\.zip|\.rar|\.tar\.gz|\.tgz|\.tar|\.gz)(.*)" - parts = re.match(expr, filepath) - if parts: - file, ext = parts.group(1), parts.group(2) - if not Path(file + ext).is_dir(): - filepath = f"/{prefixes[ext]}/{filepath}" - return filepath - - def get_nbchannels(inp: str | Path | OTBObject) -> int: """Get the nb of bands of input image. -- GitLab From 692ce5874165e993fd57a0094629ea735d52cc1d Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Sat, 5 Aug 2023 00:49:39 +0200 Subject: [PATCH 7/9] DOC: fix __check_input_param docstring and type hints --- pyotb/core.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index 2a1336a..d99cbf0 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -1004,16 +1004,14 @@ class App(OTBObject): else: self.app.SetParameterValue(key, obj) - def __check_input_param(self, obj: OTBObject | str | Path) -> OTBObject | str: + def __check_input_param(self, obj: list | OTBObject | str | Path) -> list | OTBObject | str: """Check the type and value of an input param. - If obj is a string, add vsi prefixes to file URL or path if needed. - Args: - filepath: file path or URL + obj: input parameter value Returns: - string with new /vsi prefix(es) + object, string with new /vsi prefix(es) if needed """ if isinstance(obj, list): -- GitLab From 46057bd597cb53554836e1b528cc29b7be527e4b Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Sat, 5 Aug 2023 13:19:48 +0200 Subject: [PATCH 8/9] STYLE: move __check_input_param func def before __set_param --- pyotb/core.py | 74 +++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/pyotb/core.py b/pyotb/core.py index d99cbf0..7a8a2cd 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -967,43 +967,6 @@ class App(OTBObject): kwargs.update({self.input_key: arg}) return kwargs - def __set_param( - self, key: str, obj: list | tuple | OTBObject | otb.Application | list[Any] - ): - """Set one parameter, decide which otb.Application method to use depending on target object.""" - if obj is None or (isinstance(obj, (list, tuple)) and not obj): - self.app.ClearValue(key) - return - # Single-parameter cases - if isinstance(obj, OTBObject): - self.app.ConnectImage(key, obj.app, obj.output_image_key) - elif isinstance( - obj, otb.Application - ): # this is for backward comp with plain OTB - self.app.ConnectImage(key, obj, get_out_images_param_keys(obj)[0]) - elif ( - key == "ram" - ): # SetParameterValue in OTB<7.4 doesn't work for ram parameter cf gitlab OTB issue 2200 - self.app.SetParameterInt("ram", int(obj)) - elif not isinstance(obj, list): # any other parameters (str, int...) - self.app.SetParameterValue(key, obj) - # Images list - elif self.is_key_images_list(key): - # To enable possible in-memory connections, we go through the list and set the parameters one by one - for inp in obj: - if isinstance(inp, OTBObject): - self.app.ConnectImage(key, inp.app, inp.output_image_key) - elif isinstance( - inp, otb.Application - ): # this is for backward comp with plain OTB - self.app.ConnectImage(key, obj, get_out_images_param_keys(inp)[0]) - else: # here `input` should be an image filepath - # Append `input` to the list, do not overwrite any previously set element of the image list - self.app.AddParameterStringList(key, inp) - # List of any other types (str, int...) - else: - self.app.SetParameterValue(key, obj) - def __check_input_param(self, obj: list | OTBObject | str | Path) -> list | OTBObject | str: """Check the type and value of an input param. @@ -1045,6 +1008,43 @@ class App(OTBObject): return obj raise TypeError(f"{self.name}: wrong input parameter type ({type(obj)})") + def __set_param( + self, key: str, obj: list | tuple | OTBObject | otb.Application | list[Any] + ): + """Set one parameter, decide which otb.Application method to use depending on target object.""" + if obj is None or (isinstance(obj, (list, tuple)) and not obj): + self.app.ClearValue(key) + return + # Single-parameter cases + if isinstance(obj, OTBObject): + self.app.ConnectImage(key, obj.app, obj.output_image_key) + elif isinstance( + obj, otb.Application + ): # this is for backward comp with plain OTB + self.app.ConnectImage(key, obj, get_out_images_param_keys(obj)[0]) + elif ( + key == "ram" + ): # SetParameterValue in OTB<7.4 doesn't work for ram parameter cf gitlab OTB issue 2200 + self.app.SetParameterInt("ram", int(obj)) + elif not isinstance(obj, list): # any other parameters (str, int...) + self.app.SetParameterValue(key, obj) + # Images list + elif self.is_key_images_list(key): + # To enable possible in-memory connections, we go through the list and set the parameters one by one + for inp in obj: + if isinstance(inp, OTBObject): + self.app.ConnectImage(key, inp.app, inp.output_image_key) + elif isinstance( + inp, otb.Application + ): # this is for backward comp with plain OTB + self.app.ConnectImage(key, obj, get_out_images_param_keys(inp)[0]) + else: # here `input` should be an image filepath + # Append `input` to the list, do not overwrite any previously set element of the image list + self.app.AddParameterStringList(key, inp) + # List of any other types (str, int...) + else: + self.app.SetParameterValue(key, obj) + def __sync_parameters(self): """Save app parameters in _auto_parameters or data dict. -- GitLab From 718fbf9a9e48c1bb7abaf7d7e07210314e609e7b Mon Sep 17 00:00:00 2001 From: Vincent Delbar <vincent.delbar@latelescop.fr> Date: Sat, 5 Aug 2023 16:50:02 +0200 Subject: [PATCH 9/9] STYLE: apply black --- pyotb/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyotb/core.py b/pyotb/core.py index 7a8a2cd..0047dd5 100644 --- a/pyotb/core.py +++ b/pyotb/core.py @@ -967,7 +967,9 @@ class App(OTBObject): kwargs.update({self.input_key: arg}) return kwargs - def __check_input_param(self, obj: list | OTBObject | str | Path) -> list | OTBObject | str: + def __check_input_param( + self, obj: list | OTBObject | str | Path + ) -> list | OTBObject | str: """Check the type and value of an input param. Args: -- GitLab