Coverage for src/rechunk_data/tests/test_rechunk_netcdf.py: 100%
51 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 09:58 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-30 09:58 +0000
1"""Test the actual rechunk method."""
2import logging
3from pathlib import Path
4import time
5from tempfile import NamedTemporaryFile, TemporaryDirectory
7import dask
8import pytest
9from rechunk_data import rechunk_netcdf_file, rechunk_dataset
10from rechunk_data._rechunk import _save_dataset
def test_rechunk_data_dir_with_overwrite(data_dir: Path) -> None:
    """Test the overwriting of files in a directory.

    Rechunking a directory in place must keep the exact same set of
    netCDF files -- nothing added, nothing dropped.
    """
    # NOTE: the pattern must be "*.nc"; a bare ".nc" only matches a file
    # literally named ".nc", leaving both dicts empty and making the
    # assertions below vacuously true.
    data_files = {f.name: f.stat().st_atime for f in data_dir.rglob("*.nc")}
    with dask.config.set({"array.chunk-size": "2kiB"}):
        rechunk_netcdf_file(data_dir)
    new_files = {f.name: f.stat().st_atime for f in data_dir.rglob("*.nc")}
    assert len(data_files) == len(new_files)
    assert list(data_files.keys()) == list(new_files.keys())
def test_rechunk_data_dir_without_overwrite(data_dir: Path) -> None:
    """Testing the creation of new datafiles from a folder.

    Rechunking into a separate output directory must reproduce the
    input directory's netCDF file tree exactly.
    """
    with TemporaryDirectory() as temp_dir:
        rechunk_netcdf_file(data_dir, Path(temp_dir))
        # NOTE: the pattern must be "*.nc"; a bare ".nc" matches nothing,
        # so both sides of the comparison would be empty lists and the
        # test would pass vacuously.
        new_files = sorted(
            f.relative_to(temp_dir) for f in Path(temp_dir).rglob("*.nc")
        )
        old_files = sorted(f.relative_to(data_dir) for f in data_dir.rglob("*.nc"))
        assert new_files == old_files
def test_rechunk_single_data_file(data_file: Path) -> None:
    """Testing rechunking of single data files."""
    mtime_before = float(data_file.stat().st_mtime)
    # Let the filesystem timestamp advance so the mtime comparison is reliable.
    time.sleep(0.5)
    with dask.config.set({"array.chunk-size": "1MiB"}):
        rechunk_netcdf_file(data_file)
    # In-place rechunking must have rewritten the file.
    assert mtime_before < float(data_file.stat().st_mtime)
    with NamedTemporaryFile(suffix=".nc") as tmp:
        target = Path(tmp.name)
        rechunk_netcdf_file(data_file, target)
        # Rechunking to an explicit target must create that file.
        assert target.exists()
def test_rechunk_dataset(small_chunk_data) -> None:
    """Test rechunking an xarray dataset."""
    with dask.config.set({"array.chunk-size": "1MiB"}):
        rechunked = rechunk_dataset(small_chunk_data)
        # Rechunking must not add or drop any data variables.
        assert list(rechunked.data_vars) == list(small_chunk_data.data_vars)
def test_wrong_or_format(small_chunk_data, caplog) -> None:
    """Testing wrong file format."""
    caplog.clear()
    caplog.set_level(logging.DEBUG)
    with NamedTemporaryFile(suffix=".nc") as temp:
        target = Path(temp.name)
        # An empty ".nc" file is not a valid dataset: an error record
        # must be logged.
        rechunk_netcdf_file(target)
        _, level, text = caplog.record_tuples[-1]
        assert level == logging.ERROR
        assert "Error while" in text
        # Empty encoding dict: the resulting record is captured but
        # deliberately not asserted on here.
        _save_dataset(small_chunk_data, target, {}, "foo")
        _, level, text = caplog.record_tuples[-1]
        # A bogus engine with a non-empty encoding must log an error.
        _save_dataset(small_chunk_data, target, {"foo": "bar"}, "foo")
        _, level, text = caplog.record_tuples[-1]
        assert level == logging.ERROR
def test_wrong_engine(small_chunk_data) -> None:
    """An unknown output engine must raise a ValueError."""
    with pytest.raises(ValueError):
        rechunk_dataset(small_chunk_data, engine="foo")