Coverage for src/rechunk_data/tests/test_rechunk_netcdf.py: 100%

51 statements (coverage.py v7.3.1, created at 2023-09-30 09:58 +0000)
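The report was presumably produced by coverage.py through the pytest-cov plugin. A minimal, hypothetical sketch of how such a report could be regenerated, assuming pytest-cov is installed (the path and flags below are assumptions, not taken from the project's own CI setup), followed by the measured file itself:

# Hypothetical reproduction of the HTML coverage report, assuming pytest-cov.
import pytest

pytest.main(
    [
        "src/rechunk_data/tests/test_rechunk_netcdf.py",
        "--cov=rechunk_data",   # measure coverage of the rechunk_data package
        "--cov-report=html",    # write an HTML report (htmlcov/ by default)
    ]
)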

1"""Test the actual rechunk method.""" 

2import logging 

3from pathlib import Path 

4import time 

5from tempfile import NamedTemporaryFile, TemporaryDirectory 

6 

7import dask 

8import pytest 

9from rechunk_data import rechunk_netcdf_file, rechunk_dataset 

10from rechunk_data._rechunk import _save_dataset 

11 

12 

13def test_rechunk_data_dir_with_overwrite(data_dir: Path) -> None: 

14 """The the overwriting of files in a directory.""" 

15 data_files = {f.name: f.stat().st_atime for f in data_dir.rglob(".nc")} 

16 with dask.config.set({"array.chunk-size": "2kiB"}): 

17 rechunk_netcdf_file(data_dir) 

18 new_files = {f.name: f.stat().st_atime for f in data_dir.rglob(".nc")} 

19 assert len(data_files) == len(new_files) 

20 assert list(data_files.keys()) == list(new_files.keys()) 

21 

22 

23def test_rechunk_data_dir_without_overwrite(data_dir: Path) -> None: 

24 """Testing the creation of new datafiles from a folder.""" 

25 with TemporaryDirectory() as temp_dir: 

26 rechunk_netcdf_file(data_dir, Path(temp_dir)) 

27 new_files = sorted(f.relative_to(temp_dir) for f in Path(temp_dir).rglob(".nc")) 

28 old_files = sorted(f.relative_to(data_dir) for f in data_dir.rglob(".nc")) 

29 assert new_files == old_files 

30 

31 

32def test_rechunk_single_data_file(data_file: Path) -> None: 

33 """Testing rechunking of single data files.""" 

34 

35 a_time = float(data_file.stat().st_mtime) 

36 time.sleep(0.5) 

37 with dask.config.set({"array.chunk-size": "1MiB"}): 

38 rechunk_netcdf_file(data_file) 

39 assert a_time < float(data_file.stat().st_mtime) 

40 with NamedTemporaryFile(suffix=".nc") as temp_file: 

41 rechunk_netcdf_file(data_file, Path(temp_file.name)) 

42 assert Path(temp_file.name).exists() 

43 

44 

45def test_rechunk_dataset(small_chunk_data) -> None: 

46 """Test rechunking an xarray dataset.""" 

47 with dask.config.set({"array.chunk-size": "1MiB"}): 

48 new_data = rechunk_dataset(small_chunk_data) 

49 assert list(new_data.data_vars) == list(small_chunk_data.data_vars) 

50 

51 

52def test_wrong_or_format(small_chunk_data, caplog) -> None: 

53 """Testing wrong file format.""" 

54 caplog.clear() 

55 caplog.set_level(logging.DEBUG) 

56 with NamedTemporaryFile(suffix=".nc") as temp: 

57 temp_file = Path(temp.name) 

58 rechunk_netcdf_file(temp_file) 

59 _, loglevel, message = caplog.record_tuples[-1] 

60 assert loglevel == logging.ERROR 

61 assert "Error while" in message 

62 _save_dataset(small_chunk_data, temp_file, {}, "foo") 

63 _, loglevel, message = caplog.record_tuples[-1] 

64 _save_dataset(small_chunk_data, temp_file, {"foo": "bar"}, "foo") 

65 _, loglevel, message = caplog.record_tuples[-1] 

66 assert loglevel == logging.ERROR 

67 

68 

69def test_wrong_engine(small_chunk_data) -> None: 

70 with pytest.raises(ValueError): 

71 rechunk_dataset(small_chunk_data, engine="foo")
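
The tests rely on pytest fixtures (data_dir, data_file, small_chunk_data) that are defined elsewhere in the suite, presumably in a conftest.py. A minimal sketch of what such fixtures might look like, assuming xarray, dask, and a netCDF backend are installed; the variable names and fixture bodies below are illustrative assumptions, not the project's actual fixtures:

# Hypothetical conftest.py sketch -- illustrative only, not the real fixtures.
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Generator

import dask.array as da
import pytest
import xarray as xr


def _dummy_dataset() -> xr.Dataset:
    # A small dask-backed dataset with deliberately tiny chunks.
    data = da.random.random((12, 10, 10), chunks=(1, 10, 10))
    return xr.Dataset({"tas": (("time", "lat", "lon"), data)})


@pytest.fixture
def small_chunk_data() -> xr.Dataset:
    return _dummy_dataset()


@pytest.fixture
def data_dir() -> Generator[Path, None, None]:
    # A directory holding a couple of netCDF files to be rechunked.
    with TemporaryDirectory() as temp_dir:
        for name in ("file1.nc", "file2.nc"):
            _dummy_dataset().to_netcdf(Path(temp_dir) / name)
        yield Path(temp_dir)


@pytest.fixture
def data_file(data_dir: Path) -> Path:
    # A single netCDF file taken from the directory fixture.
    return next(data_dir.rglob("*.nc"))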