Rendering fails when VTKHDF file is greater than 2 GB in size
I would like to convert a stack of 2D PNG images into a VTKHDF file, and subsequently render a single slice of interest in an interactive window. When the VTKHDF file is less than 2 GB in size, the rendering works fine, but when it is greater than 2 GB, I get the following error.
HDF5-DIAG: Error detected in HDF5 (1.13.1) thread 0:
#000: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5D.c line 1021 in vtkhdf5_H5Dread(): can't synchronously read data
major: Dataset
minor: Read failed
#001: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5D.c line 970 in H5D__read_api_common(): can't read data
major: Dataset
minor: Read failed
#002: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5VLcallback.c line 2079 in vtkhdf5_H5VL_dataset_read(): dataset read failed
major: Virtual Object Layer
minor: Read failed
#003: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5VLcallback.c line 2046 in H5VL__dataset_read(): dataset read failed
major: Virtual Object Layer
minor: Read failed
#004: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5VLnative_dataset.c line 294 in vtkhdf5_H5VL__native_dataset_read(): can't read data
major: Dataset
minor: Read failed
#005: C:\glr\builds\vtk\vtk-ci-ext\0\ThirdParty\hdf5\vtkhdf5\src\H5Dio.c line 147 in vtkhdf5_H5D__read(): no output buffer
major: Invalid arguments to routine
minor: Bad value
2023-04-20 12:05:35.704 ( 10.539s) [ ]vtkHDFReaderImplementat:942 ERR| vtkHDFReader (0000027B47E4D300): Error H5Dread start: 0, 0, 0 count: 240, 3000, 3000
2023-04-20 12:05:35.755 ( 10.590s) [ ] vtkHDFReader.cxx:391 ERR| vtkHDFReader (0000027B47E4D300): Error reading array PNGImage
2023-04-20 12:05:35.764 ( 10.599s) [ ] vtkExecutive.cxx:741 ERR| vtkCompositeDataPipeline (0000027B47E52C20): Algorithm vtkHDFReader (0000027B47E4D300) returned failure for request: vtkInformation (0000027B47D383E0)
Debug: Off
Modified Time: 171
Reference Count: 1
Registered Events: (none)
Request: REQUEST_DATA
FORWARD_DIRECTION: 0
ALGORITHM_AFTER_FORWARD: 1
FROM_OUTPUT_PORT: 0
2023-04-20 12:05:36.350 ( 11.186s) [ ] vtkImageData.cxx:1412 ERR| vtkImageData (0000027B47F0C870): No Scalar Field has been specified - assuming 1 component!
Methods to reproduce:
Data: I'm providing an example png image file as an attachment here. Please copy this file to a separate folder on your system and run the following script to generate several copies of the file in the same folder. We will create 300 copies and give each of them a unique name.
import shutil
folderName = r'path\to\data'
fileName = 'example00000.png'
for k in range(1, 300):
shutil.copy2(folderName + '\\' + fileName, folderName + r'\\example{}.png'.format(str(k).zfill(5))
Code: The code below will take as input the path to the folder containing these images (in folder_path variable) and create an HDF file in the same folder, plot a single slice using matplotlib, and then render the first slice using VTK. The num_images variable controls how many images you want to use to create the HDF file. If it is over 239, the resulting HDF file is over 2 GB and throws an error.
import h5py
from PIL import Image
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import glob
import vtk
def create_vtkhdf_dataset(output_file, image_dir, image_height, image_width, num_images, pixel_size_xy, pixel_size_z):
with h5py.File(output_file,'w') as hdffile:
# write support data
vtkhdf_group = hdffile.create_group("VTKHDF")
vtkhdf_group.attrs.create("Version", [1, 0])
vtkhdf_group.attrs.create("Type", np.string_("ImageData"))
whole_extent = (0, image_width-1, 0, image_height-1, 0, num_images-1)
vtkhdf_group.attrs.create("WholeExtent", whole_extent)
vtkhdf_group.attrs.create("Origin", (0.0, 0.0, 0.0))
vtkhdf_group.attrs.create("Spacing", (pixel_size_xy, pixel_size_xy, pixel_size_z))
vtkhdf_group.attrs.create("Direction", (1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0))
# create the pointdata group and the dataset inside it
field_data_group = vtkhdf_group.create_group("PointData")
field_data_group.attrs.create('Scalars', np.string_("PNGImage"))
dset = field_data_group.create_dataset('PNGImage',dtype=np.uint8,shape=(num_images,image_height,image_width))
image_filenames = glob.glob(image_dir + '\\*.png')
# Iterate over the PNG image files and add them to the dataset
for i, filename in enumerate(tqdm(image_filenames[:num_images])):
with Image.open(filename) as img:
img = np.asarray(img)
dset[i, :, :] = img.reshape((image_height, image_width))
# To access individual 2D image slices from the HDF5 file, we can define a method like this:
def get_image_slice(hdf5_file, index):
with h5py.File(hdf5_file, "r") as f:
img_data = f["VTKHDF"]["PointData"]["PNGImage"][index, :, :]
return img_data
def render_data(file):
reader = vtk.vtkHDFReader()
reader.SetFileName(file)
reader.Update()
# Instead, this worked when I took the reader output, but only when num_images = 238, not 240
outDS = reader.GetOutput()
outDS.GetPointData().SetScalars(outDS.GetPointData().GetArray("PNGImage"))
imageXY = vtk.vtkExtractVOI()
imageXY.SetInputConnection(reader.GetOutputPort())
imageXY.SetVOI(0, 2999, 0, 2999, 0, 0)
# This did not work - got the same error 'No scalar Field has been specified - assuming 1 component!'
# outDS = imageXY.GetOutput()
# outDS.GetPointData().SetScalars(outDS.GetPointData().GetArray("PNGImage"))
imageXY.Update()
XYSliceActor = vtk.vtkImageActor()
XYSliceActor.SetPosition(-1500, -1500, -500)
XYSliceActor.GetMapper().SetInputConnection(imageXY.GetOutputPort())
ip = vtk.vtkImageProperty()
ip.SetColorWindow(255)
ip.SetColorLevel(128)
ip.SetAmbient(0.0)
ip.SetDiffuse(1.0)
ip.SetOpacity(1.0)
ip.SetInterpolationTypeToLinear()
XYSliceActor.SetProperty(ip)
XYSliceActor.Update()
colors = vtk.vtkNamedColors()
# Create the Renderer
renderer = vtk.vtkRenderer()
renderer.AddActor(XYSliceActor)
renderer.ResetCamera()
renderer.SetBackground(colors.GetColor3d('Silver'))
# Create the RendererWindow
renderer_window = vtk.vtkRenderWindow()
renderer_window.AddRenderer(renderer)
renderer_window.SetWindowName('ReadImageData')
# Create the RendererWindowInteractor and display the VTKHDF file
interactor = vtk.vtkRenderWindowInteractor()
interactor.SetRenderWindow(renderer_window)
interactor.Initialize()
interactor.Start()
if __name__ == '__main__':
# Define the paths to the PNG images and the output HDF5 file
# Specify path to the PNG images
folder_path = r'path\to\data'
image_dir = folder_path
output_file = folder_path + '\stack.hdf'
image_height = 3000
image_width = 3000
# Works when this is 238, but not 240
num_images = 240
pixel_size = 1
pixel_size_z = 1
# Create the dataset
create_vtkhdf_dataset(output_file, image_dir, image_height, image_width, num_images, pixel_size, pixel_size_z)
# The method takes the path to the HDF5 file and the index of the image slice to retrieve.
# It returns the image data as a numpy array.
# Example usage:
img_slice = get_image_slice(output_file, num_images - 10)
plt.imshow(img_slice)
plt.show()
render_data(output_file)
Any help would be really appreciated. The goal here is to find a format that will let me convert a large number of 2D png images into a format with significantly fewer files and render a single slice. The memory on my computer is limited, so I do need the file format conversion to happen one image at a time. I will not be able to create a numpy array with all the data beforehand. The rendering should not read the full dataset into memory either, just the requested 2D slice.
Tagging @julien.fausty based on our discussion on Discourse.
Please let me know if you need any additional information.
Thanks, Chaitanya