C++使用HDF5库实现将h5图像转为tif格式_C/C++

本文介绍基于C++语言的HDF5库与GDAL库，将.h5格式的多波段HDF5图像批量转换为.tif格式的方法；其中，本方法支持对szip压缩的HDF5图像（例如高分一号卫星遥感影像）加以转换。

将HDF5图像批量转换为.tif格式，在部分场景下操作并不难——在我们之前的文章《Python中ArcPy实现栅格图像文件由HDF格式批量转换为TIFF格式》中，就介绍过基于Python中的ArcPy模块实现这一需求的方法。但是，正如我们在该文文末的补充内容中提到的那样，由于szip这个压缩模块不再受到HDF5库的支持，导致用szip程序压缩的HDF5图像，比如高分系列遥感影像的.h5文件，就没办法在Windows中通过Python的h5py、GDAL等库直接打开了。

那么在这里，我们就介绍一下基于C++语言的HDF5库，打开.h5格式图像（包括那些用到szip压缩程序的HDF5图像）的方法。不过需要注意，我这里是在Linux的Ubuntu系统中操作的，至少可以保证这个代码在Linux下可以正常运行；但能否在Windows中的C++环境下也正常运行，我暂时还没试过——按道理应该也是可行的，大家如果有需要的话可以尝试一下。

本文所用代码如下。

#include <array>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <filesystem>
#include <iostream>
#include <sstream>
#include <string>
#include <system_error>
#include <vector>

#include <gdal.h>
#include <gdal_priv.h>
#include "hdf5.h"
#include "ogr_spatialref.h"

int main(int argc, char *argv[]) {
    const std::string h5_path = "/home/ctj/data/h5/";
    const std::string tif_path = "/home/ctj/data/tiff_48sub/";
    // const std::string h5_path = argv[1];
    // const std::string tif_path = argv[2];
    const char *dataset_0 = "/cloud_mask/cloudmask";
    const char *dataset_1 = "/geometriccorrection/dataset_16_1";
    const char *dataset_2 = "/geometriccorrection/dataset_16_2";
    const char *dataset_3 = "/geometriccorrection/dataset_16_3";
    const char *dataset_4 = "/geometriccorrection/dataset_16_4";
    const char *projection_para = "projectionpara";
    const char *projection_str = "projectionstr";

    hid_t file_id;
    hid_t dataset_id;
    hid_t attr_id;
    hid_t attr_dtype;
    herr_t status;
    hid_t mem_type_id = h5t_native_uint16;
    int size = 6863;
    int band_num = 5;

    // namespace fs = filesystem;

    status = h5open();
    gdalallregister();

    for (const auto& entry : std::filesystem::directory_iterator(h5_path)) {
        if (entry.path().extension() == ".h5") {
            std::string filename = entry.path().filename().string();
            std::cout << filename << std::endl;
            std::string basename = filename.substr(0, filename.find_last_of('.'));
            const std::string output_filename = tif_path + basename + ".tif";

            file_id = h5fopen((h5_path + filename).c_str(), h5f_acc_rdonly, h5p_default);

            attr_id = h5aopen(file_id, projection_para, h5p_default);
            attr_dtype = h5aget_type(attr_id);
            size_t string_length = h5tget_size(attr_dtype);
            char *attr_data = new char[1000];
            status = h5aread(attr_id, attr_dtype, attr_data);

            std::istringstream iss(attr_data);
            std::vector<double> transform(6);
            int i = 0;
            std::string str;
            while (getline(iss, str, ',')) {
                    transform[i] = stod(str);
                    ++i;
                }

            attr_id = h5aopen(file_id, projection_str, h5p_default);
            attr_dtype = h5aget_type(attr_id);
            char *attr_data_str = new char[1000];
            status = h5aread(attr_id, attr_dtype, attr_data_str);

            dataset_id = h5dopen1(file_id, dataset_0);
            std::vector<u_int16_t> data_0(size * size);
            status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_0.data());

            dataset_id = h5dopen1(file_id, dataset_1);
            std::vector<u_int16_t> data_1(size * size);
            status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_1.data());

            dataset_id = h5dopen1(file_id, dataset_2);
            std::vector<u_int16_t> data_2(size * size);
            status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_2.data());

            dataset_id = h5dopen1(file_id, dataset_3);
            std::vector<u_int16_t> data_3(size * size);
            status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_3.data());

            dataset_id = h5dopen1(file_id, dataset_4);
            std::vector<u_int16_t> data_4(size * size);
            status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_4.data());

            status = h5fclose(file_id);

            gdaldriver *podriver = getgdaldrivermanager()->getdriverbyname("gtiff");

            gdaldataset *podstds = podriver->create(output_filename.c_str(), size, size, band_num, gdt_uint16, nullptr);
            u_int16_t *band_data_0 = &data_0[0];
            podstds->getrasterband(1)->rasterio(gf_write, 0, 0, size, size, band_data_0, size, size, gdt_uint16, 0, 0);
            u_int16_t *band_data_1 = &data_1[0];
            podstds->getrasterband(2)->rasterio(gf_write, 0, 0, size, size, band_data_1, size, size, gdt_uint16, 0, 0);
            u_int16_t *band_data_2 = &data_2[0];
            podstds->getrasterband(3)->rasterio(gf_write, 0, 0, size, size, band_data_2, size, size, gdt_uint16, 0, 0);
            u_int16_t *band_data_3 = &data_3[0];
            podstds->getrasterband(4)->rasterio(gf_write, 0, 0, size, size, band_data_3, size, size, gdt_uint16, 0, 0);
            u_int16_t *band_data_4 = &data_4[0];
            podstds->getrasterband(5)->rasterio(gf_write, 0, 0, size, size, band_data_4, size, size, gdt_uint16, 0, 0);

            for (int i = 1; i <= band_num; ++i) {
                gdalrasterband *poband = podstds->getrasterband(i);
                if (poband != nullptr) {
                    poband->setnodatavalue(0);
                }
            }

            podstds->setgeotransform(transform.data());
            podstds->setprojection(attr_data_str);

            gdalclose(podstds);
        }
    }

    status = h5close();
    return 0;
}

上述是本文完整代码。接下来，就分段介绍一下每段代码的具体含义。

首先，需要包含必要的头文件。在这里，包括标准输入输出、字符串流、向量、文件系统等功能，以及hdf5库与gdal库。同时，定义了两个常量字符串h5_path与tif_path，分别指向转换前的hdf5图像和转换后的tiff图像的目录。

#include <iostream>
#include <sstream>
#include <vector>
#include <filesystem>
#include <gdal.h>
#include <gdal_priv.h>
#include "hdf5.h"
#include "ogr_spatialref.h" 

int main(int argc, char *argv[]) {
    const std::string h5_path = "/home/ctj/data/h5/";
    const std::string tif_path = "/home/ctj/data/tiff_48sub/";

随后，设定要读取的hdf5图像的数据集（波段）的路径，以及空间参考信息的属性名称；这些参数大家就按照自己hdf5图像的实际情况来修改即可。

接下来，初始化hdf5库的状态变量，这些变量是hdf5库操作需要的。同时，用size表示图像的宽度和高度，因为我这里hdf5图像是正方形，所以只需指定1个值。此外，band_num表示待转换遥感影像的波段数。

const char *dataset_0 = "/cloud_mask/cloudmask";
const char *dataset_1 = "/geometriccorrection/dataset_16_1";
// ... 省略部分代码 ...
const char *projection_para = "projectionpara";
const char *projection_str = "projectionstr";

hid_t file_id;
hid_t dataset_id;
hid_t attr_id;
hid_t attr_dtype;
herr_t status;
hid_t mem_type_id = h5t_native_uint16;
int size = 6863;
int band_num = 5;

紧接着，初始化hdf5库，注册所有可用的gdal驱动程序。

status = h5open();
gdalallregister();

随后，使用std::filesystem::directory_iterator遍历指定目录中的所有文件，并只处理扩展名为.h5的文件；对于这些文件，构建输出文件名——基于原始文件名，去掉扩展名并添加.tif扩展名。

for (const auto& entry : std::filesystem::directory_iterator(h5_path)) {
    if (entry.path().extension() == ".h5") {
        std::string filename = entry.path().filename().string();
        std::cout << filename << std::endl;
        std::string basename = filename.substr(0, filename.find_last_of('.'));
        const std::string output_filename = tif_path + basename + ".tif";

随后，使用h5fopen打开hdf5图像，在这里选择以只读模式访问。

file_id = h5fopen((h5_path + filename).c_str(), h5f_acc_rdonly, h5p_default);

随后，需要读取原本hdf5图像的空间参考信息。在这里，首先打开名为projection_para的属性，读取其内容到attr_data中；随后，解析attr_data为一个包含6个元素的double向量transform——这些元素用于地理变换。

attr_id = h5aopen(file_id, projection_para, h5p_default);
attr_dtype = h5aget_type(attr_id);
size_t string_length = h5tget_size(attr_dtype);
char *attr_data = new char[1000];
status = h5aread(attr_id, attr_dtype, attr_data);

std::istringstream iss(attr_data);
std::vector<double> transform(6);
int i = 0;
std::string str;
while (getline(iss, str, ',')) {
    transform[i] = stod(str);
    ++i;
}

类似地，读取名为projection_str的属性，该属性包含投影信息的wkt字符串。

attr_id = h5aopen(file_id, projection_str, h5p_default);
attr_dtype = h5aget_type(attr_id);
char *attr_data_str = new char[1000];
status = h5aread(attr_id, attr_dtype, attr_data_str);

到这里，我们就可以对每个数据集调用h5dopen1将其打开，并使用h5dread将数据读入向量中。

dataset_id = h5dopen1(file_id, dataset_0);
std::vector<u_int16_t> data_0(size * size);
status = h5dread(dataset_id, mem_type_id, h5s_all, h5s_all, h5p_default, data_0.data());

// ... 重复上述步骤读取其他数据集 ...

随后，记得关闭hdf5图像以释放资源。

status = h5fclose(file_id);

接下来，就该gdal库登场了。使用gdal库创建一个新的tiff文件，并使用rasterio方法将每个波段的数据写入到tiff文件中。

gdaldriver *podriver = getgdaldrivermanager()->getdriverbyname("gtiff");

gdaldataset *podstds = podriver->create(output_filename.c_str(), size, size, band_num, gdt_uint16, nullptr);
u_int16_t *band_data_0 = &data_0[0];
podstds->getrasterband(1)->rasterio(gf_write, 0, 0, size, size, band_data_0, size, size, gdt_uint16, 0, 0);
// ... 写入其他波段 ...

同时，设置每个波段的nodata值为0，同时按照前述从hdf5图像中读取到的信息，设置tiff图像的地理变换参数和投影信息。

for (int i = 1; i <= band_num; ++i) {
    gdalrasterband *poband = podstds->getrasterband(i);
    if (poband != nullptr) {
        poband->setnodatavalue(0);
    }
}

podstds->setgeotransform(transform.data());
podstds->setprojection(attr_data_str);

gdalclose(podstds);

最后，不要忘记关闭hdf5库以释放资源。

status = h5close();

至此，大功告成。

知识补充

windows打开hdf5图像：hdfview软件的下载、安装

下面为大家介绍在windows电脑中，下载、安装用以查看hdf5图像数据的软件hdfview的方法。

hdf5（hierarchical data format 5）是一种用于存储和管理大量科学数据的文件格式，其由hdf group开发和维护，广泛应用于科学计算、工程、金融和医学等领域。谈及hdf5图像数据在windows中的打开方式，主要包括基于hdf group开发的hdfview软件来打开，以及用c++、python来打开等2种方式。

在之前，我很少选择用hdfview软件来打开hdf5，因为早些时候这个软件的安装比较麻烦，还需要修改一下环境变量什么的，不如在python中配置对应的库（比如h5py、gdal等）然后用代码读取来的容易。但是，后来发现由于szip这个压缩模块不再受到hdf5等库的支持（我看网上说好像是因为这个库不再是非盈利的了还是怎么），导致那些用到szip压缩的hdf5图像（比如高分系列遥感影像数据的.h5文件），就没办法在windows中通过python的h5py、gdal等方便地打开了（linux下c++ 的hdf5库我试了，还是可以正常打开的，但是windows中c++ 的hdf5库是否能打开我还没试过）。所以，在windows中，如果只是需要打开、查看一下数据的话（不需要代码执行一些分析或批处理），通过hdfview软件来打开hdf5还是很方便的。

这里就介绍一下hdfview软件的下载、安装方法。

首先，我们打开hdfview软件的github下载网站（https://github.com/hdfgroup/hdfview/releases）。当然，也可以先进入官方下载网站（https://portal.hdfgroup.org/downloads/index.html），找到其中的hdfview软件下载位置，如下图所示。