DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
numpy.hpp
1 #ifndef DIY_IO_NMPY_HPP
2 #define DIY_IO_NMPY_HPP
3 
#include <algorithm>
#include <complex>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "../serialization.hpp"
#include "bov.hpp"
10 
11 namespace diy
12 {
13 namespace io
14 {
15  class NumPy: public BOV
16  {
17  public:
18  NumPy(mpi::io::file& f):
19  BOV(f) {}
20 
21  unsigned word_size() const { return word_size_; }
22 
23  unsigned read_header()
24  {
25  BOV::Shape shape;
26  bool fortran;
27  size_t offset = parse_npy_header(shape, fortran);
28  if (fortran)
29  throw std::runtime_error("diy::io::NumPy cannot read data in fortran order");
30  BOV::set_offset(offset);
31  BOV::set_shape(shape);
32  return word_size_;
33  }
34 
35  template<class T>
36  void write_header(int dim, const DiscreteBounds& bounds);
37 
38  template<class T, class S>
39  void write_header(const S& shape);
40 
41  private:
42  inline size_t parse_npy_header(BOV::Shape& shape, bool& fortran_order);
43  void save(diy::BinaryBuffer& bb, const std::string& s) { bb.save_binary(s.c_str(), s.size()); }
44  template<class T>
45  inline void convert_and_save(diy::BinaryBuffer& bb, const T& x)
46  {
47  std::ostringstream oss;
48  oss << x;
49  save(bb, oss.str());
50  }
51 
52  private:
53  unsigned word_size_;
54  };
55 
namespace detail
{
  // Character written into the 'descr' entry to identify the element type T;
  // only explicit specializations of this template are defined in this file.
  template<class T>
  char          map_numpy_type();

  // Character written into the 'descr' entry to identify the byte order of
  // the machine running the code.
  inline char   big_endian();
}
62 }
63 }
64 
65 // Modified from: https://github.com/rogersce/cnpy
66 // Copyright (C) 2011 Carl Rogers
67 // Released under MIT License
68 // license available at http://www.opensource.org/licenses/mit-license.php
69 size_t
70 diy::io::NumPy::
71 parse_npy_header(BOV::Shape& shape, bool& fortran_order)
72 {
73  char buffer[256];
74  file().read_at_all(0, buffer, 256);
75  std::string header(buffer, buffer + 256);
76  size_t nl = header.find('\n');
77  if (nl == std::string::npos)
78  throw std::runtime_error("parse_npy_header: failed to read the header");
79  header = header.substr(11, nl - 11 + 1);
80  size_t header_size = nl + 1;
81 
82  int loc1, loc2;
83 
84  //fortran order
85  loc1 = header.find("fortran_order")+16;
86  fortran_order = (header.substr(loc1,4) == "True" ? true : false);
87 
88  //shape
89  unsigned ndims;
90  loc1 = header.find("(");
91  loc2 = header.find(")");
92  std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
93  if(str_shape[str_shape.size()-1] == ',') ndims = 1;
94  else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
95  shape.resize(ndims);
96  for(unsigned int i = 0;i < ndims;i++) {
97  loc1 = str_shape.find(",");
98  shape[i] = atoi(str_shape.substr(0,loc1).c_str());
99  str_shape = str_shape.substr(loc1+1);
100  }
101 
102  //endian, word size, data type
103  //byte order code | stands for not applicable.
104  //not sure when this applies except for byte array
105  loc1 = header.find("descr")+9;
106  //bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
107  //assert(littleEndian);
108 
109  //char type = header[loc1+1];
110  //assert(type == map_type(T));
111 
112  std::string str_ws = header.substr(loc1+2);
113  loc2 = str_ws.find("'");
114  word_size_ = atoi(str_ws.substr(0,loc2).c_str());
115 
116  return header_size;
117 }
118 
119 template<class T>
120 void
121 diy::io::NumPy::
122 write_header(int dim, const DiscreteBounds& bounds)
123 {
124  std::vector<int> shape;
125  for (int i = 0; i < dim; ++i)
126  shape.push_back(bounds.max[i] - bounds.min[i] + 1);
127 
128  write_header< T, std::vector<int> >(shape);
129 }
130 
131 
132 template<class T, class S>
133 void
134 diy::io::NumPy::
135 write_header(const S& shape)
136 {
137  BOV::set_shape(shape);
138 
139  diy::MemoryBuffer dict;
140  save(dict, "{'descr': '");
141  diy::save(dict, detail::big_endian());
142  diy::save(dict, detail::map_numpy_type<T>());
143  convert_and_save(dict, sizeof(T));
144  save(dict, "', 'fortran_order': False, 'shape': (");
145  convert_and_save(dict, shape[0]);
146  for (int i = 1; i < (int) shape.size(); i++)
147  {
148  save(dict, ", ");
149  convert_and_save(dict, shape[i]);
150  }
151  if(shape.size() == 1) save(dict, ",");
152  save(dict, "), }");
153  //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
154  int remainder = 16 - (10 + dict.position) % 16;
155  for (int i = 0; i < remainder - 1; ++i)
156  diy::save(dict, ' ');
157  diy::save(dict, '\n');
158 
159  diy::MemoryBuffer header;
160  diy::save(header, (char) 0x93);
161  save(header, "NUMPY");
162  diy::save(header, (char) 0x01); // major version of numpy format
163  diy::save(header, (char) 0x00); // minor version of numpy format
164  diy::save(header, (unsigned short) dict.position);
165  header.save_binary(&dict.buffer[0], dict.buffer.size());
166 
167  BOV::set_offset(header.position);
168 
169  if (file().comm().rank() == 0)
170  file().write_at(0, &header.buffer[0], header.buffer.size());
171 }
172 
namespace diy
{
namespace io
{
namespace detail
{
// Returns the NumPy byte-order character of the host: '<' when the least
// significant byte is stored first (little-endian), '>' otherwise. Despite
// its name, the function does not return a boolean.
//
// The object representation is inspected through `unsigned char`, which is
// always permitted, instead of reading a byte array through a `short*`.
inline char big_endian()
{
  const unsigned short  probe = 1;
  const unsigned char*  bytes = reinterpret_cast<const unsigned char*>(&probe);
  return bytes[0] == 1 ? '<' : '>';
}
}
}
}
181 
namespace diy
{
namespace io
{
namespace detail
{
// Primary template, repeating the declaration that follows the NumPy class so
// that the specializations below are self-contained. Each specialization
// returns the type-kind character written into the header's 'descr' entry:
// 'f' floating point, 'i' signed integer, 'u' unsigned integer, 'b' boolean,
// 'c' complex.
template<class T>
char map_numpy_type();

template<> inline char map_numpy_type<float>()                          { return 'f'; }
template<> inline char map_numpy_type<double>()                         { return 'f'; }
template<> inline char map_numpy_type<long double>()                    { return 'f'; }

template<> inline char map_numpy_type<int>()                            { return 'i'; }
// Whether plain char is signed is implementation-defined, so pick the kind
// that matches this platform instead of always claiming a signed integer.
template<> inline char map_numpy_type<char>()                           { return static_cast<char>(-1) < 0 ? 'i' : 'u'; }
template<> inline char map_numpy_type<signed char>()                    { return 'i'; }
template<> inline char map_numpy_type<short>()                          { return 'i'; }
template<> inline char map_numpy_type<long>()                           { return 'i'; }
template<> inline char map_numpy_type<long long>()                      { return 'i'; }

template<> inline char map_numpy_type<unsigned int>()                   { return 'u'; }
template<> inline char map_numpy_type<unsigned char>()                  { return 'u'; }
template<> inline char map_numpy_type<unsigned short>()                 { return 'u'; }
template<> inline char map_numpy_type<unsigned long>()                  { return 'u'; }
template<> inline char map_numpy_type<unsigned long long>()             { return 'u'; }

template<> inline char map_numpy_type<bool>()                           { return 'b'; }

template<> inline char map_numpy_type< std::complex<float> >()          { return 'c'; }
template<> inline char map_numpy_type< std::complex<double> >()         { return 'c'; }
template<> inline char map_numpy_type< std::complex<long double> >()    { return 'c'; }
}
}
}
212 
213 #endif
virtual void save_binary(const char *x, size_t count)=0
copy count bytes from x into the buffer
Definition: numpy.hpp:15
void save(BinaryBuffer &bb, const T &x)
Saves x to bb by calling diy::Serialization<T>::save(bb,x).
Definition: serialization.hpp:102
Definition: bov.hpp:16
Wraps MPI file IO.
Definition: io.hpp:16
Definition: serialization.hpp:26
Definition: types.hpp:16
virtual void save_binary(const char *x, size_t count) override
copy count bytes from x into the buffer
Definition: serialization.hpp:413
A serialization buffer.
Definition: serialization.hpp:19