DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
storage.hpp
1 #ifndef DIY_STORAGE_HPP
2 #define DIY_STORAGE_HPP
3 
4 #include <string>
5 #include <map>
6 #include <fstream>
7 #include <fcntl.h>
8 
9 #include "serialization.hpp"
10 #include "thread.hpp"
11 #include "log.hpp"
12 #include "io/utils.hpp"
13 
14 namespace diy
15 {
16  namespace detail
17  {
18  typedef void (*Save)(const void*, BinaryBuffer& buf);
19  typedef void (*Load)(void*, BinaryBuffer& buf);
20 
21  struct FileBuffer: public BinaryBuffer
22  {
23  FileBuffer(FILE* file_): file(file_), head(0), tail(0) {}
24 
25  // TODO: add error checking
26  virtual inline void save_binary(const char* x, size_t count) override { fwrite(x, 1, count, file); head += count; }
27  virtual inline void load_binary(char* x, size_t count) override { fread(x, 1, count, file); }
28  virtual inline void load_binary_back(char* x, size_t count) override { fseek(file, static_cast<long>(tail), SEEK_END); fread(x, 1, count, file); tail += count; fseek(file, static_cast<long>(head), SEEK_SET); }
29 
30  size_t size() const { return head; }
31 
32  FILE* file;
33  size_t head, tail; // tail is used to support reading from the back;
34  // the mechanism is a little awkward and unused, but should work if needed
35  };
36  }
37 
39  {
40  public:
41  virtual int put(MemoryBuffer& bb) =0;
42  virtual int put(const void* x, detail::Save save) =0;
43  virtual void get(int i, MemoryBuffer& bb, size_t extra = 0) =0;
44  virtual void get(int i, void* x, detail::Load load) =0;
45  virtual void destroy(int i) =0;
46  };
47 
49  {
50  private:
// Bookkeeping entry for one file written by this storage.
struct FileRecord
{
  size_t      size;   // byte count recorded when the file was written
  std::string name;   // path of the file
};
56 
57  public:
58  FileStorage(const std::string& filename_template = "/tmp/DIY.XXXXXX"):
59  filename_templates_(1, filename_template),
60  count_(0), current_size_(0), max_size_(0) {}
61 
62  FileStorage(const std::vector<std::string>& filename_templates):
63  filename_templates_(filename_templates),
64  count_(0), current_size_(0), max_size_(0) {}
65 
66  virtual int put(MemoryBuffer& bb) override
67  {
68  auto log = get_logger();
69  std::string filename;
70  int fh = open_random(filename);
71 
72  log->debug("FileStorage::put(): {}; buffer size: {}", filename, bb.size());
73 
74  size_t sz = bb.buffer.size();
75 #if defined(_WIN32)
76  using r_type = int;
77  r_type written = _write(fh, &bb.buffer[0], static_cast<unsigned int>(sz));
78 #else
79  using r_type = ssize_t;
80  r_type written = write(fh, &bb.buffer[0], sz);
81 #endif
82  if (written < static_cast<r_type>(sz) || written == r_type(-1))
83  log->warn("Could not write the full buffer to {}: written = {}; size = {}", filename, written, sz);
84  io::utils::close(fh);
85  bb.wipe();
86 
87 #if 0 // double-check the written file size: only for extreme debugging
88  FILE* fp = fopen(filename.c_str(), "r");
89  fseek(fp, 0L, SEEK_END);
90  int fsz = ftell(fp);
91  if (fsz != sz)
92  log->warn("file size doesn't match the buffer size, {} vs {}", fsz, sz);
93  fclose(fp);
94 #endif
95 
96  return make_file_record(filename, sz);
97  }
98 
99  virtual int put(const void* x, detail::Save save) override
100  {
101  std::string filename;
102  int fh = open_random(filename);
103 #if defined(_WIN32)
104  detail::FileBuffer fb(_fdopen(fh, "wb"));
105 #else
106  detail::FileBuffer fb(fdopen(fh, "w"));
107 #endif
108  save(x, fb);
109  size_t sz = fb.size();
110  fclose(fb.file);
111  io::utils::sync(fh);
112 
113  return make_file_record(filename, sz);
114  }
115 
116  virtual void get(int i, MemoryBuffer& bb, size_t extra) override
117  {
118  FileRecord fr = extract_file_record(i);
119 
120  get_logger()->debug("FileStorage::get(): {}", fr.name);
121 
122  bb.buffer.reserve(fr.size + extra);
123  bb.buffer.resize(fr.size);
124 #if defined(_WIN32)
125  int fh = -1;
126  _sopen_s(&fh, fr.name.c_str(), _O_RDONLY | _O_BINARY, _SH_DENYNO, _S_IREAD);
127  _read(fh, &bb.buffer[0], static_cast<unsigned int>(fr.size));
128 #else
129  int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
130  read(fh, &bb.buffer[0], fr.size);
131 #endif
132  io::utils::close(fh);
133  remove_file(fr);
134  }
135 
136  virtual void get(int i, void* x, detail::Load load) override
137  {
138  FileRecord fr = extract_file_record(i);
139 
140  //int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
141 #if defined(_WIN32)
142  int fh = -1;
143  _sopen_s(&fh, fr.name.c_str(), _O_RDONLY | _O_BINARY, _SH_DENYNO, _S_IREAD);
144  detail::FileBuffer fb(_fdopen(fh, "rb"));
145 #else
146  int fh = open(fr.name.c_str(), O_RDONLY, 0600);
147  detail::FileBuffer fb(fdopen(fh, "r"));
148 #endif
149  load(x, fb);
150  fclose(fb.file);
151 
152  remove_file(fr);
153  }
154 
155  virtual void destroy(int i) override
156  {
157  FileRecord fr;
158  {
159  CriticalMapAccessor accessor = filenames_.access();
160  fr = (*accessor)[i];
161  accessor->erase(i);
162  }
163  io::utils::remove(fr.name);
164  (*current_size_.access()) -= fr.size;
165  }
166 
167  int count() const { return (*count_.const_access()); }
168  size_t current_size() const { return (*current_size_.const_access()); }
169  size_t max_size() const { return (*max_size_.const_access()); }
170 
171  ~FileStorage()
172  {
173  for (FileRecordMap::const_iterator it = filenames_.const_access()->begin();
174  it != filenames_.const_access()->end();
175  ++it)
176  {
177  io::utils::remove(it->second.name);
178  }
179  }
180 
181  private:
182  int open_random(std::string& filename) const
183  {
184  if (filename_templates_.size() == 1)
185  filename = filename_templates_[0].c_str();
186  else
187  {
188  // pick a template at random (very basic load balancing mechanism)
189  filename = filename_templates_[static_cast<size_t>(std::rand()) % filename_templates_.size()].c_str();
190  }
191  int fh = diy::io::utils::mkstemp(filename);
192  return fh;
193  }
194 
195  int make_file_record(const std::string& filename, size_t sz)
196  {
197  int res = (*count_.access())++;
198  FileRecord fr = { sz, filename };
199  (*filenames_.access())[res] = fr;
200 
201  // keep track of sizes
202  critical_resource<size_t>::accessor cur = current_size_.access();
203  *cur += sz;
204  critical_resource<size_t>::accessor max = max_size_.access();
205  if (*cur > *max)
206  *max = *cur;
207 
208  return res;
209  }
210 
211  FileRecord extract_file_record(int i)
212  {
213  CriticalMapAccessor accessor = filenames_.access();
214  FileRecord fr = (*accessor)[i];
215  accessor->erase(i);
216  return fr;
217  }
218 
219  void remove_file(const FileRecord& fr)
220  {
221  io::utils::remove(fr.name);
222  (*current_size_.access()) -= fr.size;
223  }
224 
225  private:
226  typedef std::map<int, FileRecord> FileRecordMap;
229 
230  private:
231  std::vector<std::string> filename_templates_;
232  CriticalMap filenames_;
233  critical_resource<int> count_;
234  critical_resource<size_t> current_size_, max_size_;
235  };
236 }
237 
238 #endif
Referenced symbols (Doxygen cross-references; some symbol names were lost in extraction):

- void load(BinaryBuffer &bb, T &x)
  Loads x from bb by calling diy::Serialization<T>::load(bb,x).
  Definition: serialization.hpp:106
- virtual void save_binary(const char *x, size_t count)=0
  Copies count bytes from x into the buffer.
- (symbol name missing)
  Definition: storage.hpp:38
- void save(BinaryBuffer &bb, const T &x)
  Saves x to bb by calling diy::Serialization<T>::save(bb,x).
  Definition: serialization.hpp:102
- (symbol name missing)
  Definition: critical-resource.hpp:15
- (symbol name missing)
  Definition: serialization.hpp:26
- virtual void load_binary(char *x, size_t count)=0
  Copies count bytes into x from the buffer.
- (symbol name missing)
  Definition: storage.hpp:48
- virtual void load_binary_back(char *x, size_t count)=0
  Copies count bytes into x from the back of the buffer.
- (symbol name missing)
  Definition: critical-resource.hpp:32