DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
storage.hpp
1 #ifndef DIY_STORAGE_HPP
2 #define DIY_STORAGE_HPP
3 
4 #include <string>
5 #include <map>
6 #include <fstream>
7 
8 #include <unistd.h> // mkstemp() on Mac
9 #include <cstdlib> // mkstemp() on Linux
10 #include <cstdio> // remove()
11 #include <fcntl.h>
12 
13 #include "serialization.hpp"
14 #include "thread.hpp"
15 #include "log.hpp"
16 
17 namespace diy
18 {
19  namespace detail
20  {
21  typedef void (*Save)(const void*, BinaryBuffer& buf);
22  typedef void (*Load)(void*, BinaryBuffer& buf);
23 
24  struct FileBuffer: public BinaryBuffer
25  {
26  FileBuffer(FILE* file_): file(file_), head(0), tail(0) {}
27 
28  // TODO: add error checking
29  virtual inline void save_binary(const char* x, size_t count) override { fwrite(x, 1, count, file); head += count; }
30  virtual inline void load_binary(char* x, size_t count) override { fread(x, 1, count, file); }
31  virtual inline void load_binary_back(char* x, size_t count) override { fseek(file, tail, SEEK_END); fread(x, 1, count, file); tail += count; fseek(file, head, SEEK_SET); }
32 
33  size_t size() const { return head; }
34 
35  FILE* file;
36  size_t head, tail; // tail is used to support reading from the back;
37  // the mechanism is a little awkward and unused, but should work if needed
38  };
39  }
40 
42  {
    // Abstract storage interface: five pure virtual operations that move data
    // out to external storage and back, addressed by an integer handle.
    // The descriptions below are taken from the file-backed override visible
    // later in this file; other implementations may differ.
    // NOTE(review): the class header line is not visible in this listing, and
    // no virtual destructor is declared in the visible lines. If instances are
    // ever deleted through a pointer to this interface, a virtual destructor
    // is needed; confirm against the full header.
43  public:
    // Stores the contents of bb and returns the handle under which it can be
    // retrieved; the file-backed override also wipes bb afterwards.
44  virtual int put(MemoryBuffer& bb) =0;
    // Stores the object pointed to by x by passing it, together with a buffer,
    // to the save callback; returns the handle of the stored record.
45  virtual int put(const void* x, detail::Save save) =0;
    // Loads the record with handle i into bb. extra defaults to 0; the
    // file-backed override reserves that many additional bytes of capacity in
    // bb.buffer and removes the stored record once it has been read.
46  virtual void get(int i, MemoryBuffer& bb, size_t extra = 0) =0;
    // Loads the record with handle i into the object pointed to by x via the
    // load callback.
47  virtual void get(int i, void* x, detail::Load load) =0;
    // Discards the record with handle i without loading it.
48  virtual void destroy(int i) =0;
49  };
50 
52  {
53  private:
// Bookkeeping entry for one stored file. It is created with aggregate
// initialization as { size, name }, so the member order must not change.
54  struct FileRecord
55  {
56  size_t size; // byte count recorded for the file when it was stored
57  std::string name; // path of the file, as filled in by open_random()
58  };
59 
60  public:
61  FileStorage(const std::string& filename_template = "/tmp/DIY.XXXXXX"):
62  filename_templates_(1, filename_template),
63  count_(0), current_size_(0), max_size_(0) {}
64 
65  FileStorage(const std::vector<std::string>& filename_templates):
66  filename_templates_(filename_templates),
67  count_(0), current_size_(0), max_size_(0) {}
68 
69  virtual int put(MemoryBuffer& bb) override
70  {
71  auto log = get_logger();
72  std::string filename;
73  int fh = open_random(filename);
74 
75  log->debug("FileStorage::put(): {}; buffer size: {}", filename, bb.size());
76 
77  size_t sz = bb.buffer.size();
78  size_t written = write(fh, &bb.buffer[0], sz);
79  if (written < sz || written == (size_t)-1)
80  log->warn("Could not write the full buffer to {}: written = {}; size = {}", filename, written, sz);
81  fsync(fh);
82  close(fh);
83  bb.wipe();
84 
85 #if 0 // double-check the written file size: only for extreme debugging
86  FILE* fp = fopen(filename.c_str(), "r");
87  fseek(fp, 0L, SEEK_END);
88  int fsz = ftell(fp);
89  if (fsz != sz)
90  log->warn("file size doesn't match the buffer size, {} vs {}", fsz, sz);
91  fclose(fp);
92 #endif
93 
94  return make_file_record(filename, sz);
95  }
96 
97  virtual int put(const void* x, detail::Save save) override
98  {
99  std::string filename;
100  int fh = open_random(filename);
101 
102  detail::FileBuffer fb(fdopen(fh, "w"));
103  save(x, fb);
104  size_t sz = fb.size();
105  fclose(fb.file);
106  fsync(fh);
107 
108  return make_file_record(filename, sz);
109  }
110 
111  virtual void get(int i, MemoryBuffer& bb, size_t extra) override
112  {
113  FileRecord fr = extract_file_record(i);
114 
115  get_logger()->debug("FileStorage::get(): {}", fr.name);
116 
117  bb.buffer.reserve(fr.size + extra);
118  bb.buffer.resize(fr.size);
119  int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
120  read(fh, &bb.buffer[0], fr.size);
121  close(fh);
122 
123  remove_file(fr);
124  }
125 
126  virtual void get(int i, void* x, detail::Load load) override
127  {
128  FileRecord fr = extract_file_record(i);
129 
130  //int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
131  int fh = open(fr.name.c_str(), O_RDONLY, 0600);
132  detail::FileBuffer fb(fdopen(fh, "r"));
133  load(x, fb);
134  fclose(fb.file);
135 
136  remove_file(fr);
137  }
138 
139  virtual void destroy(int i) override
140  {
141  FileRecord fr;
142  {
143  CriticalMapAccessor accessor = filenames_.access();
144  fr = (*accessor)[i];
145  accessor->erase(i);
146  }
147  remove(fr.name.c_str());
148  (*current_size_.access()) -= fr.size;
149  }
150 
151  int count() const { return (*count_.const_access()); }
152  size_t current_size() const { return (*current_size_.const_access()); }
153  size_t max_size() const { return (*max_size_.const_access()); }
154 
155  ~FileStorage()
156  {
157  for (FileRecordMap::const_iterator it = filenames_.const_access()->begin();
158  it != filenames_.const_access()->end();
159  ++it)
160  {
161  remove(it->second.name.c_str());
162  }
163  }
164 
165  private:
166  int open_random(std::string& filename) const
167  {
168  if (filename_templates_.size() == 1)
169  filename = filename_templates_[0].c_str();
170  else
171  {
172  // pick a template at random (very basic load balancing mechanism)
173  filename = filename_templates_[std::rand() % filename_templates_.size()].c_str();
174  }
175 #ifdef __MACH__
176  // TODO: figure out how to open with O_SYNC
177  int fh = mkstemp(const_cast<char*>(filename.c_str()));
178 #else
179  int fh = mkostemp(const_cast<char*>(filename.c_str()), O_WRONLY | O_SYNC);
180 #endif
181 
182  return fh;
183  }
184 
185  int make_file_record(const std::string& filename, size_t sz)
186  {
187  int res = (*count_.access())++;
188  FileRecord fr = { sz, filename };
189  (*filenames_.access())[res] = fr;
190 
191  // keep track of sizes
192  critical_resource<size_t>::accessor cur = current_size_.access();
193  *cur += sz;
194  critical_resource<size_t>::accessor max = max_size_.access();
195  if (*cur > *max)
196  *max = *cur;
197 
198  return res;
199  }
200 
201  FileRecord extract_file_record(int i)
202  {
203  CriticalMapAccessor accessor = filenames_.access();
204  FileRecord fr = (*accessor)[i];
205  accessor->erase(i);
206  return fr;
207  }
208 
209  void remove_file(const FileRecord& fr)
210  {
211  remove(fr.name.c_str());
212  (*current_size_.access()) -= fr.size;
213  }
214 
215  private:
// Maps the integer handle returned by put() to the record of the stored file.
216  typedef std::map<int, FileRecord> FileRecordMap;
// NOTE(review): the CriticalMap / CriticalMapAccessor typedefs used below and in
// the methods are not visible in this listing (listing lines 217-218 are missing);
// presumably they wrap FileRecordMap in a critical_resource — confirm against the header.
219 
220  private:
// mkstemp()-style templates from which new file names are generated
221  std::vector<std::string> filename_templates_;
// handle -> FileRecord table; always reached through access()/const_access()
222  CriticalMap filenames_;
// source of handles: read and then incremented for every new record
223  critical_resource<int> count_;
// current_size_: sum of the recorded sizes of the files currently held;
// max_size_: largest value current_size_ has reached
224  critical_resource<size_t> current_size_, max_size_;
225  };
226 }
227 
228 #endif
void load(BinaryBuffer &bb, T &x)
Loads x from bb by calling diy::Serialization<T>::load(bb,x).
Definition: serialization.hpp:106
virtual void save_binary(const char *x, size_t count)=0
copy count bytes from x into the buffer
Definition: storage.hpp:41
void save(BinaryBuffer &bb, const T &x)
Saves x to bb by calling diy::Serialization<T>::save(bb,x).
Definition: serialization.hpp:102
Definition: critical-resource.hpp:15
Definition: serialization.hpp:26
virtual void load_binary(char *x, size_t count)=0
copy count bytes into x from the buffer
Definition: storage.hpp:51
virtual void load_binary_back(char *x, size_t count)=0
copy count bytes into x from the back of the buffer
Definition: critical-resource.hpp:32