Orion
high-rate readout
bintree.hpp
1 #pragma once
2 
#include <array>
#include <bit>
#include <cstdint>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>
6 
7 namespace itk::itkpix::endec::codec {
8 
9 namespace detail {
10 template <bool USE_LUT = true>
12  public:
// Defaulted constructor and destructor. Every member visible in this listing is
// static, so an instance carries no state of its own.
13  BinaryTreeLUT() = default;
14 
15  ~BinaryTreeLUT() = default;
16 
// Number of leading bits of the 30-bit encoded word used to index lut_top
// (decode() shifts by 30 - LUT_LEN); lut_bottom is sized 1 << (30 - LUT_LEN).
17  static inline constexpr uint32_t LUT_LEN = 16;
18 
19  static inline constexpr uint32_t encode(uint32_t decoded, uint32_t &encoded) {
20  if constexpr (USE_LUT) {
21  uint64_t _encoded = lut_encode[decoded & 0xffff];
22  encoded = _encoded & 0xffffffff;
23  return _encoded >> 32;
24  } else
25  return encode_(decoded, encoded);
26  }
27 
28  [[gnu::always_inline]] static inline constexpr uint32_t decode(uint32_t encoded, uint32_t &decoded) {
29  if constexpr (USE_LUT) {
30  auto _encoded = encoded & 0x3fffffff;
31  auto [_decoded, _offset, _length] = lut_top[_encoded >> (30 - LUT_LEN)];
32  uint32_t hitmap = _decoded;
33  decoded = hitmap;
34  if (_offset == 0xff) return _length;
35  _encoded = (_encoded >> _offset) & 0x3fff;
36  auto [_hitmap, length2] = lut_bottom[_encoded];
37  decoded &= 0xff;
38  decoded |= uint16_t(_hitmap) << 8;
39  return _length + length2;
40  } else
41  return decode_(encoded, decoded);
42  }
43  private:
/// One hit-list entry: a pair of bytes (create_hit_lut_() stores {bit % 8, bit / 8}).
using HitLutEntry = std::pair<std::uint8_t, std::uint8_t>;
/// Room for 16 entries, i.e. one per bit of a 16-bit hit map.
using HitLutRow = std::array<HitLutEntry, 16>;
/// One row for each of the 2^16 possible hit maps.
using HitLutType = std::array<HitLutRow, (1 << 16)>;
/// Build a table holding, for every 16-bit value, its number of set bits.
///
/// Entries are filled with the recurrence
///   bits(v) = bits(v >> 1) + (v & 1),
/// which only refers to an entry written earlier in the loop and yields the
/// population count of v.
///
/// @return array of 2^16 bit counts, indexed by hit map
static inline constexpr auto create_hit_lut_length_() {
  std::array<uint8_t, (1 << 16)> counts{};  // counts[0] stays 0
  for (uint32_t value = 1; value < counts.size(); ++value) {
    counts[value] = static_cast<uint8_t>(counts[value >> 1] + (value & 1u));
  }
  return counts;
}
53  static inline constexpr auto create_hit_lut_() {
54  HitLutType result{};
55  for (uint32_t hitmap = 0; hitmap < (1 << 16); ++hitmap) {
56  int index{};
57  for (uint8_t bit = 0; bit < 16; ++bit) {
58  if((hitmap & (1<< bit)) == 0) continue;
59  result[hitmap][index++] = {bit % 8, bit / 8};
60  }
61  }
62  return result;
63  }
64  public:
65  static inline HitLutType hit_lut {create_hit_lut_()};
66  static inline std::array<uint8_t, (1<<16)> hit_lut_length{create_hit_lut_length_()};
67  private:
68  using LUT1_entry = std::tuple<uint16_t, uint8_t, uint8_t>;
69  using LUT2_entry = std::tuple<uint8_t, uint8_t>;
70  using LUT1_type = std::conditional_t<USE_LUT, std::array<LUT1_entry, 1 << LUT_LEN>, void *>;
71  using LUT2_type = std::conditional_t<USE_LUT, std::array<LUT2_entry, 1 << (30 - LUT_LEN)>, void *>;
72  using LUT_ENCODE_type = std::conditional_t<USE_LUT, std::array<uint64_t, (1 << 16)>, void *>;
73 
74  static inline constexpr auto create_lut_top_() {
75  LUT1_type lut;
76  for (uint32_t i = 0; i < (1 << LUT_LEN); i++) {
77  uint32_t decoded{};
78  auto length = decode_(i << (30 - LUT_LEN), decoded);
79  uint32_t offset = 0xff;
80  if (length > 16) {
81  uint32_t shift = decoded & 0xff;
82  uint32_t encoded;
83  length = encode_(shift, encoded);
84  offset = 16 - length;
85  }
86  lut[i] = {decoded & 0xffff, offset & 0xff, length & 0xff};
87  }
88  return lut;
89  };
90 
91  public:
92  static inline LUT1_type lut_top{create_lut_top_()};
93  private:
94  static inline constexpr auto create_lut_bottom_() {
95  LUT2_type lut;
96  for (uint32_t i = 0; i < 1 << (30 - LUT_LEN); i++) {
97  uint32_t decoded{};
98  auto length = decode_bottom_(i << LUT_LEN, decoded);
99  lut[i] = {decoded >> 8, length};
100  }
101  return lut;
102  }
103  public:
104  static inline LUT2_type lut_bottom{create_lut_bottom_()};
105  private:
106  static inline constexpr auto create_lut_encode_() {
107  LUT_ENCODE_type lut;
108  lut[0] = 0;
109  for (uint32_t i = 1; i < 1 << 16; i++) {
110  uint32_t encoded;
111  uint64_t len = encode_(i, encoded);
112  lut[i] = encoded | (len << 32);
113  }
114  return lut;
115  }
116 
117  static inline LUT_ENCODE_type lut_encode{create_lut_encode_()};
118 
/// Encode a 16-bit hit map as a binary-tree bit stream without any table.
///
/// The hit map is cut into eight 2-bit leaves (bits 2k and 2k+1, with bit 2k
/// stored as the leaf's MSB). Above the leaves sit three levels of 2-bit
/// nodes whose MSB/LSB flag whether the first/second child is non-empty.
/// Non-zero symbols are emitted in the order
///   S1, S2t, S3tl, S3tr, leaf0..3, S2b, S3bl, S3br, leaf4..7
/// where the value 0b01 is written as a single 0 bit and 0b10 / 0b11 are
/// written as two bits. The stream is left-aligned at bit 29.
///
/// @param decoded  hit map; only the low 16 bits are read
/// @param encoded  receives the bit stream
/// @return number of bits written
static inline constexpr uint32_t encode_(uint32_t decoded, uint32_t &encoded) {
  uint32_t leaf[8]{};
  for (uint32_t k = 0; k < 8; ++k) {
    const uint32_t pair = (decoded >> (2 * k)) & 0x3;
    leaf[k] = ((pair & 0x1) << 1) | (pair >> 1);
  }

  // A node is non-zero exactly when at least one of its children is.
  const auto node = [](uint32_t first, uint32_t second) -> uint32_t {
    return (first != 0 ? 0b10u : 0b00u) | (second != 0 ? 0b01u : 0b00u);
  };
  const uint32_t s3_tl = node(leaf[0], leaf[1]);
  const uint32_t s3_tr = node(leaf[2], leaf[3]);
  const uint32_t s3_bl = node(leaf[4], leaf[5]);
  const uint32_t s3_br = node(leaf[6], leaf[7]);
  const uint32_t s2_t = node(s3_tl, s3_tr);
  const uint32_t s2_b = node(s3_bl, s3_br);
  const uint32_t s1 = node(s2_t, s2_b);

  const uint32_t symbols[15] = {s1,   s2_t,  s3_tl, s3_tr, leaf[0], leaf[1], leaf[2], leaf[3],
                                s2_b, s3_bl, s3_br, leaf[4], leaf[5], leaf[6], leaf[7]};

  uint32_t bits = 0;
  uint32_t stream = 0;
  for (const uint32_t symbol : symbols) {
    if (symbol == 0) continue;  // empty subtree: nothing is emitted
    if (symbol == 0b01) {
      bits += 1;  // a lone 0 bit, so no bit needs to be set
      continue;
    }
    stream |= symbol << (28 - bits);
    bits += 2;
  }
  encoded = stream;
  return bits;
}
149 
150  [[gnu::always_inline]] static constexpr uint32_t decode_(uint32_t encoded, uint32_t &decoded) {
151  return decode_impl_<true>(encoded, decoded);
152  }
153 
/// Walk a binary-tree bit stream and rebuild the 16-bit hit map.
///
/// The stream is read from bit 29 downwards. Every symbol is either a single
/// 0 bit (meaning 0b01) or two bits taken verbatim (0b10 / 0b11). In a node
/// symbol the MSB announces the first child and the LSB the second one.
/// Each half of the tree is stored as: level-2 node, its level-3 node(s),
/// then the leaves of those level-3 nodes.
///
/// @tparam DECODE_ALL  true: start at the root node and decode the half or
///                     halves it announces (leaves 0..3 first, then 4..7);
///                     false: the stream starts directly at the second half
///                     (leaves 4..7) and only that half is decoded
/// @param encoded  bit stream to decode
/// @param decoded  receives the hit map; leaf k contributes its MSB to bit 2k
///                 and its LSB to bit 2k+1
/// @return number of bits consumed
template <bool DECODE_ALL = true>
[[gnu::always_inline]] static constexpr uint32_t decode_impl_(uint32_t encoded, uint32_t &decoded) {
  uint32_t cursor = 0;    // bits consumed so far
  uint32_t leaf[8] = {};  // leaves that are never announced stay 0

  // Fetch the next symbol and advance the cursor by its width.
  const auto next_symbol = [&]() -> uint32_t {
    const uint32_t window = (encoded >> (28 - cursor)) & 0x3;
    if ((window & 0b10) == 0) {
      cursor += 1;
      return 0b01;
    }
    cursor += 2;
    return window;
  };

  // Read the leaves announced by a level-3 node (first child, then second).
  const auto read_leaves = [&](uint32_t node, uint32_t first) {
    if ((node & 0b10) != 0) leaf[first] = next_symbol();
    if ((node & 0b01) != 0) leaf[first + 1] = next_symbol();
  };

  // Read one half of the tree whose first leaf has index `first`.
  const auto read_half = [&](uint32_t first) {
    const uint32_t level2 = next_symbol();
    const uint32_t level3_first = (level2 & 0b10) != 0 ? next_symbol() : 0u;
    const uint32_t level3_second = (level2 & 0b01) != 0 ? next_symbol() : 0u;
    read_leaves(level3_first, first);
    read_leaves(level3_second, first + 2);
  };

  if constexpr (DECODE_ALL) {
    const uint32_t root = next_symbol();
    if ((root & 0b10) != 0) read_half(0);
    if ((root & 0b01) != 0) read_half(4);
  } else {
    read_half(4);
  }

  uint32_t hits = 0;
  for (uint32_t k = 0; k < 8; ++k) {
    hits |= ((leaf[k] >> 1) & 0x1) << (2 * k);
    hits |= (leaf[k] & 0x1) << (2 * k + 1);
  }
  decoded = hits;
  return cursor;
}
273 
274  static inline constexpr uint32_t decode_bottom_(uint32_t encoded, uint32_t &decoded) {
275  return decode_impl_<false>(encoded, decoded);
276  }
277 };
278 } // namespace detail
281 
282 } // itk::itkpix::endec::codec