Mstdlib-1.24.0
m_bit_parser.h
1/* The MIT License (MIT)
2 *
3 * Copyright (c) 2017 Monetra Technologies, LLC.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 * THE SOFTWARE.
22 */
23
24#ifndef __M_BIT_PARSER_H__
25#define __M_BIT_PARSER_H__
26
27/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
28
29#include <mstdlib/base/m_defs.h>
30#include <mstdlib/base/m_types.h>
31#include <mstdlib/base/m_buf.h>
32
33/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
34
35__BEGIN_DECLS
36
37/*! \addtogroup m_bit_parser Bitwise Data Parser
38 * \ingroup mstdlib_base
39 *
40 * Buffer based data parser that reads data per-bit, instead of per-byte.
41 *
42 * Example (creating a parser, reading some bits):
43 *
44 * \code{.c}
45 * M_bit_parser_t *bparser;
46 * const M_uint8 bytes[] = {0x70, 0x3F};
47 * size_t nbits = 12;
48 *
49 * M_uint8 bit;
50 * char *str;
51 * size_t nbits_in_range;
52 *
53 * bparser = M_bit_parser_create_const(bytes, nbits);
54 *
55 * M_bit_parser_read_bit(bparser, &bit); // bit == 0
56 *
57 * str = M_bit_parser_read_strdup(bparser, 5); // str == "11100"
58 * M_free(str);
59 *
60 * M_bit_parser_read_range(bparser, &bit, &nbits_in_range, M_bit_parser_len(bparser));
61 * // bit == 0
62 * // nbits_in_range == 4 (remaining bits were "000011")
63 *
64 * M_bit_parser_destroy(bparser);
65 * \endcode
66 *
67 * @{
68 */
69
70struct M_bit_parser;
71typedef struct M_bit_parser M_bit_parser_t;
72
73
74/*! Signed integer formats understood by bit parser.
75 *
76 * In-depth description of these formats can be found at <https://en.wikipedia.org/wiki/Signed_number_representations>.
77 *
78 * \see M_bit_parser_read_int
79 */
80typedef enum {
81 M_BIT_PARSER_SIGN_MAG = 0, /*!< Signed magnitude format (first bit is sign, rest of bits are magnitude) */
82 M_BIT_PARSER_ONES_COMP = 1, /*!< One's complement */
83 M_BIT_PARSER_TWOS_COMP = 2 /*!< Two's complement */
84} M_bit_parser_int_format_t;
85
86
87/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
88
89/*! Create a bit parser over the given data (copies input data).
90 *
91 * The parser will copy the data internally, so after this function is called, the caller's copy of the
92 * data may be modified or freed without affecting the parser.
93 *
94 * If your data isn't going to change, you can use M_bit_parser_create_const() instead to avoid duplicating
95 * the data.
96 *
97 * \param[in] bytes data to parse bitwise
98 * \param[in] nbits number of bits in data
99 * \return a new parser object over the given data
100 *
101 * \see M_bit_parser_reset
102 * \see M_bit_parser_destroy
103 */
104M_API M_bit_parser_t *M_bit_parser_create(const void *bytes, size_t nbits) M_WARN_UNUSED_RESULT M_MALLOC;
105
106
107/*! Create a bit parser over the given data (assumes given data won't change).
108 *
109 * Assumes the given data pointer won't be modified until after you're done with the parser.
110 *
111 * \warning
112 * Violating this assumption can lead to undefined behavior (including program crashes).
113 *
114 * \param[in] bytes data to parse bitwise
115 * \param[in] nbits number of bits in data
116 * \return a new parser object over the given data
117 *
118 * \see M_bit_parser_reset
119 * \see M_bit_parser_destroy
120 */
121M_API M_bit_parser_t *M_bit_parser_create_const(const void *bytes, size_t nbits) M_WARN_UNUSED_RESULT M_MALLOC;
122
123
124/*! Append data to a bit parser object.
125 *
126 * If you append data to a parser that was created with M_bit_parser_create_const(), the const data will
127 * be copied into internal storage before the append.
128 *
129 * \param[in] bparser bit parser object
130 * \param[in] bytes bytes to read from
131 * \param[in] nbits number of bits to append from byte array
132 */
133M_API void M_bit_parser_append(M_bit_parser_t *bparser, const void *bytes, size_t nbits);
134
135
136/*! Append bits from a given integer to a bit parser object.
137 *
138 * Note that the bit region being read is assumed to be justified against the least-significant end of the
139 * integer, though the bits within that region are read from most-significant to least-significant.
140 *
141 * For example, if bits == 0x8B == (10001011)b, and nbits == 4, the bits "1011" will be appended to the parser.
142 *
143 * \param[in] bparser bit parser object
144 * \param[in] bits value to draw bits from
145 * \param[in] nbits number of bits to use (counted from least-significant end, right-to-left)
146 */
147M_API void M_bit_parser_append_uint(M_bit_parser_t *bparser, M_uint64 bits, size_t nbits);
148
149
150/*! Append bits from a given binary-ascii string to a bit parser object.
151 *
152 * A binary-ascii string is a list of 1 and 0 characters (e.g., "100010").
153 *
154 * Any whitespace in the string will be silently ignored. So, " 1000 1 0" will add the same data as "100010".
155 *
156 * \param[in] bparser bit parser object
157 * \param[in] bitstr string to draw bits from
158 * \return M_FALSE on error (given bitstr had characters other than '0', '1' or whitespace)
159 */
160M_API M_bool M_bit_parser_append_bitstr(M_bit_parser_t *bparser, const char *bitstr);
161
162
163/*! Reset parser to use new data (copies input data).
164 *
165 * Parser state (including any mark) is reset to initial values. Any data that was in the parser before this
166 * call is dropped.
167 *
168 * The new data is copied into the parser, so the caller's copy of the data may be modified or freed
169 * after this call without affecting the parser.
170 *
171 * \param[in] bparser bit parser object
172 * \param[in] bytes bytes to read from
173 * \param[in] nbits number of bits to read out of input bytes
174 */
175M_API void M_bit_parser_reset(M_bit_parser_t *bparser, const void *bytes, size_t nbits);
176
177
178/*! Destroy the bit parser object.
179 *
180 * \param[in] bparser bit parser object
181 */
182M_API void M_bit_parser_destroy(M_bit_parser_t *bparser) M_FREE(1);
183
184
185/*! Returns the number of bits left to read in the parser.
186 *
187 * \param[in] bparser bit parser object
188 * \return number of bits left that haven't been read yet
189 */
190M_API size_t M_bit_parser_len(const M_bit_parser_t *bparser);
191
192
193/*! Retrieve the current position of the parser (number of bits read).
194 *
195 * \param[in] bparser bit parser object
196 * \return current parser position, relative to start of data (in bits)
197 */
198M_API size_t M_bit_parser_current_offset(const M_bit_parser_t *bparser);
199
200
201/*! Count the number of bits left in the parser that have the given value.
202 *
203 * If the remaining bits in the parser are "10011110", calling this function
204 * with bit=0 will return 3, and calling it with bit=1 will return 5.
205 *
206 * \param[in] bparser bit parser object
207 * \param[in] bit bit value to count
208 * \return number of bits left in parser with the given value
209 */
210M_API size_t M_bit_parser_count(const M_bit_parser_t *bparser, M_uint8 bit);
211
212
213/*! Rewind parser (and any mark) back to start of data.
214 *
215 * \param[in] bparser bit parser object
216 *
217 * \see M_bit_parser_mark
218 * \see M_bit_parser_mark_rewind
219 */
220M_API void M_bit_parser_rewind_to_start(M_bit_parser_t *bparser);
221
222
223/*! Mark the current position in the stream, so we can return to it later.
224 *
225 * \param[in] bparser bit parser object
226 *
227 * \see M_bit_parser_mark_len
228 * \see M_bit_parser_mark_rewind
229 */
230M_API void M_bit_parser_mark(M_bit_parser_t *bparser);
231
232
233/*! Return the number of bits from a mark to the current parser position.
234 *
235 * If no mark has been set, returns the number of bits from the start of the data.
236 *
237 * For example, if I set a mark, read 3 bits, and then call this function, it'll return 3.
238 *
239 * \param[in] bparser bit parser object
240 * \return number of bits read/consumed from the point where we last marked the bitstream
241 *
242 * \see M_bit_parser_mark
243 */
244M_API size_t M_bit_parser_mark_len(const M_bit_parser_t *bparser);
245
246
247/*! Rewind parser back to the marked position.
248 *
249 * This will not clear the mark - you can read and then return to a marked position multiple times.
250 *
251 * If no mark has been set, this will rewind all the way back to the beginning of the stream.
252 *
253 * \param[in] bparser bit parser object
254 * \return number of bits we rewound the stream
255 *
256 * \see M_bit_parser_rewind_to_start
257 * \see M_bit_parser_mark
258 */
259M_API size_t M_bit_parser_mark_rewind(M_bit_parser_t *bparser);
260
261
262/*! Skip past the given number of bits.
263 *
264 * \param[in] bparser bit parser object
265 * \param[in] nbits number of bits to consume
266 * \return M_TRUE on success, M_FALSE if not enough bits left
267 */
268M_API M_bool M_bit_parser_consume(M_bit_parser_t *bparser, size_t nbits);
269
270
271/*! Read a single bit at the parser's current position without advancing.
272 *
273 * \param[in] bparser bit parser object
274 * \param[out] bit 0 or 1
275 * \return M_TRUE on success, M_FALSE if there are no bits left to read
276 */
277M_API M_bool M_bit_parser_peek_bit(const M_bit_parser_t *bparser, M_uint8 *bit);
278
279
280/*! Read a single bit at the parser's current position and advance.
281 *
282 * \param[in] bparser bit parser object
283 * \param[out] bit 0 or 1
284 * \return M_TRUE on success, M_FALSE if there are no bits left to read
285 */
286M_API M_bool M_bit_parser_read_bit(M_bit_parser_t *bparser, M_uint8 *bit);
287
288
289/*! Read multiple bits and add them to the end of the given bit buffer.
290 *
291 * \param[in] bparser bit parser to read bits from
292 * \param[in,out] bbuf bit buffer to store bits in
293 * \param[in] nbits number of bits to read
294 * \return M_TRUE on success, M_FALSE if there aren't enough bits left
295 */
296M_API M_bool M_bit_parser_read_bit_buf(M_bit_parser_t *bparser, M_bit_buf_t *bbuf, size_t nbits);
297
298
299/*! Read multiple bits, zero-pad to byte boundary, then add them to the given buffer.
300 *
301 * Padding is only added as-needed to the last byte that gets added to the buffer. Every byte
302 * before that is packed with the bits we're reading.
303 *
304 * For example, if we add the bits "1010 1010 1100 01" using this function, two bytes are added to
305 * the buffer: "1010 1010 1100 0100" (two padding zeros on end).
306 *
307 *
308 * \param[in] bparser bit parser to read bits from
309 * \param[in,out] buf buffer to store bytes in
310 * \param[in] nbits number of bits to read
311 * \return M_TRUE on success, M_FALSE if there aren't enough bits left
312 */
313M_API M_bool M_bit_parser_read_buf(M_bit_parser_t *bparser, M_buf_t *buf, size_t nbits);
314
315
316/*! Read multiple bits, zero-pad to byte boundary, then add them to the given array.
317 *
318 * Padding is only added as-needed to the last byte that gets added to the buffer. Every byte
319 * before that is packed with the bits we're reading.
320 *
321 * For example, if we add the bits "1010 1010 1100 01" using this function, two bytes are added to
322 * the buffer: "1010 1010 1100 0100" (two padding zeros on end).
323 *
324 *
325 * \param[in] bparser bit parser to read bits from
326 * \param[out] dest array to store bytes in
327 * \param[in,out] destlen length of \a dest in bytes. Before return, set to number of bytes written.
328 * \param[in] nbits number of bits to read
329 * \return M_TRUE on success, M_FALSE if there aren't enough bits left
330 */
331M_API M_bool M_bit_parser_read_bytes(M_bit_parser_t *bparser, M_uint8 *dest, size_t *destlen, size_t nbits);
332
333
334/*! Read multiple bits, then return them as a bit string.
335 *
336 * A bit string is just a list of '0' and '1' characters (e.g., "100101").
337 *
338 * \warning
339 * The caller assumes ownership of returned string, and must free it with M_free().
340 *
341 * \param[in] bparser bit parser to read bits from
342 * \param[in] nbits number of bits to read
343 * \return bitstring on success, NULL if there aren't enough bits left
344 * \see M_free
345 */
346M_API char *M_bit_parser_read_strdup(M_bit_parser_t *bparser, size_t nbits);
347
348
349/*! Read multiple bits, interpret as big-endian unsigned integer.
350 *
351 * The bits are interpreted as a single big-endian unsigned integer, then the integer
352 * value is stored in \a res.
353 *
354 * For example, if a bit parser contains '11100', you would see the following in num:
355 * \li M_bit_parser_read_uint(bparser, 3, &num) --> num == 7 (b111)
356 * \li M_bit_parser_read_uint(bparser, 4, &num) --> num == 14 (b1110)
357 * \li M_bit_parser_read_uint(bparser, 5, &num) --> num == 28 (b11100)
358 *
359 * \param[in] bparser bit parser to read bits from
360 * \param[in] nbits number of bits to read (must be >= 1 and <= 64)
361 * \param[out] res read bits, converted to an unsigned integer
362 * \return M_TRUE on success, M_FALSE on failure
363 */
364M_API M_bool M_bit_parser_read_uint(M_bit_parser_t *bparser, size_t nbits, M_uint64 *res);
365
366
367/*! Read multiple bits, interpret as a signed integer.
368 *
369 * The bits are interpreted as a single big-endian signed integer, using the specified
370 * signed integer format.
371 *
372 * \param[in] bparser bit parser to read bits from
373 * \param[in] nbits number of bits to read (must be >= 2 and <= 64)
374 * \param[in] fmt signed integer format of the bits we're reading
375 * \param[out] res read bits, converted to a native signed integer
376 * \return M_TRUE on success, M_FALSE on failure
377 */
378M_API M_bool M_bit_parser_read_int(M_bit_parser_t *bparser, size_t nbits, M_bit_parser_int_format_t fmt, M_int64 *res);
379
380
381/*! Read bits until we hit a bit different than the current one.
382 *
383 * For example, if the parser contains "11100001", calling this function will move the parser's position
384 * to the first \a 0, and return \a 1 in \a bit and \a 3 in \a nbits_in_range.
385 *
386 * Note that this function will always read at least one bit, as long as bits are left to read and \a max_bits is nonzero.
387 *
388 * \param[in] bparser bit parser to read bits from
389 * \param[out] bit bit value in range we just read (0 or 1)
390 * \param[out] nbits_in_range number of bits in range we just read
391 * \param[in] max_bits maximum number of bits to read (if set to zero, no bits will be read)
392 * \return M_TRUE if at least one bit was read, M_FALSE if no bits are left or \a max_bits was zero
393 */
394M_API M_bool M_bit_parser_read_range(M_bit_parser_t *bparser, M_uint8 *bit, size_t *nbits_in_range, size_t max_bits);
395
396
397/*! Skip bits until we hit a bit different than the current one.
398 *
399 * For example, if the parser contains "11100001", calling this function will move the parser's position
400 * to the first \a 0.
401 *
402 * Note that this function will always consume at least one bit, as long as bits are left to skip and \a max_bits is nonzero.
403 *
404 * \param[in] bparser bit parser to read bits from
405 * \param[in] max_bits maximum number of bits to skip (if set to zero, no bits will be skipped)
406 * \return M_TRUE if at least one bit was skipped, M_FALSE if no bits are left or \a max_bits was zero
407 */
408M_API M_bool M_bit_parser_consume_range(M_bit_parser_t *bparser, size_t max_bits);
409
410
411/*! Consume bits up to and including the next bit with the given value.
412 *
413 * Usage example:
414 * \code{.c}
415 * M_bit_parser_t *bparser;
416 * const M_uint8 bytes[] = {0x86, 0x00};
417 *
418 * bparser = M_bit_parser_create_const(bytes, 10);
419 * // bparser contains: "1000011000"
420 *
421 * // Now, let's say we want to print the index of every set bit.
422 * while (M_bit_parser_consume_to_next(bparser, 1, M_bit_parser_len(bparser))) {
423 * M_printf("set bit: %zu\n", M_bit_parser_current_offset(bparser) - 1);
424 * }
425 *
426 * // Loop will print:
427 * // set bit: 0
428 * // set bit: 5
429 * // set bit: 6
430 * // After loop, bparser will be empty.
431 * \endcode
432 *
433 * \param[in] bparser bit parser to read bits from
434 * \param[in] bit bit value that we're looking for
435 * \param[in] max_bits maximum number of bits to consume (if set to zero, no bits will be consumed)
436 * \return M_TRUE if we found and consumed a matching bit, M_FALSE otherwise.
437 */
438M_API M_bool M_bit_parser_consume_to_next(M_bit_parser_t *bparser, M_uint8 bit, size_t max_bits);
439
440/*! @} */
441
442__END_DECLS
443
444#endif /* __M_BIT_PARSER_H__ */
struct M_bit_buf M_bit_buf_t
Definition: m_bit_buf.h:72
void M_bit_parser_append(M_bit_parser_t *bparser, const void *bytes, size_t nbits)
void M_bit_parser_reset(M_bit_parser_t *bparser, const void *bytes, size_t nbits)
M_bool M_bit_parser_read_int(M_bit_parser_t *bparser, size_t nbits, M_bit_parser_int_format_t fmt, M_int64 *res)
void M_bit_parser_destroy(M_bit_parser_t *bparser) M_FREE(1)
size_t M_bit_parser_mark_len(const M_bit_parser_t *bparser)
size_t M_bit_parser_mark_rewind(M_bit_parser_t *bparser)
M_bool M_bit_parser_consume(M_bit_parser_t *bparser, size_t nbits)
M_bool M_bit_parser_read_bit_buf(M_bit_parser_t *bparser, M_bit_buf_t *bbuf, size_t nbits)
M_bit_parser_int_format_t
Definition: m_bit_parser.h:80
M_bit_parser_t * M_bit_parser_create_const(const void *bytes, size_t nbits) M_WARN_UNUSED_RESULT M_MALLOC
M_bool M_bit_parser_consume_range(M_bit_parser_t *bparser, size_t max_bits)
M_bool M_bit_parser_read_range(M_bit_parser_t *bparser, M_uint8 *bit, size_t *nbits_in_range, size_t max_bits)
M_bool M_bit_parser_append_bitstr(M_bit_parser_t *bparser, const char *bitstr)
M_bool M_bit_parser_read_bit(M_bit_parser_t *bparser, M_uint8 *bit)
M_bool M_bit_parser_read_bytes(M_bit_parser_t *bparser, M_uint8 *dest, size_t *destlen, size_t nbits)
void M_bit_parser_rewind_to_start(M_bit_parser_t *bparser)
M_bool M_bit_parser_consume_to_next(M_bit_parser_t *bparser, M_uint8 bit, size_t max_bits)
size_t M_bit_parser_current_offset(const M_bit_parser_t *bparser)
struct M_bit_parser M_bit_parser_t
Definition: m_bit_parser.h:71
char * M_bit_parser_read_strdup(M_bit_parser_t *bparser, size_t nbits)
M_bool M_bit_parser_peek_bit(const M_bit_parser_t *bparser, M_uint8 *bit)
size_t M_bit_parser_count(const M_bit_parser_t *bparser, M_uint8 bit)
M_bool M_bit_parser_read_buf(M_bit_parser_t *bparser, M_buf_t *buf, size_t nbits)
void M_bit_parser_mark(M_bit_parser_t *bparser)
void M_bit_parser_append_uint(M_bit_parser_t *bparser, M_uint64 bits, size_t nbits)
M_bool M_bit_parser_read_uint(M_bit_parser_t *bparser, size_t nbits, M_uint64 *res)
size_t M_bit_parser_len(const M_bit_parser_t *bparser)
M_bit_parser_t * M_bit_parser_create(const void *bytes, size_t nbits) M_WARN_UNUSED_RESULT M_MALLOC
@ M_BIT_PARSER_ONES_COMP
Definition: m_bit_parser.h:82
@ M_BIT_PARSER_TWOS_COMP
Definition: m_bit_parser.h:83
@ M_BIT_PARSER_SIGN_MAG
Definition: m_bit_parser.h:81
struct M_buf M_buf_t
Definition: m_buf.h:77