Mstdlib-1.24.0
m_csv.h
1/* The MIT License (MIT)
2 *
3 * Copyright (c) 2015 Monetra Technologies, LLC.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 * THE SOFTWARE.
22 */
23
24#ifndef __M_CSV_H__
25#define __M_CSV_H__
26
27/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
28
29#include <mstdlib/base/m_defs.h>
30#include <mstdlib/base/m_types.h>
31#include <mstdlib/base/m_buf.h>
32#include <mstdlib/base/m_list_str.h>
33
34/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
35
36__BEGIN_DECLS
37
38/*! \addtogroup m_csv CSV
39 * \ingroup m_formats
40 * CSV Parser.
41 *
42 * RFC 4180 compliant CSV parser.
43 *
44 * The first row in the CSV is assumed to be the header. If there is no
45 * header the *raw* functions should be used to retrieve data. If there
46 * is a header the non-raw functions should be used. These functions
47 * take into account the header when indexing rows automatically. The
48 * first row after the header is index 0.
49 *
50 * Example:
51 *
52 * \code{.c}
53 * const char *data = "header1,header2\ncell1,cell2";
54 * M_csv_t *csv;
55 * const char *const_temp;
56 *
57 * csv = M_csv_parse(data, M_str_len(data), ',', '"', M_CSV_FLAG_NONE);
58 * const_temp = M_csv_get_header(csv, 0);
59 * M_printf("header='%s'\n", const_temp);
60 *
61 * const_temp = M_csv_get_cellbynum(csv, 0, 1);
62 * M_printf("cell='%s'\n", const_temp);
63 *
64 * M_csv_destroy(csv);
65 * \endcode
66 *
67 * Example output:
68 *
69 * \code
70 * header='header1'
71 * cell='cell2'
72 * \endcode
73 *
74 * @{
75 */
76
77struct M_csv;
78typedef struct M_csv M_csv_t;
79
/*! Flags controlling parse behavior.
 *
 * Each flag occupies its own bit; values are intended for the \a flags
 * argument of the M_csv_parse*() functions.
 */
enum M_CSV_FLAGS {
	M_CSV_FLAG_NONE            = 0,     /*!< No Flags */
	M_CSV_FLAG_TRIM_WHITESPACE = 1 << 0 /*!< If a cell is not quoted, trim leading and trailing whitespace */
};
85
86/*! Callback that can be used to filter rows from data returned by M_csv_output_rows_buf().
87 *
88 * \param[in] csv the csv being output.
89 * \param[in] row the idx of the current row being considered (NOT raw - 0 is the first row after the header).
90 * \param[in] thunk pointer to thunk object passed into M_csv_output_rows_buf() by caller.
91 * \return M_TRUE, if the row should be included in output. M_FALSE otherwise.
92 */
93typedef M_bool (*M_csv_row_filter_cb)(const M_csv_t *csv, size_t row, void *thunk);
94
95
96/*! Callback that can be used to edit data from certain columns as it's written out.
97 *
98 * \param[in] buf buffer to write new version of cell data to.
99 * \param[in] cell original cell data (may be empty/NULL, if cell was empty)
100 * \param[in] header header of column this cell came from
101 * \param[in] thunk pointer to thunk object passed into M_csv_output_rows_buf() by caller.
102 * \return M_TRUE if we added a modified value to buf. M_FALSE if value was OK as-is.
103 */
104typedef M_bool (*M_csv_cell_writer_cb)(M_buf_t *buf, const char *cell, const char *header, void *thunk);
105
106
107/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
108
109/*! Parse a string into a CSV object.
110 *
111 * \param[in] data The data to parse.
112 * \param[in] len The length of the data to parse.
113 * \param[in] delim CSV delimiter character. Typically comma (',').
114 * \param[in] quote CSV quote character. Typically double quote ('"').
115 * \param[in] flags Flags controlling parse behavior.
116 *
117 * \return CSV object.
118 *
119 * \see M_csv_destroy
120 */
121M_API M_csv_t *M_csv_parse(const char *data, size_t len, char delim, char quote, M_uint32 flags) M_MALLOC;
122
123
124/*! Parse a string into a CSV object, using given column headers.
125 *
126 * Same as M_csv_parse, but add the given headers as the first row before parsing the data into the table.
127 *
128 * \param[in] data The data to parse.
129 * \param[in] len The length of data to parse.
130 * \param[in] delim CSV delimiter character. Typically comma (',').
131 * \param[in] quote CSV quote character. Typically double quote ('"').
132 * \param[in] flags Flags controlling parse behavior.
133 * \param[in] headers List of headers to add as first row of table.
134 *
135 * \return CSV object
136 */
137M_API M_csv_t *M_csv_parse_add_headers(const char *data, size_t len, char delim, char quote, M_uint32 flags,
138 M_list_str_t *headers);
139
140
141/*! Parse a string into a CSV object.
142 *
143 * This will take ownership of the data passed in. The data must be valid for the life of the
144 * returned CSV object and will be destroyed by the CSV object when the CSV object is destroyed.
145 *
146 * \param[in] data The string to parse.
147 * \param[in] len The length of the data to parse.
148 * \param[in] delim CSV delimiter character. Typically comma (',').
149 * \param[in] quote CSV quote character. Typically double quote ('"').
150 * \param[in] flags Flags controlling parse behavior.
151 *
152 * \return CSV object.
153 *
154 * \see M_csv_destroy
155 */
156M_API M_csv_t *M_csv_parse_inplace(char *data, size_t len, char delim, char quote, M_uint32 flags) M_MALLOC_ALIASED;
157
158
159/*! Destroy a CSV object.
160 *
161 * \param[in] csv The csv.
162 */
163M_API void M_csv_destroy(M_csv_t *csv) M_FREE(1);
164
165
166/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
167 * Raw getters if no headers used */
168
169/*! Get the raw number of csv rows.
170 *
171 * This should be used when the CSV data does not contain a header.
172 * This count will include the header as a row in the count.
173 *
174 * \param[in] csv The csv.
175 *
176 * \return The number of rows including the header as a row.
177 *
178 * \see M_csv_get_numrows
179 */
180M_API size_t M_csv_raw_num_rows(const M_csv_t *csv);
181
182
183/*! Get the raw number of csv columns.
184 *
185 * This should be used when the CSV data does not contain a header.
186 *
187 * \param[in] csv The csv.
188 *
189 * \return The number of columns.
190 *
191 * \see M_csv_get_numcols
192 */
193M_API size_t M_csv_raw_num_cols(const M_csv_t *csv);
194
195
196/*! Get the cell at the given position.
197 *
198 * This should be used when the CSV data does not contain a header.
199 * This assumes that the first row is data (not the header).
200 *
201 * \param[in] csv The csv.
202 * \param[in] row The row. Indexed from 0 where 0 is the header (if there is a header).
203 * \param[in] col The column. Indexed from 0.
204 *
205 * \return The csv data at the position or NULL if the position is invalid.
206 *
207 * \see M_csv_get_cellbynum
208 */
209M_API const char *M_csv_raw_cell(const M_csv_t *csv, size_t row, size_t col);
210
211
212/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
213 * Getters if headers used (default) */
214
215/*! Get the number of csv rows.
216 *
217 * This should be used when the CSV data contains a header.
218 * This count will not include the header as a row in the count.
219 *
220 * \param[in] csv The csv.
221 *
222 * \return The number of rows excluding the header as a row.
223 *
224 * \see M_csv_raw_num_rows
225 */
226M_API size_t M_csv_get_numrows(const M_csv_t *csv);
227
228
229/*! Get the number of csv columns.
230 *
231 * This should be used when the CSV data contains a header.
232 *
233 * \param[in] csv The csv.
234 *
235 * \return The number of columns.
236 *
237 * \see M_csv_raw_num_cols
238 */
239M_API size_t M_csv_get_numcols(const M_csv_t *csv);
240
241
242/*! Get the cell at the given position.
243 *
244 * This should be used when the CSV data contains a header.
245 * This assumes that the first row is a header (not data).
246 *
247 * \param[in] csv The csv.
248 * \param[in] row The row. Indexed from 0 where 0 is the first row after the header.
249 * \param[in] col The column. Indexed from 0.
250 *
251 * \return The csv data at the position or NULL if the position is invalid.
252 *
253 * \see M_csv_raw_cell
254 */
255M_API const char *M_csv_get_cellbynum(const M_csv_t *csv, size_t row, size_t col);
256
257
258/*! Get the header for a given column
259 *
260 * This should be used when the CSV data contains a header.
261 * This assumes that the first row is a header (not data).
262 *
263 * \param[in] csv The csv.
264 * \param[in] col The column. Indexed from 0.
265 *
266 * \return The header for the given column.
267 */
268M_API const char *M_csv_get_header(const M_csv_t *csv, size_t col);
269
270
271/*! Get the cell for the given header.
272 *
273 * This should be used when the CSV data contains a header.
274 * This assumes that the first row is a header (not data).
275 *
276 * \param[in] csv The csv.
277 * \param[in] row The row. Indexed from 0 where 0 is the first row after the header.
278 * \param[in] colname The column name to get the data from.
279 *
280 * \return The csv data at the position or NULL if the position is invalid.
281 */
282M_API const char *M_csv_get_cell(const M_csv_t *csv, size_t row, const char *colname);
283
284
285/*! Get the column number for a given column (header) name.
286 *
287 * This should be used when the CSV data contains a header.
288 * This assumes that the first row is a header (not data).
289 *
290 * \param[in] csv The csv.
291 * \param[in] colname The column name to get the data from.
292 *
293 * \return Column number for the given name on success. Otherwise -1.
294 */
295M_API ssize_t M_csv_get_cell_num(const M_csv_t *csv, const char *colname);
296
297
298/*! Use different delim and quote characters for output than for parsing.
299 *
300 * By default, M_csv_output_headers_buf() and M_csv_output_rows_buf() will use the same
301 * delimiter and quote characters that were used when parsing the data.
302 *
303 * However, if you need to use a different delimiter and/or quote character in your
304 * output, call this function first to change them.
305 *
306 * \param csv The csv.
307 * \param delim delimiter char to use in subsequent write operations
308 * \param quote quote char to use in subsequent write operations
309 */
310void M_csv_output_set_control_chars(M_csv_t *csv, char delim, char quote);
311
312
313/*! Write the header row, in CSV format.
314 *
315 * When outputting CSV data, this should be called first, with the exact same list of headers
316 * that you'll be using later with M_csv_output_rows_buf().
317 *
318 * If \a headers is NULL, all headers defined in the CSV data will be output, in the same order
319 * they were originally stored in.
320 *
321 * \see M_csv_output_rows_buf()
322 *
323 * \param[out] buf buffer to place output in.
324 * \param[in] csv the CSV data to output.
325 * \param[in] headers names of columns to include in header row (will be written in this exact order).
326 */
327M_API void M_csv_output_headers_buf(M_buf_t *buf, const M_csv_t *csv, M_list_str_t *headers);
328
329
330/*! Write the parsed data to the given buffer, in CSV format.
331 *
332 * If \a headers is not NULL, only the columns whose names match will be output, in the same order
333 * that the column headers are listed in \a headers. If there are names in \a headers which aren't
334 * present in the parsed CSV file, an empty value will be added for that column in every row.
335 *
336 * A filter callback may be used to omit certain rows from the output. If no filter callback is
337 * provided, all rows will be output.
338 *
339 * \see M_csv_output_headers_buf()
340 *
341 * \param[out] buf buffer to place output in
342 * \param[in] csv the CSV data to output.
343 * \param[in] headers names of columns to include in output (also controls column order).
344 * \param[in] filter_cb callback to control which rows are output (may be NULL).
345 * \param[in] filter_thunk pointer to pass to \a filter_cb (may be NULL).
346 * \param[in] writer_cb callback to allow editing cell values (may be NULL).
347 * \param[in] writer_thunk pointer to pass to \a writer_cb (may be NULL).
348 */
349M_API void M_csv_output_rows_buf(M_buf_t *buf, const M_csv_t *csv, M_list_str_t *headers,
350 M_csv_row_filter_cb filter_cb, void *filter_thunk, M_csv_cell_writer_cb writer_cb, void *writer_thunk);
351
352/*! @} */
353
354__END_DECLS
355
356/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
357
358#endif /* __M_CSV_H__ */
struct M_buf M_buf_t
Definition: m_buf.h:77
M_csv_t * M_csv_parse_add_headers(const char *data, size_t len, char delim, char quote, M_uint32 flags, M_list_str_t *headers)
M_csv_t * M_csv_parse_inplace(char *data, size_t len, char delim, char quote, M_uint32 flags) M_MALLOC_ALIASED
const char * M_csv_get_header(const M_csv_t *csv, size_t col)
const char * M_csv_get_cell(const M_csv_t *csv, size_t row, const char *colname)
void M_csv_output_set_control_chars(M_csv_t *csv, char delim, char quote)
size_t M_csv_raw_num_cols(const M_csv_t *csv)
M_bool(* M_csv_row_filter_cb)(const M_csv_t *csv, size_t row, void *thunk)
Definition: m_csv.h:93
void M_csv_destroy(M_csv_t *csv) M_FREE(1)
size_t M_csv_raw_num_rows(const M_csv_t *csv)
M_csv_t * M_csv_parse(const char *data, size_t len, char delim, char quote, M_uint32 flags) M_MALLOC
void M_csv_output_rows_buf(M_buf_t *buf, const M_csv_t *csv, M_list_str_t *headers, M_csv_row_filter_cb filter_cb, void *filter_thunk, M_csv_cell_writer_cb writer_cb, void *writer_thunk)
size_t M_csv_get_numcols(const M_csv_t *csv)
const char * M_csv_get_cellbynum(const M_csv_t *csv, size_t row, size_t col)
const char * M_csv_raw_cell(const M_csv_t *csv, size_t row, size_t col)
void M_csv_output_headers_buf(M_buf_t *buf, const M_csv_t *csv, M_list_str_t *headers)
M_bool(* M_csv_cell_writer_cb)(M_buf_t *buf, const char *cell, const char *header, void *thunk)
Definition: m_csv.h:104
struct M_csv M_csv_t
Definition: m_csv.h:78
M_CSV_FLAGS
Definition: m_csv.h:81
ssize_t M_csv_get_cell_num(const M_csv_t *csv, const char *colname)
size_t M_csv_get_numrows(const M_csv_t *csv)
@ M_CSV_FLAG_NONE
Definition: m_csv.h:82
@ M_CSV_FLAG_TRIM_WHITESPACE
Definition: m_csv.h:83
struct M_list_str M_list_str_t
Definition: m_list_str.h:80