CSV.h
.
More...
#include "CSV.h"
Go to the source code of this file.
Namespaces | |
namespace | csv |
Comma Separated Value (not used in this implementation). | |
Defines | |
#define | COMMA ',' |
#define | DQUOTE '"' |
#define | LF '\n' |
#define | CR '\r' |
Functions | |
bool | automataCSV (std::string &csv, std::istream &CIN) |
Scans input stream CIN and returns the next CSV value. | |
void | singleDQUOTE (std::string &str) |
Substitutes each pair of consecutive DQUOTEs with a single DQUOTE within str . | |
bool | getNextCSV (std::string &csv, std::istream &CIN) |
Scans input stream CIN and returns the next CSV value. | |
void | setQuotedCSV (std::string &res, const std::string &value) |
Prepares value for output into a CSV file. | |
void | trim (std::string &str) |
Deletes leading and trailing whitespace from "str" . | |
void | trimCSV (std::string &str) |
Converts an incorrect CSV field value into its probably correct value. | |
void | chop (std::string &str, char ch) |
Deletes ch when it is the trailing character in str . |
CSV.h
.
Definition in file CSV.cpp.
bool automataCSV | ( | std::string & | csv, | |
std::istream & | CIN | |||
) |
Scans input stream CIN
and returns the next CSV value.
CIN
gets stored into csv
.char
, not tested for wchar_t
.CIN.fail()
or when CIN.eof()
.
void singleDQUOTE | ( | std::string & | str | ) |
bool getNextCSV | ( | std::string & | csv, | |
std::istream & | CIN | |||
) |
Scans input stream CIN
and returns the next CSV value.
CIN
should be open in std::ios::binary
mode as chars are extracted one by one, using CIN.get(ch)
.CIN
gets stored into csv
.char
, not tested for wchar_t
.csv
the trailing (CR+LF or LF) ==> "\r\n"
or "\n"
.
"\n"
(LF -> LineFeed).{{ // test::getNextCSV() VEC.clear(); // std::vector<std::string> VEC; std::string csv; bool eol_CIN = false; // stop when the end of line is reached std::istringstream ist( str , std::ios::binary ); while ( ! eol_CIN && ! ist.fail() ) { // ! ist.eof() pitfall! eol_CIN = getNextCSV( csv, ist ); VEC.push_back( csv ); } return; // Using std::ios::binary ensures that no CR+LF chars are discarded }}
void setQuotedCSV | ( | std::string & | res, | |
const std::string & | value | |||
) |
Prepares value
for output into a CSV file.
res
.value
has whitespace.value
has double-quotes.value
has commas ","
.'"'
within value
with 2 double-quotes
[""].char
, not tested for wchar_t
.
{{ // test::setQuotedCSV() std::string res; setQuotedCSV( res, "," ); assertTrue( res == "\",\"" ); // [","] setQuotedCSV( res, "2" ); assertTrue( res == "2" ); // [2] setQuotedCSV( res, "" ); assertTrue( res == "" ); // [] setQuotedCSV( res, "4,5" ); assertTrue( res == "\"4,5\"" ); // ["4,5"] setQuotedCSV( res, "K\"" ); assertTrue( res == "\"K\"\"\"" ); // ["K"""] setQuotedCSV( res, "\r\n" ); assertTrue( res == "\"\r\n\"" ); // ["\r\n"] }}
void trim | ( | std::string & | str | ) |
Deletes leading and trailing whitespace from "str"
.
" \f\n\r\t\v"
.isspace(ch)
to find out if a letter is whitespace.
{{ // test::trim() std::string str; str = " a b "; trim(str); assertTrue( str == "a b" ); str = " a\nb "; trim(str); assertTrue( str == "a\nb" ); str = ""; trim(str); assertTrue( str == "" ); str = "\r\t\n "; trim(str); assertTrue( str == "" ); str = " a b "; trim(str); assertTrue( str == "a b" ); str = " ab " ; trim(str); assertTrue( str == "ab" ); }}
void trimCSV | ( | std::string & | str | ) |
Converts an incorrect CSV field value into its probably correct value.
trim()
.
[""] by a single double quote
["].
Sometimes a FILE.csv has quoted fields surrounded by whitespace. As these field values do not comply with RFC-4180, they are extracted by getNextCSV()
as they come, with no whitespace removed and with their double quote pairs intact. In the following example the string is enclosed in square brackets [..] instead of double quotes
["] for legibility:
["zero", "if "" 1" , , " 3xt" \r\n]
[....0.,........ 1..,2,.........3...]

  csv field          getNextCSV()     trimCSV()
+------------------+----------------+----------+
| ["zero"]         | [zero]         | [zero]   |
| [, "if "" 1" ]   | [ "if "" 1" ]  | [if " 1] |
| [, ]             | [ ]            | []       |
| [, " 3xt" \r\n]  | [ " 3xt" ]     | [ 3xt]   |
+------------------+----------------+----------+
By common sense, the programmer would expect that these strings be returned as they appear in the trimCSV()
column, but the fact of the matter is that the only one that complies with RFC-4180 is the first one. After using trimCSV()
on the value returned by getNextCSV()
the result is what is reasonably expected.
"\r"
or line feeds "\n"
are probably processed in a way different from what is expected, even before they are passed as arguments to trimCSV()
. It is wiser not to trust this routine as a complete solution to process CSV files that do not fully comply with RFC-4180.
{{ // test::trimCSV() CSV_line csv("\"zero\", \"if \"\" 1\" , , \" 3xt\" \r\f"); std::string s; s=csv[0]; assertTrue( s == "zero" ); trimCSV(s); assertTrue( s == "zero" ); s=csv[1]; assertTrue( s == " \"if \"\" 1\" " ); trimCSV(s); assertTrue( s == "if \" 1" ); s=csv[2]; assertTrue( s == " " ); trimCSV(s); assertTrue( s == "" ); s=csv[3]; assertTrue( s == " \" 3xt\" \r\f" ); trimCSV(s); assertTrue( s == " 3xt" ); }}
void chop | ( | std::string & | str, | |
char | ch = 0 | |||
) |
Deletes ch
when it is the trailing character in str
.
ch
.
{{ // test::chop() std::string str; char ch; str = "12345"; assertTrue( str == "12345" ); chop(str,'0'); assertTrue( str == "12345" ); for ( ch='5'; ch != '0'; --ch ) { assertTrue( str[str.size()-1] == ch ); chop(str,ch); } assertTrue( str == "" ); chop(str,'3'); assertTrue( str == "" ); }}