我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
我需要在c++中加载和使用CSV文件数据。在这一点上,它实际上只是一个以逗号分隔的解析器(即不用担心转义新行和逗号)。主要需要的是逐行解析器,它将在每次调用方法时为下一行返回一个向量。
我发现这篇文章看起来很有前途: http://www.boost.org/doc/libs/1_35_0/libs/spirit/example/fundamental/list_parser.cpp
我从未使用过Boost's Spirit,但我愿意尝试一下。但前提是我忽略了一个更直接的解决方案。
当前回答
另一个类似于Loki Astari的答案的解决方案,在c++ 11中。这里的行是给定类型的std::元组。代码扫描一行,然后扫描到每个分隔符,然后将值直接转换并转储到元组中(使用一些模板代码)。
for (auto row : csv<std::string, int, float>(file, ',')) {
std::cout << "first col: " << std::get<0>(row) << std::endl;
}
优势:
非常干净,使用简单,只有c++ 11。 自动类型转换为std::tuple<t1,…>通过算子>>。
缺少什么:
转义和引用 没有错误处理的情况下畸形的CSV。
主要代码:
#include <iterator>
#include <sstream>
#include <string>
namespace csvtools {
/// Read the last element of the tuple without calling recursively
template <std::size_t idx, class... fields>
typename std::enable_if<idx >= std::tuple_size<std::tuple<fields...>>::value - 1>::type
read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
std::string cell;
std::getline(in, cell, delimiter);
std::stringstream cell_stream(cell);
cell_stream >> std::get<idx>(out);
}
/// Read the @p idx-th element of the tuple and then calls itself with @p idx + 1 to
/// read the next element of the tuple. Automatically falls in the previous case when
/// reaches the last element of the tuple thanks to enable_if
template <std::size_t idx, class... fields>
typename std::enable_if<idx < std::tuple_size<std::tuple<fields...>>::value - 1>::type
read_tuple(std::istream &in, std::tuple<fields...> &out, const char delimiter) {
std::string cell;
std::getline(in, cell, delimiter);
std::stringstream cell_stream(cell);
cell_stream >> std::get<idx>(out);
read_tuple<idx + 1, fields...>(in, out, delimiter);
}
}
/// Iterable csv wrapper around a stream. @p fields the list of types that form up a row.
template <class... fields>
class csv {
std::istream &_in;
const char _delim;
public:
typedef std::tuple<fields...> value_type;
class iterator;
/// Construct from a stream.
inline csv(std::istream &in, const char delim) : _in(in), _delim(delim) {}
/// Status of the underlying stream
/// @{
inline bool good() const {
return _in.good();
}
inline const std::istream &underlying_stream() const {
return _in;
}
/// @}
inline iterator begin();
inline iterator end();
private:
/// Reads a line into a stringstream, and then reads the line into a tuple, that is returned
inline value_type read_row() {
std::string line;
std::getline(_in, line);
std::stringstream line_stream(line);
std::tuple<fields...> retval;
csvtools::read_tuple<0, fields...>(line_stream, retval, _delim);
return retval;
}
};
/// Iterator; just calls recursively @ref csv::read_row and stores the result.
template <class... fields>
class csv<fields...>::iterator {
csv::value_type _row;
csv *_parent;
public:
typedef std::input_iterator_tag iterator_category;
typedef csv::value_type value_type;
typedef std::size_t difference_type;
typedef csv::value_type * pointer;
typedef csv::value_type & reference;
/// Construct an empty/end iterator
inline iterator() : _parent(nullptr) {}
/// Construct an iterator at the beginning of the @p parent csv object.
inline iterator(csv &parent) : _parent(parent.good() ? &parent : nullptr) {
++(*this);
}
/// Read one row, if possible. Set to end if parent is not good anymore.
inline iterator &operator++() {
if (_parent != nullptr) {
_row = _parent->read_row();
if (!_parent->good()) {
_parent = nullptr;
}
}
return *this;
}
inline iterator operator++(int) {
iterator copy = *this;
++(*this);
return copy;
}
inline csv::value_type const &operator*() const {
return _row;
}
inline csv::value_type const *operator->() const {
return &_row;
}
bool operator==(iterator const &other) {
return (this == &other) or (_parent == nullptr and other._parent == nullptr);
}
bool operator!=(iterator const &other) {
return not (*this == other);
}
};
template <class... fields>
typename csv<fields...>::iterator csv<fields...>::begin() {
return iterator(*this);
}
template <class... fields>
typename csv<fields...>::iterator csv<fields...>::end() {
return iterator();
}
我在GitHub上放了一个小的工作示例;我一直用它来解析一些数值数据,它达到了它的目的。
其他回答
如果你确实关心正确解析CSV,这将做它…相对较慢,因为它一次只处理一个字符。
void ParseCSV(const string& csvSource, vector<vector<string> >& lines)
{
bool inQuote(false);
bool newLine(false);
string field;
lines.clear();
vector<string> line;
string::const_iterator aChar = csvSource.begin();
while (aChar != csvSource.end())
{
switch (*aChar)
{
case '"':
newLine = false;
inQuote = !inQuote;
break;
case ',':
newLine = false;
if (inQuote == true)
{
field += *aChar;
}
else
{
line.push_back(field);
field.clear();
}
break;
case '\n':
case '\r':
if (inQuote == true)
{
field += *aChar;
}
else
{
if (newLine == false)
{
line.push_back(field);
lines.push_back(line);
field.clear();
line.clear();
newLine = true;
}
}
break;
default:
newLine = false;
field.push_back(*aChar);
break;
}
aChar++;
}
if (field.size())
line.push_back(field);
if (line.size())
lines.push_back(line);
}
可以使用std::regex。
根据文件大小和可用内存,可以逐行读取,也可以完全在std::string中读取。
读取文件可以使用:
std::ifstream t("file.txt");
std::string sin((std::istreambuf_iterator<char>(t)),
std::istreambuf_iterator<char>());
然后你可以和这个相匹配,它实际上是根据你的需要定制的。
std::regex word_regex(",\\s]+");
auto what =
std::sregex_iterator(sin.begin(), sin.end(), word_regex);
auto wend = std::sregex_iterator();
std::vector<std::string> v;
for (;what!=wend ; wend) {
std::smatch match = *what;
v.push_back(match.str());
}
不好意思,但是为了隐藏几行代码,这似乎是非常复杂的语法。
为什么不这样呢:
/**
Read line from a CSV file
@param[in] fp file pointer to open file
@param[in] vls reference to vector of strings to hold next line
*/
void readCSV( FILE *fp, std::vector<std::string>& vls )
{
vls.clear();
if( ! fp )
return;
char buf[10000];
if( ! fgets( buf,999,fp) )
return;
std::string s = buf;
int p,q;
q = -1;
// loop over columns
while( 1 ) {
p = q;
q = s.find_first_of(",\n",p+1);
if( q == -1 )
break;
vls.push_back( s.substr(p+1,q-p-1) );
}
}
int _tmain(int argc, _TCHAR* argv[])
{
std::vector<std::string> vls;
FILE * fp = fopen( argv[1], "r" );
if( ! fp )
return 1;
readCSV( fp, vls );
readCSV( fp, vls );
readCSV( fp, vls );
std::cout << "row 3, col 4 is " << vls[3].c_str() << "\n";
return 0;
}
使用流解析CSV文件行
我写了一个解析CSV文件行的小例子,如果需要,它可以用for和while循环来开发:
#include <iostream>
#include <fstream>
#include <string.h>
using namespace std;
int main() {
ifstream fin("Infile.csv");
ofstream fout("OutFile.csv");
string strline, strremain, strCol1 , strout;
string delimeter =";";
int d1;
继续到文件的末尾:
while (!fin.eof()){
从InFile获取第一行:
getline(fin,strline,'\n');
在直线上找到度距仪的位置:
d1 = strline.find(';');
然后解析第一列:
strCol1 = strline.substr(0,d1); // parse first Column
d1++;
strremain = strline.substr(d1); // remaining line
创建CSV格式输出行:
strout.append(strCol1);
strout.append(delimeter);
写行到输出文件:
fout << strout << endl; //out file line
}
fin.close();
fout.close();
return(0);
}
代码已编译并运行。好运!
由于所有CSV问题似乎都被重定向到这里,我想我应该在这里发布我的答案。这个回答并没有直接回答提问者的问题。我希望能够读取已知的CSV格式的流,而且每个字段的类型都已经知道。当然,可以使用下面的方法将每个字段处理为字符串类型。
作为我希望能够使用CSV输入流的一个例子,考虑以下输入(取自维基百科的CSV页面):
const char input[] =
"Year,Make,Model,Description,Price\n"
"1997,Ford,E350,\"ac, abs, moon\",3000.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",\"\",5000.00\n"
"1996,Jeep,Grand Cherokee,\"MUST SELL!\n\
air, moon roof, loaded\",4799.00\n"
;
然后,我希望能够像这样读取数据:
std::istringstream ss(input);
std::string title[5];
int year;
std::string make, model, desc;
float price;
csv_istream(ss)
>> title[0] >> title[1] >> title[2] >> title[3] >> title[4];
while (csv_istream(ss)
>> year >> make >> model >> desc >> price) {
//...do something with the record...
}
这就是我最后得到的解。
struct csv_istream {
std::istream &is_;
csv_istream (std::istream &is) : is_(is) {}
void scan_ws () const {
while (is_.good()) {
int c = is_.peek();
if (c != ' ' && c != '\t') break;
is_.get();
}
}
void scan (std::string *s = 0) const {
std::string ws;
int c = is_.get();
if (is_.good()) {
do {
if (c == ',' || c == '\n') break;
if (s) {
ws += c;
if (c != ' ' && c != '\t') {
*s += ws;
ws.clear();
}
}
c = is_.get();
} while (is_.good());
if (is_.eof()) is_.clear();
}
}
template <typename T, bool> struct set_value {
void operator () (std::string in, T &v) const {
std::istringstream(in) >> v;
}
};
template <typename T> struct set_value<T, true> {
template <bool SIGNED> void convert (std::string in, T &v) const {
if (SIGNED) v = ::strtoll(in.c_str(), 0, 0);
else v = ::strtoull(in.c_str(), 0, 0);
}
void operator () (std::string in, T &v) const {
convert<is_signed_int<T>::val>(in, v);
}
};
template <typename T> const csv_istream & operator >> (T &v) const {
std::string tmp;
scan(&tmp);
set_value<T, is_int<T>::val>()(tmp, v);
return *this;
}
const csv_istream & operator >> (std::string &v) const {
v.clear();
scan_ws();
if (is_.peek() != '"') scan(&v);
else {
std::string tmp;
is_.get();
std::getline(is_, tmp, '"');
while (is_.peek() == '"') {
v += tmp;
v += is_.get();
std::getline(is_, tmp, '"');
}
v += tmp;
scan();
}
return *this;
}
template <typename T>
const csv_istream & operator >> (T &(*manip)(T &)) const {
is_ >> manip;
return *this;
}
operator bool () const { return !is_.fail(); }
};
使用以下helper,可以通过c++ 11中的新积分特征模板进行简化:
template <typename T> struct is_signed_int { enum { val = false }; };
template <> struct is_signed_int<short> { enum { val = true}; };
template <> struct is_signed_int<int> { enum { val = true}; };
template <> struct is_signed_int<long> { enum { val = true}; };
template <> struct is_signed_int<long long> { enum { val = true}; };
template <typename T> struct is_unsigned_int { enum { val = false }; };
template <> struct is_unsigned_int<unsigned short> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned int> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long long> { enum { val = true}; };
template <typename T> struct is_int {
enum { val = (is_signed_int<T>::val || is_unsigned_int<T>::val) };
};
在网上试试!