Java有一个方便的分割方法:

String str = "The quick brown fox";
String[] results = str.split(" ");

在c++中有简单的方法来做到这一点吗?


当前回答

Boost有很强的拆分功能:Boost::algorithm::split。

示例程序:

#include <vector>
#include <boost/algorithm/string.hpp>

int main() {
    auto s = "a,b, c ,,e,f,";
    std::vector<std::string> fields;
    boost::split(fields, s, boost::is_any_of(","));
    for (const auto& field : fields)
        std::cout << "\"" << field << "\"\n";
    return 0;
}

输出:

"a"
"b"
" c "
""
"e"
"f"
""

其他回答

pystring是一个小型库,实现了Python的一系列字符串函数,包括split方法:

#include <string>
#include <vector>
#include "pystring.h"

std::vector<std::string> chunks;
pystring::split("this string", chunks);

// also can specify a separator
pystring::split("this-string", chunks, "-");

简单的c++代码(标准c++ 98),接受多个分隔符(在std::string中指定),只使用向量、字符串和迭代器。

#include <iostream>
#include <vector>
#include <string>
#include <stdexcept> 

std::vector<std::string> 
split(const std::string& str, const std::string& delim){
    std::vector<std::string> result;
    if (str.empty())
        throw std::runtime_error("Can not tokenize an empty string!");
    std::string::const_iterator begin, str_it;
    begin = str_it = str.begin(); 
    do {
        while (delim.find(*str_it) == std::string::npos && str_it != str.end())
            str_it++; // find the position of the first delimiter in str
        std::string token = std::string(begin, str_it); // grab the token
        if (!token.empty()) // empty token only when str starts with a delimiter
            result.push_back(token); // push the token into a vector<string>
        while (delim.find(*str_it) != std::string::npos && str_it != str.end())
            str_it++; // ignore the additional consecutive delimiters
        begin = str_it; // process the remaining tokens
        } while (str_it != str.end());
    return result;
}

int main() {
    std::string test_string = ".this is.a.../.simple;;test;;;END";
    std::string delim = "; ./"; // string containing the delimiters
    std::vector<std::string> tokens = split(test_string, delim);           
    for (std::vector<std::string>::const_iterator it = tokens.begin(); 
        it != tokens.end(); it++)
            std::cout << *it << std::endl;
}

我以前只用标准库做了一个lexer/tokenizer。代码如下:

#include <iostream>
#include <string>
#include <vector>
#include <sstream>

using namespace std;

string seps(string& s) {
    if (!s.size()) return "";
    stringstream ss;
    ss << s[0];
    for (int i = 1; i < s.size(); i++) {
        ss << '|' << s[i];
    }
    return ss.str();
}

void Tokenize(string& str, vector<string>& tokens, const string& delimiters = " ")
{
    seps(str);

    // Skip delimiters at beginning.
    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
    // Find first "non-delimiter".
    string::size_type pos = str.find_first_of(delimiters, lastPos);

    while (string::npos != pos || string::npos != lastPos)
    {
        // Found a token, add it to the vector.
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        // Skip delimiters.  Note the "not_of"
        lastPos = str.find_first_not_of(delimiters, pos);
        // Find next "non-delimiter"
        pos = str.find_first_of(delimiters, lastPos);
    }
}

int main(int argc, char *argv[])
{
    vector<string> t;
    string s = "Tokens for everyone!";

    Tokenize(s, t, "|");

    for (auto c : t)
        cout << c << endl;

    system("pause");

    return 0;
}

Boost标记器类可以使这类事情变得非常简单:

#include <iostream>
#include <string>
#include <boost/foreach.hpp>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int, char**)
{
    string text = "token, test   string";

    char_separator<char> sep(", ");
    tokenizer< char_separator<char> > tokens(text, sep);
    BOOST_FOREACH (const string& t, tokens) {
        cout << t << "." << endl;
    }
}

针对c++ 11更新:

#include <iostream>
#include <string>
#include <boost/tokenizer.hpp>

using namespace std;
using namespace boost;

int main(int, char**)
{
    string text = "token, test   string";

    char_separator<char> sep(", ");
    tokenizer<char_separator<char>> tokens(text, sep);
    for (const auto& t : tokens) {
        cout << t << "." << endl;
    }
}

Boost有很强的拆分功能:Boost::algorithm::split。

示例程序:

#include <vector>
#include <boost/algorithm/string.hpp>

int main() {
    auto s = "a,b, c ,,e,f,";
    std::vector<std::string> fields;
    boost::split(fields, s, boost::is_any_of(","));
    for (const auto& field : fields)
        std::cout << "\"" << field << "\"\n";
    return 0;
}

输出:

"a"
"b"
" c "
""
"e"
"f"
""