如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

LazyString拆分器:

#include <string>
#include <algorithm>
#include <unordered_set>

using namespace std;

class LazyStringSplitter
{
    string::const_iterator start, finish;
    unordered_set<char> chop;

public:

    // Empty Constructor
    explicit LazyStringSplitter()
    {}

    explicit LazyStringSplitter (const string cstr, const string delims)
        : start(cstr.begin())
        , finish(cstr.end())
        , chop(delims.begin(), delims.end())
    {}

    void operator () (const string cstr, const string delims)
    {
        chop.insert(delims.begin(), delims.end());
        start = cstr.begin();
        finish = cstr.end();
    }

    bool empty() const { return (start >= finish); }

    string next()
    {
        // return empty string
        // if ran out of characters
        if (empty())
            return string("");

        auto runner = find_if(start, finish, [&](char c) {
            return chop.count(c) == 1;
        });

        // construct next string
        string ret(start, runner);
        start = runner + 1;

        // Never return empty string
        // + tail recursion makes this method efficient
        return !ret.empty() ? ret : next();
    }
};

我将此方法称为LazyStringSplitter是因为一个原因——它不会一次性拆分字符串。本质上,它的行为类似于python生成器它公开了一个名为next的方法,该方法返回从原始字符串拆分的下一个字符串我使用了c++11STL中的无序集,因此查找分隔符的速度要快得多下面是它的工作原理

测试程序

#include <iostream>
using namespace std;

int main()
{
    LazyStringSplitter splitter;

    // split at the characters ' ', '!', '.', ','
    splitter("This, is a string. And here is another string! Let's test and see how well this does.", " !.,");

    while (!splitter.empty())
        cout << splitter.next() << endl;
    return 0;
}

输出,输出

This
is
a
string
And
here
is
another
string
Let's
test
and
see
how
well
this
does

改进这一点的下一个计划是实施开始和结束方法,以便可以执行以下操作:

vector<string> split_string(splitter.begin(), splitter.end());

其他回答

#include <vector>
#include <string>
#include <sstream>

int main()
{
    std::string str("Split me by whitespaces");
    std::string buf;                 // Have a buffer string
    std::stringstream ss(str);       // Insert the string into a stream

    std::vector<std::string> tokens; // Create vector to hold our words

    while (ss >> buf)
        tokens.push_back(buf);

    return 0;
}
#include <iostream>
#include <vector>
using namespace std;

int main() {
  string str = "ABC AABCD CDDD RABC GHTTYU FR";
  str += " "; //dirty hack: adding extra space to the end
  vector<string> v;

  for (int i=0; i<(int)str.size(); i++) {
    int a, b;
    a = i;

    for (int j=i; j<(int)str.size(); j++) {
      if (str[j] == ' ') {
        b = j;
        i = j;
        break;
      }
    }
    v.push_back(str.substr(a, b-a));
  }

  for (int i=0; i<v.size(); i++) {
    cout<<v[i].size()<<" "<<v[i]<<endl;
  }
  return 0;
}

仅为方便:

template<class V, typename T>
bool in(const V &v, const T &el) {
    return std::find(v.begin(), v.end(), el) != v.end();
}

基于多个分隔符的实际拆分:

std::vector<std::string> split(const std::string &s,
                               const std::vector<char> &delims) {
    std::vector<std::string> res;
    auto stuff = [&delims](char c) { return !in(delims, c); };
    auto space = [&delims](char c) { return in(delims, c); };
    auto first = std::find_if(s.begin(), s.end(), stuff);
    while (first != s.end()) {
        auto last = std::find_if(first, s.end(), space);
        res.push_back(std::string(first, last));
        first = std::find_if(last + 1, s.end(), stuff);
    }
    return res;
}

用法:

int main() {
    std::string s = "   aaa,  bb  cc ";
    for (auto el: split(s, {' ', ','}))
        std::cout << el << std::endl;
    return 0;
}

我喜欢下面的代码,因为它将结果放入一个向量中,支持字符串作为delim,并控制保持空值。但是,那时候看起来不太好。

#include <ostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
using namespace std;

vector<string> split(const string& s, const string& delim, const bool keep_empty = true) {
    vector<string> result;
    if (delim.empty()) {
        result.push_back(s);
        return result;
    }
    string::const_iterator substart = s.begin(), subend;
    while (true) {
        subend = search(substart, s.end(), delim.begin(), delim.end());
        string temp(substart, subend);
        if (keep_empty || !temp.empty()) {
            result.push_back(temp);
        }
        if (subend == s.end()) {
            break;
        }
        substart = subend + delim.size();
    }
    return result;
}

int main() {
    const vector<string> words = split("So close no matter how far", " ");
    copy(words.begin(), words.end(), ostream_iterator<string>(cout, "\n"));
}

当然,Boost有一个split(),它的部分功能与此类似。而且,如果“空白”是指任何类型的空白,那么使用Boost的split和is_any_of()都非常有用。

对于一个大得离谱而且可能是冗余的版本,可以尝试很多For循环。

string stringlist[10];
int count = 0;

for (int i = 0; i < sequence.length(); i++)
{
    if (sequence[i] == ' ')
    {
        stringlist[count] = sequence.substr(0, i);
        sequence.erase(0, i+1);
        i = 0;
        count++;
    }
    else if (i == sequence.length()-1)  // Last word
    {
        stringlist[count] = sequence.substr(0, i+1);
    }
}

它并不漂亮,但总的来说(除了标点符号和一系列其他错误)它是有效的!