如何迭代字符串的单词？

如何迭代由空格分隔的单词组成的字符串中的单词？

注意，我对C字符串函数或那种字符操作/访问不感兴趣。比起效率，我更喜欢优雅。我当前的解决方案：

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

// Prints every whitespace-separated word of a fixed sentence, one per line.
int main() {
    const string s = "Somewhere down the road";
    istringstream iss(s);

    // Use the extraction itself as the loop condition: the stream only reports
    // failure *after* an unsuccessful read, so checking it at the bottom of a
    // do/while prints one extra, empty "Substring: " line at the end.
    string subs;
    while (iss >> subs) {
        cout << "Substring: " << subs << '\n';
    }
}

当前回答

获取Boost！：-）

#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string.hpp>
#include <iostream>
#include <vector>

using namespace std;
using namespace boost;

// Splits a fixed sentence on single spaces with boost::split and prints each
// resulting token on its own line.
int main(int argc, char**argv) {
    typedef vector < string > list_type;

    list_type list;
    string line;

    line = "Somewhere down the road";
    // is_any_of(" ") treats every space character as a separator.
    // NOTE(review): with Boost's default (no token_compress_on) consecutive
    // spaces presumably yield empty tokens - confirm against the Boost docs.
    split(list, line, is_any_of(" "));

    // Range-for avoids comparing a signed int index against the unsigned
    // value returned by size().
    for (const string& word : list)
    {
        cout << word << '\n';
    }

    return 0;
}

此示例的输出如下：

Somewhere
down
the
road

2013-04-07 16:07:55

其他回答

对于一个大得离谱而且可能是冗余的版本，可以尝试很多For循环。

string stringlist[10];
int count = 0;

for (int i = 0; i < sequence.length(); i++)
{
    if (sequence[i] == ' ')
    {
        stringlist[count] = sequence.substr(0, i);
        sequence.erase(0, i+1);
        i = 0;
        count++;
    }
    else if (i == sequence.length()-1)  // Last word
    {
        stringlist[count] = sequence.substr(0, i+1);
    }
}

它并不漂亮，但总的来说（除了标点符号和一系列其他错误）它是有效的！

2008-10-25 09:34:36

这是另一种方法：

// Appends every whitespace-separated word of `text` to `words`.
//
// Runs of whitespace (as classified by isspace) act as a single separator, so
// no empty words are produced. Existing contents of `words` are kept.
void split_string(string text,vector<string>& words)
{
  string word;

  // Iterate over the real length of the string instead of stopping at the
  // first '\0', so embedded NUL characters do not truncate the input.
  for (char ch : text)
  {
    // isspace requires a value representable as unsigned char (or EOF);
    // passing a negative char is undefined behaviour.
    if (isspace(static_cast<unsigned char>(ch)))
    {
      if (!word.empty())
      {
        words.push_back(word);
        word.clear();
      }
    }
    else
    {
      word += ch;
    }
  }

  // Flush the trailing word when the text does not end in whitespace.
  if (!word.empty())
  {
    words.push_back(word);
  }
}

2010-01-08 03:21:16

我有一种与其他解决方案非常不同的方法，它提供了很多其他解决方案所缺乏的价值，但当然也有其缺点。这是一个可以工作的实现，示例是在每个单词两侧加上 <tag> 和 </tag>。

首先，这个问题可以通过一个循环解决，不需要额外的内存，只需考虑四种逻辑情况。从概念上讲，我们对边界感兴趣。我们的代码应该反映出这一点：让我们遍历字符串，一次查看两个字符，记住字符串的开头和结尾都有特殊情况。

缺点是我们必须编写实现，这有点冗长，但大多是方便的样板。

好处是实现由我们自己编写，因此很容易根据特定的需要定制它，例如区分左、右单词边界，使用任意一组分隔符，或处理其他情况，例如无边界或错误位置。

using namespace std;

#include <iostream>
#include <string>

#include <cctype>

// Result categories reported for a single position of a word-boundary scan.
enum boundary_type_e {
    E_BOUNDARY_TYPE_ERROR = -1, // the position could not be examined
    E_BOUNDARY_TYPE_NONE  = 0,  // no boundary at this position
    E_BOUNDARY_TYPE_LEFT  = 1,  // a word begins here
    E_BOUNDARY_TYPE_RIGHT = 2   // a word ends here
};
typedef boundary_type_e boundary_type_t;

// A boundary classification paired with a character index into the string.
// Kept as a plain aggregate so it can be brace-initialised in member order.
struct boundary_s {
    boundary_type_t type; // kind of boundary found
    int pos;              // index associated with the boundary
};
typedef boundary_s boundary_t;

// Returns true when `c` separates words. Currently that means whitespace as
// classified by isspace; compare against any other characters you want to
// treat as delimiters here.
bool is_delim_char(int c) {
    // isspace is only defined for values representable as unsigned char (or
    // EOF). Callers pass plain char values, which may be negative for
    // non-ASCII bytes, so normalise before classifying.
    return isspace(static_cast<unsigned char>(c)) != 0;
}

// Returns true when `c` is a printable ASCII character (' ' through '~') that
// is not a delimiter according to is_delim_char.
bool is_word_char(int c) {
    // Anything outside the printable ASCII range is never part of a word.
    if (c < ' ' || c > '~') {
        return false;
    }
    return !is_delim_char(c);
}

// Classifies position `pos` of `str` as a word boundary, if it is one.
//
// Returns:
//   - E_BOUNDARY_TYPE_ERROR when `pos` is outside [0, length).
//   - E_BOUNDARY_TYPE_LEFT  with .pos set to the index of the first character
//     of a word.
//   - E_BOUNDARY_TYPE_RIGHT with .pos set to one past the last character of a
//     word.
//   - E_BOUNDARY_TYPE_NONE  otherwise.
// For ERROR and NONE, .pos is 0.
//
// Limitation: only one boundary is reported per position. A one-character word
// at index 0 yields just its LEFT boundary; its RIGHT boundary is not reported.
//
// The string is taken by const reference so that scanning every position of a
// string does not copy it on each call.
boundary_t maybe_word_boundary(const string& str, int pos) {
    const int len = static_cast<int>(str.length());
    if (pos < 0 || pos >= len) {
        return boundary_t{E_BOUNDARY_TYPE_ERROR, 0};
    }

    const bool here_is_word = is_word_char(str[pos]);

    if (pos == 0 && here_is_word) {
        // if the first character is word-y, we have a left boundary at the beginning
        return boundary_t{E_BOUNDARY_TYPE_LEFT, pos};
    }
    if (pos == len - 1 && here_is_word) {
        // if the last character is word-y, we have a right boundary left of the null terminator
        return boundary_t{E_BOUNDARY_TYPE_RIGHT, pos + 1};
    }

    // str[len] is valid for std::string and yields '\0', which is not a word
    // character, so looking one past the last index is safe here.
    const bool next_is_word = is_word_char(str[pos + 1]);

    if (!here_is_word && next_is_word) {
        // if we have a delimiter followed by a word char, we have a left boundary left of the word char
        return boundary_t{E_BOUNDARY_TYPE_LEFT, pos + 1};
    }
    if (here_is_word && !next_is_word) {
        // if we have a word char followed by a delimiter, we have a right boundary right of the word char
        return boundary_t{E_BOUNDARY_TYPE_RIGHT, pos + 1};
    }
    return boundary_t{E_BOUNDARY_TYPE_NONE, 0};
}

int main() {
    string str;
    getline(cin, str);

    int len = str.length();
    for (int i = 0; i < len; i++) {
        boundary_t boundary = maybe_word_boundary(str, i);
        if (boundary.type == E_BOUNDARY_TYPE_LEFT) {
            // whatever
        } else if (boundary.type == E_BOUNDARY_TYPE_RIGHT) {
            // whatever
        }
    }
}

正如您所看到的，代码非常容易理解和微调，代码的实际使用也非常简短。使用C++不应阻止我们编写最简单、最容易定制的代码，即使这意味着不使用STL。我认为这是Linus Torvalds所说的“品味”的一个例子：我们消除了所有不需要的逻辑，而这种写法又自然地允许在需要时处理更多的情况。

此代码可能的改进是：使用 enum class；让 maybe_word_boundary 接受一个指向 is_word_char 的函数指针（而不是直接调用 is_word_char），这样调用方就可以传入 lambda。

2019-01-16 15:14:15

我编写了以下代码。您可以指定分隔符，它可以是字符串。结果类似于Java的String.split，结果中包含空字符串。

例如，如果我们调用 split("ABCPICKABCANYABCTWO:ABC", "ABC")，结果如下：

0  <len:0>
1 PICK <len:4>
2 ANY <len:3>
3 TWO: <len:4>
4  <len:0>

代码：

// Splits `str` at every occurrence of `delimiter` (which may be longer than
// one character) and returns the pieces in order.
//
// Empty pieces are kept, so a leading, trailing or repeated delimiter produces
// empty strings in the result. The result always contains at least one element.
// An empty `delimiter` cannot match anything meaningful; in that case the whole
// input is returned as a single token instead of looping forever.
//
// The function has no side effects: it does not print the tokens it finds.
vector <string> split(const string& str, const string& delimiter = " ") {
    vector <string> tokens;

    if (delimiter.empty()) {
        // find("") matches at every position without advancing, which would
        // never terminate.
        tokens.push_back(str);
        return tokens;
    }

    string::size_type lastPos = 0;
    for (string::size_type pos = str.find(delimiter, lastPos);
         pos != string::npos;
         pos = str.find(delimiter, lastPos)) {
        // Found a token, add it to the vector.
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        lastPos = pos + delimiter.size();
    }

    // Whatever follows the last delimiter (possibly nothing) is the final token.
    tokens.push_back(str.substr(lastPos));
    return tokens;
}

2012-10-07 05:26:11

一些C++20编译器和大多数C++23编译器（range和string_view）

for (auto word : std::views::split("Somewhere down the road", ' '))
        std::cout << std::string_view{ word.begin(), word.end() } << std::endl;

2023-02-07 19:37:31

如何迭代字符串的单词？

推荐文章

最新文章

标签