如何迭代由空格分隔的单词组成的字符串中的单词?

注意,我对C字符串函数或那种字符操作/访问不感兴趣。比起效率,我更喜欢优雅。我当前的解决方案:

#include <iostream>
#include <sstream>
#include <string>

using namespace std;

int main() {
    string s = "Somewhere down the road";
    istringstream iss(s);

    do {
        string subs;
        iss >> subs;
        cout << "Substring: " << subs << endl;
    } while (iss);
}

当前回答

还有另一种方式——连续传递方式、零分配、基于函数的分隔。

 void split( auto&& data, auto&& splitter, auto&& operation ) {
   using std::begin; using std::end;
   auto prev = begin(data);
   while (prev != end(data) ) {
     auto&&[prev,next] = splitter( prev, end(data) );
     operation(prev,next);
     prev = next;
   }
 }

现在我们可以基于此编写特定的拆分函数。

 auto anyOfSplitter(auto delimiters) {
   return [delimiters](auto begin, auto end) {
     while( begin != end && 0 == std::string_view(begin, end).find_first_of(delimiters) ) {
       ++begin;
     }
     auto view = std::string_view(begin, end);
     auto next = view.find_first_of(delimiters);
     if (next != view.npos)
       return std::make_pair( begin, begin + next );
     else
       return std::make_pair( begin, end );
   };
 }

我们现在可以生成一个传统的std字符串分割,如下所示:

 template<class C>
 auto traditional_any_of_split( std::string_view<C> str, std::string_view<C> delim ) {
   std::vector<std::basic_string<C>> retval;
   split( str, anyOfSplitter(delim), [&](auto s, auto f) {
     retval.emplace_back(s,f);
   });
   return retval;
 }

或者我们可以改用find

 auto findSplitter(auto delimiter) {
   return [delimiter](auto begin, auto end) {
     while( begin != end && 0 == std::string_view(begin, end).find(delimiter) ) {
       begin += delimiter.size();
     }
     auto view = std::string_view(begin, end);
     auto next = view.find(delimiter);
     if (next != view.npos)
       return std::make_pair( begin, begin + next );
     else
       return std::make_pair( begin, end );
   };
 }

 template<class C>
 auto traditional_find_split( std::string_view<C> str, std::string_view<C> delim ) {
   std::vector<std::basic_string<C>> retval;
   split( str, findSplitter(delim), [&](auto s, auto f) {
     retval.emplace_back(s,f);
   });
   return retval;
 }

通过更换分流器部分。

这两者都分配了一个返回值缓冲区。我们可以以手动管理生命周期为代价将返回值交换到字符串视图。

我们还可以采用一个延续,一次传递一个字符串视图,甚至避免分配视图向量。

这可以通过一个中止选项进行扩展,这样我们可以在读取几个前缀字符串后中止。

其他回答

我使用以下代码:

namespace Core
{
    typedef std::wstring String;

    void SplitString(const Core::String& input, const Core::String& splitter, std::list<Core::String>& output)
    {
        if (splitter.empty())
        {
            throw std::invalid_argument(); // for example
        }

        std::list<Core::String> lines;

        Core::String::size_type offset = 0;

        for (;;)
        {
            Core::String::size_type splitterPos = input.find(splitter, offset);

            if (splitterPos != Core::String::npos)
            {
                lines.push_back(input.substr(offset, splitterPos - offset));
                offset = splitterPos + splitter.size();
            }
            else
            {
                lines.push_back(input.substr(offset));
                break;
            }
        }

        lines.swap(output);
    }
}

// gtest:

class SplitStringTest: public testing::Test
{
};

TEST_F(SplitStringTest, EmptyStringAndSplitter)
{
    std::list<Core::String> result;
    ASSERT_ANY_THROW(Core::SplitString(Core::String(), Core::String(), result));
}

TEST_F(SplitStringTest, NonEmptyStringAndEmptySplitter)
{
    std::list<Core::String> result;
    ASSERT_ANY_THROW(Core::SplitString(L"xy", Core::String(), result));
}

TEST_F(SplitStringTest, EmptyStringAndNonEmptySplitter)
{
    std::list<Core::String> result;
    Core::SplitString(Core::String(), Core::String(L","), result);
    ASSERT_EQ(1, result.size());
    ASSERT_EQ(Core::String(), *result.begin());
}

TEST_F(SplitStringTest, OneCharSplitter)
{
    std::list<Core::String> result;

    Core::SplitString(L"x,y", L",", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"x", *result.begin());
    ASSERT_EQ(L"y", *result.rbegin());

    Core::SplitString(L",xy", L",", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(Core::String(), *result.begin());
    ASSERT_EQ(L"xy", *result.rbegin());

    Core::SplitString(L"xy,", L",", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"xy", *result.begin());
    ASSERT_EQ(Core::String(), *result.rbegin());
}

TEST_F(SplitStringTest, TwoCharsSplitter)
{
    std::list<Core::String> result;

    Core::SplitString(L"x,.y,z", L",.", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"x", *result.begin());
    ASSERT_EQ(L"y,z", *result.rbegin());

    Core::SplitString(L"x,,y,z", L",,", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"x", *result.begin());
    ASSERT_EQ(L"y,z", *result.rbegin());
}

TEST_F(SplitStringTest, RecursiveSplitter)
{
    std::list<Core::String> result;

    Core::SplitString(L",,,", L",,", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(Core::String(), *result.begin());
    ASSERT_EQ(L",", *result.rbegin());

    Core::SplitString(L",.,.,", L",.,", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(Core::String(), *result.begin());
    ASSERT_EQ(L".,", *result.rbegin());

    Core::SplitString(L"x,.,.,y", L",.,", result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"x", *result.begin());
    ASSERT_EQ(L".,y", *result.rbegin());

    Core::SplitString(L",.,,.,", L",.,", result);
    ASSERT_EQ(3, result.size());
    ASSERT_EQ(Core::String(), *result.begin());
    ASSERT_EQ(Core::String(), *(++result.begin()));
    ASSERT_EQ(Core::String(), *result.rbegin());
}

TEST_F(SplitStringTest, NullTerminators)
{
    std::list<Core::String> result;

    Core::SplitString(L"xy", Core::String(L"\0", 1), result);
    ASSERT_EQ(1, result.size());
    ASSERT_EQ(L"xy", *result.begin());

    Core::SplitString(Core::String(L"x\0y", 3), Core::String(L"\0", 1), result);
    ASSERT_EQ(2, result.size());
    ASSERT_EQ(L"x", *result.begin());
    ASSERT_EQ(L"y", *result.rbegin());
}

我们可以在c++中使用strtok,

#include <iostream>
#include <cstring>
using namespace std;

int main()
{
    char str[]="Mickey M;12034;911416313;M;01a;9001;NULL;0;13;12;0;CPP,C;MSC,3D;FEND,BEND,SEC;";
    char *pch = strtok (str,";,");
    while (pch != NULL)
    {
        cout<<pch<<"\n";
        pch = strtok (NULL, ";,");
    }
    return 0;
}

使用vector作为基类的快速版本,可完全访问其所有运算符:

    // Split string into parts.
    class Split : public std::vector<std::string>
    {
        public:
            Split(const std::string& str, char* delimList)
            {
               size_t lastPos = 0;
               size_t pos = str.find_first_of(delimList);

               while (pos != std::string::npos)
               {
                    if (pos != lastPos)
                        push_back(str.substr(lastPos, pos-lastPos));
                    lastPos = pos + 1;
                    pos = str.find_first_of(delimList, lastPos);
               }
               if (lastPos < str.length())
                   push_back(str.substr(lastPos, pos-lastPos));
            }
    };

用于填充STL集的示例:

std::set<std::string> words;
Split split("Hello,World", ",");
words.insert(split.begin(), split.end());

我用这个分隔符分隔字符串。第一个将结果放入预先构建的向量中,第二个返回新向量。

#include <string>
#include <sstream>
#include <vector>
#include <iterator>

template <typename Out>
void split(const std::string &s, char delim, Out result) {
    std::istringstream iss(s);
    std::string item;
    while (std::getline(iss, item, delim)) {
        *result++ = item;
    }
}

std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, std::back_inserter(elems));
    return elems;
}

请注意,此解决方案不会跳过空令牌,因此下面将找到4项,其中一项为空:

std::vector<std::string> x = split("one:two::three", ':');

我编写了以下代码。您可以指定分隔符,它可以是字符串。结果类似于Java的String.split,结果中包含空字符串。

例如,如果我们调用split(“ABCPICKABCANYABCTWO:ABC”,“ABC”),结果如下:

0  <len:0>
1 PICK <len:4>
2 ANY <len:3>
3 TWO: <len:4>
4  <len:0>

代码:

vector <string> split(const string& str, const string& delimiter = " ") {
    vector <string> tokens;

    string::size_type lastPos = 0;
    string::size_type pos = str.find(delimiter, lastPos);

    while (string::npos != pos) {
        // Found a token, add it to the vector.
        cout << str.substr(lastPos, pos - lastPos) << endl;
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        lastPos = pos + delimiter.size();
        pos = str.find(delimiter, lastPos);
    }

    tokens.push_back(str.substr(lastPos, str.size() - lastPos));
    return tokens;
}