Java有一个方便的分割方法:

String str = "The quick brown fox";
String[] results = str.split(" ");

在c++中有简单的方法来做到这一点吗?


当前回答

如果你愿意使用C语言,你可以使用strtok函数。在使用它时,您应该注意多线程问题。

其他回答

使用regex_token_iterators的解决方案:

#include <iostream>
#include <regex>
#include <string>

using namespace std;

int main()
{
    string str("The quick brown fox");

    regex reg("\\s+");

    sregex_token_iterator iter(str.begin(), str.end(), reg, -1);
    sregex_token_iterator end;

    vector<string> vec(iter, end);

    for (auto a : vec)
    {
        cout << a << endl;
    }
}

If the maximum length of the input string to be tokenized is known, one can exploit this and implement a very fast version. I am sketching the basic idea below, which was inspired by both strtok() and the "suffix array"-data structure described Jon Bentley's "Programming Perls" 2nd edition, chapter 15. The C++ class in this case only gives some organization and convenience of use. The implementation shown can be easily extended for removing leading and trailing whitespace characters in the tokens.

基本上,可以将分隔符替换为以字符串结束的'\0'字符,并设置指向修改后字符串中的标记的指针。在极端情况下,当字符串仅由分隔符组成时,将得到字符串长度加1个空标记。复制要修改的字符串是可行的。

头文件:

class TextLineSplitter
{
public:

    TextLineSplitter( const size_t max_line_len );

    ~TextLineSplitter();

    void            SplitLine( const char *line,
                               const char sep_char = ',',
                             );

    inline size_t   NumTokens( void ) const
    {
        return mNumTokens;
    }

    const char *    GetToken( const size_t token_idx ) const
    {
        assert( token_idx < mNumTokens );
        return mTokens[ token_idx ];
    }

private:
    const size_t    mStorageSize;

    char           *mBuff;
    char          **mTokens;
    size_t          mNumTokens;

    inline void     ResetContent( void )
    {
        memset( mBuff, 0, mStorageSize );
        // mark all items as empty:
        memset( mTokens, 0, mStorageSize * sizeof( char* ) );
        // reset counter for found items:
        mNumTokens = 0L;
    }
};

Implementattion文件:

TextLineSplitter::TextLineSplitter( const size_t max_line_len ):
    mStorageSize ( max_line_len + 1L )
{
    // allocate memory
    mBuff   = new char  [ mStorageSize ];
    mTokens = new char* [ mStorageSize ];

    ResetContent();
}

TextLineSplitter::~TextLineSplitter()
{
    delete [] mBuff;
    delete [] mTokens;
}


void TextLineSplitter::SplitLine( const char *line,
                                  const char sep_char   /* = ',' */,
                                )
{
    assert( sep_char != '\0' );

    ResetContent();
    strncpy( mBuff, line, mMaxLineLen );

    size_t idx       = 0L; // running index for characters

    do
    {
        assert( idx < mStorageSize );

        const char chr = line[ idx ]; // retrieve current character

        if( mTokens[ mNumTokens ] == NULL )
        {
            mTokens[ mNumTokens ] = &mBuff[ idx ];
        } // if

        if( chr == sep_char || chr == '\0' )
        { // item or line finished
            // overwrite separator with a 0-terminating character:
            mBuff[ idx ] = '\0';
            // count-up items:
            mNumTokens ++;
        } // if

    } while( line[ idx++ ] );
}

使用的场景是:

// create an instance capable of splitting strings up to 1000 chars long:
TextLineSplitter spl( 1000 );
spl.SplitLine( "Item1,,Item2,Item3" );
for( size_t i = 0; i < spl.NumTokens(); i++ )
{
    printf( "%s\n", spl.GetToken( i ) );
}

输出:

Item1

Item2
Item3

无意冒犯,但对于这样一个简单的问题,你把事情搞得太复杂了。使用Boost有很多理由。但对于这么简单的事情,就像用20号雪橇打苍蝇一样。

void
split( vector<string> & theStringVector,  /* Altered/returned value */
       const  string  & theString,
       const  string  & theDelimiter)
{
    UASSERT( theDelimiter.size(), >, 0); // My own ASSERT macro.

    size_t  start = 0, end = 0;

    while ( end != string::npos)
    {
        end = theString.find( theDelimiter, start);

        // If at end, use length=maxLength.  Else use length=end-start.
        theStringVector.push_back( theString.substr( start,
                       (end == string::npos) ? string::npos : end - start));

        // If at end, use start=maxSize.  Else use start=end+delimiter.
        start = (   ( end > (string::npos - theDelimiter.size()) )
                  ?  string::npos  :  end + theDelimiter.size());
    }
}

例如(以Doug为例),

#define SHOW(I,X)   cout << "[" << (I) << "]\t " # X " = \"" << (X) << "\"" << endl

int
main()
{
    vector<string> v;

    split( v, "A:PEP:909:Inventory Item", ":" );

    for (unsigned int i = 0;  i < v.size();   i++)
        SHOW( i, v[i] );
}

是的,我们可以split()返回一个新的向量,而不是传入一个。包装和重载是很简单的。但根据我所做的事情,我经常发现重用已有的对象比总是创建新对象更好。(只要我不忘记清空中间的向量!)

参考:http://www.cplusplus.com/reference/string/string/。

(我最初是在写一个回应Doug的问题:基于分隔符的c++字符串修改和提取(关闭)。但由于马丁·约克用这里的指针结束了这个问题……我将泛化我的代码。)

这里有许多过于复杂的建议。试试这个简单的std::string解决方案:

using namespace std;

string someText = ...

string::size_type tokenOff = 0, sepOff = tokenOff;
while (sepOff != string::npos)
{
    sepOff = someText.find(' ', sepOff);
    string::size_type tokenLen = (sepOff == string::npos) ? sepOff : sepOff++ - tokenOff;
    string token = someText.substr(tokenOff, tokenLen);
    if (!token.empty())
        /* do something with token */;
    tokenOff = sepOff;
}

我只是看了所有的答案,找不到下一个前提条件的解决方案:

没有动态内存分配 不使用boost 不使用正则表达式 c++17标准

这就是我的解

#include <iomanip>
#include <iostream>
#include <iterator>
#include <string_view>
#include <utility>

struct split_by_spaces
{
    std::string_view      text;
    static constexpr char delim = ' ';

    struct iterator
    {
        const std::string_view& text;
        std::size_t             cur_pos;
        std::size_t             end_pos;

        std::string_view operator*() const
        {
            return { &text[cur_pos], end_pos - cur_pos };
        }
        bool operator==(const iterator& other) const
        {
            return cur_pos == other.cur_pos && end_pos == other.end_pos;
        }
        bool operator!=(const iterator& other) const
        {
            return !(*this == other);
        }
        iterator& operator++()
        {
            cur_pos = text.find_first_not_of(delim, end_pos);

            if (cur_pos == std::string_view::npos)
            {
                cur_pos = text.size();
                end_pos = cur_pos;
                return *this;
            }

            end_pos = text.find(delim, cur_pos);

            if (cur_pos == std::string_view::npos)
            {
                end_pos = text.size();
            }

            return *this;
        }
    };

    [[nodiscard]] iterator begin() const
    {
        auto start = text.find_first_not_of(delim);
        if (start == std::string_view::npos)
        {
            return iterator{ text, text.size(), text.size() };
        }
        auto end_word = text.find(delim, start);
        if (end_word == std::string_view::npos)
        {
            end_word = text.size();
        }
        return iterator{ text, start, end_word };
    }
    [[nodiscard]] iterator end() const
    {
        return iterator{ text, text.size(), text.size() };
    }
};

int main(int argc, char** argv)
{
    using namespace std::literals;
    auto str = " there should be no memory allocation during parsing"
               "  into words this line and you   should'n create any"
               "  contaner                  for intermediate words  "sv;

    auto comma = "";
    for (std::string_view word : split_by_spaces{ str })
    {
        std::cout << std::exchange(comma, ",") << std::quoted(word);
    }

    auto only_spaces = "                   "sv;
    for (std::string_view word : split_by_spaces{ only_spaces })
    {
        std::cout << "you will not see this line in output" << std::endl;
    }
}