我在c++中使用以下方法解析字符串:

using namespace std;

// `input` holds the text to parse; `parsed` receives the extracted token.
string parsed,input="text to be parsed";
// Wrap the text in a stream so that getline can read from it.
stringstream input_stringstream(input);

// Read characters from the stream into `parsed`, using ' ' as the delimiter.
if (getline(input_stringstream,parsed,' '))
{
     // do some processing.
}

使用单个字符分隔符进行解析是可以的。但是如果我想使用字符串作为分隔符呢?

例子:我想拆分:

scott>=tiger

用>=作为分隔符,这样我就可以得到scott和tiger。


当前回答

以防将来有人需要,这里把Vincenzo Pii的答案封装成了一个开箱即用的函数:

#include <vector>
#include <string>


/// Splits `str` on every occurrence of the (possibly multi-character)
/// separator `delimeter`, discarding empty tokens.
///
/// @param str        Text to split.
/// @param delimeter  Separator string.
/// @return The non-empty tokens in order of appearance. Leading, trailing and
///         consecutive separators never produce empty entries. An empty
///         `delimeter` splits nothing: the result is `str` as a single token,
///         or an empty vector when `str` itself is empty.
std::vector<std::string> SplitString(
    const std::string& str,
    const std::string& delimeter)
{
    std::vector<std::string> splittedStrings;

    // find("") matches at every position without consuming any input, so an
    // empty separator has to be handled up front or the loop never ends.
    if (delimeter.empty())
    {
        if (!str.empty())
            splittedStrings.push_back(str);
        return splittedStrings;
    }

    // Advance a start index instead of erasing the consumed prefix; erasing
    // from the front shifts the remaining characters on every token.
    std::string::size_type start = 0;
    std::string::size_type pos = 0;
    while ((pos = str.find(delimeter, start)) != std::string::npos)
    {
        if (pos > start)
            splittedStrings.push_back(str.substr(start, pos - start));
        start = pos + delimeter.length();
    }

    // Whatever follows the last separator is the final token.
    if (start < str.length())
        splittedStrings.push_back(str.substr(start));
    return splittedStrings;
}

我还修复了一些错误:即使字符串的开头或结尾有分隔符,函数也不会返回空字符串。

其他回答

我使用指针算术。下面的版本支持字符串类型的delim;如果单个char的delim就够用,只需删除内层的while循环即可。我希望它是正确的。如果你发现任何错误或有改进建议,请留下评论。

/// Splits `s` on every occurrence of the separator string `delim` by scanning
/// the character buffer with pointers. Empty tokens are kept.
///
/// @param s      Text to split.
/// @param delim  Separator string.
/// @return The tokens in order of appearance. The result always holds at least
///         one element; leading, trailing and consecutive separators yield
///         empty strings. An empty `delim` splits nothing, so the result is
///         `s` as the only element.
std::vector<std::string> split(const std::string& s, const std::string& delim)
{
    std::vector<std::string> res = {""};

    // An empty separator would "match" everywhere without advancing.
    if (delim.empty())
    {
        res.back() = s;
        return res;
    }

    const char *p = s.data();
    const char *const s_end = p + s.size();
    const char *const d = delim.data();
    const char *const d_end = d + delim.size();

    // Bound the scan by the string's length rather than by its NUL terminator,
    // so the terminator is never copied into a token and embedded NUL
    // characters do not cut the scan short.
    while (p != s_end)
    {
        const char *pp = p;
        const char *dd = d;
        // Try to match the whole separator starting at the current position.
        while (dd != d_end && pp != s_end && *pp == *dd)
        {
            ++pp;
            ++dd;
        }

        if (dd == d_end)
        {
            // Full match: skip the separator and open a new token.
            p = pp;
            res.push_back("");
        }
        else
        {
            // No match here: the character belongs to the current token.
            res.back() += *p;
            ++p;
        }
    }

    return res;
}
/// Adds `t` to container `c` via `push_back`. This overload takes part in
/// overload resolution only when `c.push_back(t)` is a valid expression.
template<typename C, typename T>
auto insert_in_container(C& c, T&& t) -> decltype(c.push_back(std::forward<T>(t)), void()) {
    c.push_back(std::forward<T>(t));
}
/// Adds `t` to container `c` via single-argument `insert`. This overload takes
/// part in overload resolution only when `c.insert(t)` is a valid expression.
template<typename C, typename T>
auto insert_in_container(C& c, T&& t) -> decltype(c.insert(std::forward<T>(t)), void()) {
    c.insert(std::forward<T>(t));
}
/// Splits `input` on every occurrence of the separator string `delims` and
/// collects the non-empty tokens into a `Container`.
///
/// @tparam Container  Container of strings that offers either `push_back` or
///                    single-argument `insert`.
/// @param input   Text to split.
/// @param delims  Separator string, matched as a whole (not a character set).
/// @return The non-empty tokens, inserted in order of appearance. An empty
///         `delims` splits nothing: the result holds `input` as its only
///         token, or is empty when `input` is empty.
template<typename Container>
Container splitR(const std::string& input, const std::string& delims) {
    Container out;

    // find("") succeeds at every position without advancing, so an empty
    // separator must be handled here to keep the loop below from spinning.
    if (delims.empty()) {
        if (!input.empty()) insert_in_container(out, input);
        return out;
    }

    // Use the string's own size type for offsets; a narrower unsigned type
    // would truncate positions in very large inputs.
    std::string::size_type begIdx = 0;
    std::string::size_type endIdx = input.find(delims, begIdx);
    while (endIdx != std::string::npos) {
        if (endIdx != begIdx) insert_in_container(out, input.substr(begIdx, endIdx - begIdx));
        begIdx = endIdx + delims.size();
        endIdx = input.find(delims, begIdx);
    }

    // Remainder after the last separator (the whole input if none was found).
    if (begIdx != input.size()) insert_in_container(out, input.substr(begIdx));
    return out;
}

作为奖励,这里有一个分割函数和宏的代码示例,很容易使用,你可以在其中选择容器类型:

#include <iostream>
#include <vector>
#include <string>

// Convenience wrapper around split_fn: `type` is a container template name
// (for example std::vector) that gets instantiated with std::string.
#define split(str, delim, type) (split_fn<type<std::string>>(str, delim))

/// Splits `str` at every occurrence of the single character `delim`.
///
/// @tparam Container  Container of strings providing `push_back`.
/// @param str    Text to split; it is not modified.
/// @param delim  Separator character (a space by default).
/// @return Every piece in order, empty pieces included; the result always
///         holds at least one element.
template <typename Container>
Container split_fn(const std::string& str, char delim = ' ') {
    Container pieces{};
    std::size_t tokenStart = 0;

    for (std::size_t hit = str.find(delim);
         hit != std::string::npos;
         hit = str.find(delim, tokenStart)) {
        pieces.push_back(str.substr(tokenStart, hit - tokenStart));
        tokenStart = hit + 1;
    }

    // Everything after the last separator forms the final piece.
    pieces.push_back(str.substr(tokenStart));
    return pieces;
}

// Demo: split a sentence on spaces into a std::vector and print each word.
int main() {
    const std::string test{"This is a great test"};
    auto res = split(test, ' ', std::vector);

    // Prints: This, is, a, great, test, 
    for (const auto& word : res) {
        std::cout << word << ", ";
    }

    return 0;
}

该方法使用std::string::find,而不改变原始字符串,记住前一个子字符串标记的开始和结束。

#include <iostream>
#include <string>

// Demo: print each token of `s`, split on the multi-character separator
// `delim`, without modifying `s`. Tokens are located with std::string::find
// by remembering where the previous separator ended.
int main()
{
    const std::string s = "scott>=tiger";
    const std::string delim = ">=";

    // Offsets use the string's size type; a narrower unsigned type would
    // truncate positions and mix widths with find()'s return value.
    std::string::size_type start = 0;
    std::string::size_type end = s.find(delim);
    while (end != std::string::npos)
    {
        std::cout << s.substr(start, end - start) << '\n';
        start = end + delim.length();
        end = s.find(delim, start);
    }

    // The final token runs from the last separator to the end of the string.
    // substr's second argument is a count, not a position, so it is omitted.
    std::cout << s.substr(start);
}

下面是一个使用Boost String Algorithms库和Boost Range库、以另一个字符串作为分隔符来分割字符串的示例。这个解决方案的灵感来自StringAlgo库文档(请参阅其中的Split部分)。

下面是包含split_with_string函数和较全面测试的完整程序——可以在godbolt上试试:

#include <iostream>
#include <string>
#include <vector>
#include <boost/algorithm/string.hpp>
#include <boost/range/iterator_range.hpp>

/// Splits `s` at every occurrence of `search`, keeping empty parts.
///
/// @param s       Text to split.
/// @param search  Separator string.
/// @return The parts in order of appearance. A separator at the start or end
///         of `s`, or two adjacent separators, yields an empty part. When
///         `search` is empty or does not occur in `s`, the result is `s` as
///         the only part.
std::vector<std::string> split_with_string(std::string_view s, std::string_view search) 
{
    // An empty separator cannot split anything.
    if (search.empty()) return {std::string{s}};

    std::vector<boost::iterator_range<std::string_view::iterator>> found;
    // NOTE(review): ifind_all is the case-insensitive variant, so "x" also
    // splits on "X". Switch to boost::algorithm::find_all if exact matching
    // is what callers expect.
    boost::algorithm::ifind_all(found, s, search);

    std::vector<std::string> parts;
    parts.reserve(found.size() + 1); // one more part than separators

    std::string_view::iterator part_begin = s.cbegin();
    for (const auto& split_found : found)
    {
        // do not skip empty extracts
        parts.emplace_back(part_begin, split_found.begin());
        part_begin = split_found.end();
    }
    // Tail after the last separator. With no match at all this is the whole
    // input, so a missing separator returns {s} instead of dropping the text.
    parts.emplace_back(part_begin, s.end());

    return parts;
}

// Evaluates `expr` and prints "PASS" or "FAIL", then ": ", the source text of
// the expression, a tab and a newline to std::cout.
#define TEST(expr) std::cout << ((!(expr)) ? "FAIL" : "PASS") << ": " #expr "\t" << std::endl

// Test driver for split_with_string: each group splits one input and reports
// the expected part count plus the expected first and last parts via TEST.
int main()
{
    // Empty separator: expects the whole input back as the only part.
    auto s0 = split_with_string("adsf-+qwret-+nvfkbdsj", "");
    TEST(s0.size() == 1);
    TEST(s0.front() == "adsf-+qwret-+nvfkbdsj");
    // Separator only between tokens: expects three parts.
    auto s1 = split_with_string("adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s1.size() == 3);
    TEST(s1.front() == "adsf");
    TEST(s1.back() == "nvfkbdsj");
    // Separator at both ends: expects empty first and last parts.
    auto s2 = split_with_string("-+adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s2.size() == 5);
    TEST(s2.front() == "");
    TEST(s2.back() == "");
    // Leading separator only: expects an empty first part.
    auto s3 = split_with_string("-+adsf-+qwret-+nvfkbdsj", "-+");
    TEST(s3.size() == 4);
    TEST(s3.front() == "");
    TEST(s3.back() == "nvfkbdsj");
    // Trailing separator only: expects an empty last part.
    auto s4 = split_with_string("adsf-+qwret-+nvfkbdsj-+", "-+");
    TEST(s4.size() == 4);
    TEST(s4.front() == "adsf");
    TEST(s4.back() == "");
    // Multi-character separator that is a prefix of the input.
    auto s5 = split_with_string("dbo.abc", "dbo.");
    TEST(s5.size() == 2);
    TEST(s5.front() == "");
    TEST(s5.back() == "abc");
    // Single-character separator in the middle of the input.
    auto s6 = split_with_string("dbo.abc", ".");
    TEST(s6.size() == 2);
    TEST(s6.front() == "dbo");
    TEST(s6.back() == "abc");
}

测试输出:

PASS: s0.size() == 1    
PASS: s0.front() == "adsf-+qwret-+nvfkbdsj" 
PASS: s1.size() == 3    
PASS: s1.front() == "adsf"  
PASS: s1.back() == "nvfkbdsj"   
PASS: s2.size() == 5    
PASS: s2.front() == ""  
PASS: s2.back() == ""   
PASS: s3.size() == 4    
PASS: s3.front() == ""  
PASS: s3.back() == "nvfkbdsj"   
PASS: s4.size() == 4    
PASS: s4.front() == "adsf"  
PASS: s4.back() == ""   
PASS: s5.size() == 2    
PASS: s5.front() == ""  
PASS: s5.back() == "abc"    
PASS: s6.size() == 2    
PASS: s6.front() == "dbo"   
PASS: s6.back() == "abc"