Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
Java有一个方便的分割方法:
String str = "The quick brown fox";
String[] results = str.split(" ");
在c++中有简单的方法来做到这一点吗?
当前回答
使用regex_token_iterators的解决方案:
#include <iostream>
#include <regex>
#include <string>
using namespace std;
int main()
{
string str("The quick brown fox");
regex reg("\\s+");
sregex_token_iterator iter(str.begin(), str.end(), reg, -1);
sregex_token_iterator end;
vector<string> vec(iter, end);
for (auto a : vec)
{
cout << a << endl;
}
}
其他回答
我只是看了所有的答案,找不到下一个前提条件的解决方案:
没有动态内存分配 不使用boost 不使用正则表达式 c++17标准
这就是我的解
#include <iomanip>
#include <iostream>
#include <iterator>
#include <string_view>
#include <utility>
struct split_by_spaces
{
std::string_view text;
static constexpr char delim = ' ';
struct iterator
{
const std::string_view& text;
std::size_t cur_pos;
std::size_t end_pos;
std::string_view operator*() const
{
return { &text[cur_pos], end_pos - cur_pos };
}
bool operator==(const iterator& other) const
{
return cur_pos == other.cur_pos && end_pos == other.end_pos;
}
bool operator!=(const iterator& other) const
{
return !(*this == other);
}
iterator& operator++()
{
cur_pos = text.find_first_not_of(delim, end_pos);
if (cur_pos == std::string_view::npos)
{
cur_pos = text.size();
end_pos = cur_pos;
return *this;
}
end_pos = text.find(delim, cur_pos);
if (cur_pos == std::string_view::npos)
{
end_pos = text.size();
}
return *this;
}
};
[[nodiscard]] iterator begin() const
{
auto start = text.find_first_not_of(delim);
if (start == std::string_view::npos)
{
return iterator{ text, text.size(), text.size() };
}
auto end_word = text.find(delim, start);
if (end_word == std::string_view::npos)
{
end_word = text.size();
}
return iterator{ text, start, end_word };
}
[[nodiscard]] iterator end() const
{
return iterator{ text, text.size(), text.size() };
}
};
int main(int argc, char** argv)
{
using namespace std::literals;
auto str = " there should be no memory allocation during parsing"
" into words this line and you should'n create any"
" contaner for intermediate words "sv;
auto comma = "";
for (std::string_view word : split_by_spaces{ str })
{
std::cout << std::exchange(comma, ",") << std::quoted(word);
}
auto only_spaces = " "sv;
for (std::string_view word : split_by_spaces{ only_spaces })
{
std::cout << "you will not see this line in output" << std::endl;
}
}
MFC/ATL有一个非常好的标记器。从MSDN:
CAtlString str( "%First Second#Third" );
CAtlString resToken;
int curPos= 0;
resToken= str.Tokenize("% #",curPos);
while (resToken != "")
{
printf("Resulting token: %s\n", resToken);
resToken= str.Tokenize("% #",curPos);
};
Output
Resulting Token: First
Resulting Token: Second
Resulting Token: Third
这是一个简单的stl解决方案(~5行!)使用std::find和std::find_first_not_of来处理重复的分隔符(例如空格或句号),以及开头和结尾的分隔符:
#include <string>
#include <vector>
void tokenize(std::string str, std::vector<string> &token_v){
size_t start = str.find_first_not_of(DELIMITER), end=start;
while (start != std::string::npos){
// Find next occurence of delimiter
end = str.find(DELIMITER, start);
// Push back the token found into vector
token_v.push_back(str.substr(start, end-start));
// Skip all occurences of the delimiter to find new start
start = str.find_first_not_of(DELIMITER, end);
}
}
现场试试吧!
我认为这就是字符串流上的>>操作符的用途:
string word; sin >> word;
我以前只用标准库做了一个lexer/tokenizer。代码如下:
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
using namespace std;
string seps(string& s) {
if (!s.size()) return "";
stringstream ss;
ss << s[0];
for (int i = 1; i < s.size(); i++) {
ss << '|' << s[i];
}
return ss.str();
}
void Tokenize(string& str, vector<string>& tokens, const string& delimiters = " ")
{
seps(str);
// Skip delimiters at beginning.
string::size_type lastPos = str.find_first_not_of(delimiters, 0);
// Find first "non-delimiter".
string::size_type pos = str.find_first_of(delimiters, lastPos);
while (string::npos != pos || string::npos != lastPos)
{
// Found a token, add it to the vector.
tokens.push_back(str.substr(lastPos, pos - lastPos));
// Skip delimiters. Note the "not_of"
lastPos = str.find_first_not_of(delimiters, pos);
// Find next "non-delimiter"
pos = str.find_first_of(delimiters, lastPos);
}
}
int main(int argc, char *argv[])
{
vector<string> t;
string s = "Tokens for everyone!";
Tokenize(s, t, "|");
for (auto c : t)
cout << c << endl;
system("pause");
return 0;
}