在c++中进行不区分大小写字符串比较而不将字符串转换为全大写或全小写的最佳方法是什么?

请指出这些方法是否对unicode友好,以及它们的可移植性如何。


当前回答

Boost::iequals在string的情况下不兼容utf-8。 你可以使用boost::locale。

comparator<char,collator_base::secondary> cmpr;
cout << (cmpr(str1, str2) ? "str1 < str2" : "str1 >= str2") << endl;

Primary -- ignore accents and character case, comparing base letters only. For example "facade" and "Façade" are the same. Secondary -- ignore character case but consider accents. "facade" and "façade" are different but "Façade" and "façade" are the same. Tertiary -- consider both case and accents: "Façade" and "façade" are different. Ignore punctuation. Quaternary -- consider all case, accents, and punctuation. The words must be identical in terms of Unicode representation. Identical -- as quaternary, but compare code points as well.

其他回答

str1.size() == str2.size() && std::equal(str1.begin(), str1.end(), str2.begin(), [](auto a, auto b){return std::tolower(a)==std::tolower(b);})

如果你不能使用boost,你可以在c++ 14中使用上面的代码。对于宽字符,必须使用std::towlower。

支持unicode的Visual c++字符串函数:http://msdn.microsoft.com/en-us/library/cc194799.aspx

您可能正在寻找的是_wcsnicmp

又短又漂亮。没有其他依赖,除了扩展的std C库。

strcasecmp(str1.c_str(), str2.c_str()) == 0

如果str1和str2相等,则返回true。 Strcasecmp可能不存在,可能有类似的stricmp, strcmpi等。

示例代码:

#include <iostream>
#include <string>
#include <string.h> //For strcasecmp(). Also could be found in <mem.h>

using namespace std;

/// Simple wrapper
inline bool str_ignoreCase_cmp(std::string const& s1, std::string const& s2) {
    if(s1.length() != s2.length())
        return false;  // optimization since std::string holds length in variable.
    return strcasecmp(s1.c_str(), s2.c_str()) == 0;
}

/// Function object - comparator
struct StringCaseInsensetiveCompare {
    bool operator()(std::string const& s1, std::string const& s2) {
        if(s1.length() != s2.length())
            return false;  // optimization since std::string holds length in variable.
        return strcasecmp(s1.c_str(), s2.c_str()) == 0;
    }
    bool operator()(const char *s1, const char * s2){ 
        return strcasecmp(s1,s2)==0;
    }
};


/// Convert bool to string
inline char const* bool2str(bool b){ return b?"true":"false"; }

int main()
{
    cout<< bool2str(strcasecmp("asd","AsD")==0) <<endl;
    cout<< bool2str(strcasecmp(string{"aasd"}.c_str(),string{"AasD"}.c_str())==0) <<endl;
    StringCaseInsensetiveCompare cmp;
    cout<< bool2str(cmp("A","a")) <<endl;
    cout<< bool2str(cmp(string{"Aaaa"},string{"aaaA"})) <<endl;
    cout<< bool2str(str_ignoreCase_cmp(string{"Aaaa"},string{"aaaA"})) <<endl;
    return 0;
}

输出:

true
true
true
true
true

如果你不想使用Boost库,那么这里有一个解决方案,它只使用c++标准io头。

#include <iostream>

struct iequal
{
    bool operator()(int c1, int c2) const
    {
        // case insensitive comparison of two characters.
        return std::toupper(c1) == std::toupper(c2);
    }
};

bool iequals(const std::string& str1, const std::string& str2)
{
    // use std::equal() to compare range of characters using the functor above.
    return std::equal(str1.begin(), str1.end(), str2.begin(), iequal());
}

int main(void)
{
    std::string str_1 = "HELLO";
    std::string str_2 = "hello";

    if(iequals(str_1,str_2))
    {
        std::cout<<"String are equal"<<std::endl;   
    }

    else
    {
        std::cout<<"String are not equal"<<std::endl;
    }


    return 0;
}

我写了一个不区分大小写的char_traits版本,用于std::basic_string,以便在使用内置的std::basic_string成员函数进行比较、搜索等时生成一个不区分大小写的std::string。

换句话说,我想这样做。

std::string a = "Hello, World!";
std::string b = "hello, world!";

assert( a == b );

...这是std::string不能处理的。下面是我的新char_traits的用法:

std::istring a = "Hello, World!";
std::istring b = "hello, world!";

assert( a == b );

...这是它的实现:

/*  ---

        Case-Insensitive char_traits for std::string's

        Use:

            To declare a std::string which preserves case but ignores case in comparisons & search,
            use the following syntax:

                std::basic_string<char, char_traits_nocase<char> > noCaseString;

            A typedef is declared below which simplifies this use for chars:

                typedef std::basic_string<char, char_traits_nocase<char> > istring;

    --- */

    template<class C>
    struct char_traits_nocase : public std::char_traits<C>
    {
        static bool eq( const C& c1, const C& c2 )
        { 
            return ::toupper(c1) == ::toupper(c2); 
        }

        static bool lt( const C& c1, const C& c2 )
        { 
            return ::toupper(c1) < ::toupper(c2);
        }

        static int compare( const C* s1, const C* s2, size_t N )
        {
            return _strnicmp(s1, s2, N);
        }

        static const char* find( const C* s, size_t N, const C& a )
        {
            for( size_t i=0 ; i<N ; ++i )
            {
                if( ::toupper(s[i]) == ::toupper(a) ) 
                    return s+i ;
            }
            return 0 ;
        }

        static bool eq_int_type( const int_type& c1, const int_type& c2 )
        { 
            return ::toupper(c1) == ::toupper(c2) ; 
        }       
    };

    template<>
    struct char_traits_nocase<wchar_t> : public std::char_traits<wchar_t>
    {
        static bool eq( const wchar_t& c1, const wchar_t& c2 )
        { 
            return ::towupper(c1) == ::towupper(c2); 
        }

        static bool lt( const wchar_t& c1, const wchar_t& c2 )
        { 
            return ::towupper(c1) < ::towupper(c2);
        }

        static int compare( const wchar_t* s1, const wchar_t* s2, size_t N )
        {
            return _wcsnicmp(s1, s2, N);
        }

        static const wchar_t* find( const wchar_t* s, size_t N, const wchar_t& a )
        {
            for( size_t i=0 ; i<N ; ++i )
            {
                if( ::towupper(s[i]) == ::towupper(a) ) 
                    return s+i ;
            }
            return 0 ;
        }

        static bool eq_int_type( const int_type& c1, const int_type& c2 )
        { 
            return ::towupper(c1) == ::towupper(c2) ; 
        }       
    };

    typedef std::basic_string<char, char_traits_nocase<char> > istring;
    typedef std::basic_string<wchar_t, char_traits_nocase<wchar_t> > iwstring;