如何在c++中转换大端值和小端值?
为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。
注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。
如何在c++中转换大端值和小端值?
为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。
注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。
当前回答
我从这篇文章中得到了一些建议,并把它们放在一起形成了这个:
#include <boost/type_traits.hpp>
#include <boost/static_assert.hpp>
#include <boost/detail/endian.hpp>
#include <stdexcept>
#include <cstdint>
enum endianness
{
little_endian,
big_endian,
network_endian = big_endian,
#if defined(BOOST_LITTLE_ENDIAN)
host_endian = little_endian
#elif defined(BOOST_BIG_ENDIAN)
host_endian = big_endian
#else
#error "unable to determine system endianness"
#endif
};
namespace detail {
template<typename T, size_t sz>
struct swap_bytes
{
inline T operator()(T val)
{
throw std::out_of_range("data size");
}
};
template<typename T>
struct swap_bytes<T, 1>
{
inline T operator()(T val)
{
return val;
}
};
template<typename T>
struct swap_bytes<T, 2>
{
inline T operator()(T val)
{
return ((((val) >> 8) & 0xff) | (((val) & 0xff) << 8));
}
};
template<typename T>
struct swap_bytes<T, 4>
{
inline T operator()(T val)
{
return ((((val) & 0xff000000) >> 24) |
(((val) & 0x00ff0000) >> 8) |
(((val) & 0x0000ff00) << 8) |
(((val) & 0x000000ff) << 24));
}
};
template<>
struct swap_bytes<float, 4>
{
inline float operator()(float val)
{
uint32_t mem =swap_bytes<uint32_t, sizeof(uint32_t)>()(*(uint32_t*)&val);
return *(float*)&mem;
}
};
template<typename T>
struct swap_bytes<T, 8>
{
inline T operator()(T val)
{
return ((((val) & 0xff00000000000000ull) >> 56) |
(((val) & 0x00ff000000000000ull) >> 40) |
(((val) & 0x0000ff0000000000ull) >> 24) |
(((val) & 0x000000ff00000000ull) >> 8 ) |
(((val) & 0x00000000ff000000ull) << 8 ) |
(((val) & 0x0000000000ff0000ull) << 24) |
(((val) & 0x000000000000ff00ull) << 40) |
(((val) & 0x00000000000000ffull) << 56));
}
};
template<>
struct swap_bytes<double, 8>
{
inline double operator()(double val)
{
uint64_t mem =swap_bytes<uint64_t, sizeof(uint64_t)>()(*(uint64_t*)&val);
return *(double*)&mem;
}
};
template<endianness from, endianness to, class T>
struct do_byte_swap
{
inline T operator()(T value)
{
return swap_bytes<T, sizeof(T)>()(value);
}
};
// specialisations when attempting to swap to the same endianess
template<class T> struct do_byte_swap<little_endian, little_endian, T> { inline T operator()(T value) { return value; } };
template<class T> struct do_byte_swap<big_endian, big_endian, T> { inline T operator()(T value) { return value; } };
} // namespace detail
template<endianness from, endianness to, class T>
inline T byte_swap(T value)
{
// ensure the data is only 1, 2, 4 or 8 bytes
BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
// ensure we're only swapping arithmetic types
BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
return detail::do_byte_swap<from, to, T>()(value);
}
然后你可以这样使用它:
// swaps val from host-byte-order to network-byte-order
auto swapped = byte_swap<host_endian, network_endian>(val);
反之亦然
// swap a value received from the network into host-byte-order
auto val = byte_swap<network_endian, host_endian>(val_from_network);
其他回答
有一个叫做BSWAP的汇编指令可以帮你做交换,非常快。 你可以在这里阅读。
Visual Studio,或者更准确地说是Visual c++运行时库,为此提供了平台intrinsic,称为_byteswap_ushort()、_byteswap_ulong()和_byteswap_int64()。其他平台应该也有类似的情况,但我不知道它们会被称为什么。
c++20无分支版本,现在std::endian已经存在,但在c++23之前增加了std::byteswap
#include <bit>
#include <type_traits>
#include <concepts>
#include <array>
#include <cstring>
#include <iostream>
#include <bitset>
template <int LEN, int OFF=LEN/2>
class do_swap
{
// FOR 8 bytes:
// LEN=8 (LEN/2==4) <H><G><F><E><D><C><B><A>
// OFF=4: FROM=0, TO=7 => [A]<G><F><E><D><C><B>[H]
// OFF=3: FROM=1, TO=6 => [A][B]<F><E><D><C>[G][H]
// OFF=2: FROM=2, TO=5 => [A][B][C]<E><D>[F][G][H]
// OFF=1: FROM=3, TO=4 => [A][B][C][D][E][F][G][H]
// OFF=0: FROM=4, TO=3 => DONE
public:
enum consts {FROM=LEN/2-OFF, TO=(LEN-1)-FROM};
using NXT=do_swap<LEN, OFF-1>;
// flip the first and last for the current iteration's range
static void flip(std::array<std::byte, LEN>& b)
{
std::byte tmp=b[FROM];
b[FROM]=b[TO];
b[TO]=tmp;
NXT::flip(b);
}
};
template <int LEN>
class do_swap<LEN, 0> // STOP the template recursion
{
public:
static void flip(std::array<std::byte, LEN>&)
{
}
};
template<std::integral T, std::endian TO, std::endian FROM=std::endian::native>
requires ((TO==std::endian::big) || (TO==std::endian::little))
&& ((FROM==std::endian::big) || (FROM==std::endian::little))
class endian_swap
{
public:
enum consts {BYTE_COUNT=sizeof(T)};
static T cvt(const T integral)
{
// if FROM and TO are the same -- nothing to do
if (TO==FROM)
{
return integral;
}
// endian::big --> endian::little is the same as endian::little --> endian::big
// the bytes have to be reversed
// memcpy seems to be the most supported way to do byte swaps in a defined way
std::array<std::byte, BYTE_COUNT> bytes;
std::memcpy(&bytes, &integral, BYTE_COUNT);
do_swap<BYTE_COUNT>::flip(bytes);
T ret;
std::memcpy(&ret, &bytes, BYTE_COUNT);
return ret;
}
};
std::endian big()
{
return std::endian::big;
}
std::endian little()
{
return std::endian::little;
}
std::endian native()
{
return std::endian::native;
}
long long swap_to_big(long long x)
{
return endian_swap<long long, std::endian::big>::cvt(x);
}
long long swap_to_little(long long x)
{
return endian_swap<long long, std::endian::little>::cvt(x);
}
void show(std::string label, long long x)
{
std::cout << label << "\t: " << std::bitset<64>(x) << " (" << x << ")" << std::endl;
}
int main(int argv, char ** argc)
{
long long init=0xF8FCFEFF7F3F1F0;
long long to_big=swap_to_big(init);
long long to_little=swap_to_little(init);
show("Init", init);
show(">big", to_big);
show(">little", to_little);
}
我们已经用模板做到了这一点。你可以这样做:
// Specialization for 2-byte types.
template<>
inline void endian_byte_swapper< 2 >(char* dest, char const* src)
{
// Use bit manipulations instead of accessing individual bytes from memory, much faster.
ushort* p_dest = reinterpret_cast< ushort* >(dest);
ushort const* const p_src = reinterpret_cast< ushort const* >(src);
*p_dest = (*p_src >> 8) | (*p_src << 8);
}
// Specialization for 4-byte types.
template<>
inline void endian_byte_swapper< 4 >(char* dest, char const* src)
{
// Use bit manipulations instead of accessing individual bytes from memory, much faster.
uint* p_dest = reinterpret_cast< uint* >(dest);
uint const* const p_src = reinterpret_cast< uint const* >(src);
*p_dest = (*p_src >> 24) | ((*p_src & 0x00ff0000) >> 8) | ((*p_src & 0x0000ff00) << 8) | (*p_src << 24);
}
如果一个大端位32位无符号整数看起来像0xAABBCCDD,它等于2864434397,那么同样的32位无符号整数在小端位处理器上看起来像0xDDCCBBAA,它也等于2864434397。
如果一个大端序16位无符号空头看起来像0xAABB,它等于43707,那么同一个16位无符号空头在小端序处理器上看起来像0xBBAA,它也等于43707。
这里有两个方便的#define函数,用于将字节从小端序转换为大端序,反之亦然——>
// can be used for short, unsigned short, word, unsigned word (2-byte types)
#define BYTESWAP16(n) (((n&0xFF00)>>8)|((n&0x00FF)<<8))
// can be used for int or unsigned int or float (4-byte types)
#define BYTESWAP32(n) ((BYTESWAP16((n&0xFFFF0000)>>16))|((BYTESWAP16(n&0x0000FFFF))<<16))
// can be used for unsigned long long or double (8-byte types)
#define BYTESWAP64(n) ((BYTESWAP32((n&0xFFFFFFFF00000000)>>32))|((BYTESWAP32(n&0x00000000FFFFFFFF))<<32))
来这里寻找一个Boost解决方案,失望地离开,但最终在其他地方找到了它。你可以使用boost::endian::endian_reverse。它被模板化/重载了所有的基元类型:
#include <iostream>
#include <iomanip>
#include "boost/endian/conversion.hpp"
int main()
{
uint32_t word = 0x01;
std::cout << std::hex << std::setfill('0') << std::setw(8) << word << std::endl;
// outputs 00000001;
uint32_t word2 = boost::endian::endian_reverse(word);
// there's also a `void ::endian_reverse_inplace(...) function
// that reverses the value passed to it in place and returns nothing
std::cout << std::hex << std::setfill('0') << std::setw(8) << word2 << std::endl;
// outputs 01000000
return 0;
}
示范
虽然,看起来c++23最终用std::byteswap解决了这个问题。(我使用的是c++17,所以这不是一个选项。)