如何在c++中转换大端值和小端值?

为了清晰起见,我必须将二进制数据(双精度浮点值以及32位和64位整数)从一个CPU架构转换到另一个CPU架构。这并不涉及网络,因此ntoh()和类似的函数在这里不能工作。


注意:我接受的答案直接适用于我的目标编译器(这就是我选择它的原因)。然而,这里还有其他非常好的、更方便的答案。


当前回答

如果一个大端位32位无符号整数看起来像0xAABBCCDD,它等于2864434397,那么同样的32位无符号整数在小端位处理器上看起来像0xDDCCBBAA,它也等于2864434397。

如果一个大端序16位无符号空头看起来像0xAABB,它等于43707,那么同一个16位无符号空头在小端序处理器上看起来像0xBBAA,它也等于43707。

这里有两个方便的#define函数,用于将字节从小端序转换为大端序,反之亦然——>

// can be used for short, unsigned short, word, unsigned word (2-byte types)
#define BYTESWAP16(n) (((n&0xFF00)>>8)|((n&0x00FF)<<8))

// can be used for int or unsigned int or float (4-byte types)
#define BYTESWAP32(n) ((BYTESWAP16((n&0xFFFF0000)>>16))|((BYTESWAP16(n&0x0000FFFF))<<16))

// can be used for unsigned long long or double (8-byte types)
#define BYTESWAP64(n) ((BYTESWAP32((n&0xFFFFFFFF00000000)>>32))|((BYTESWAP32(n&0x00000000FFFFFFFF))<<32))

其他回答

实现优化器友好的未对齐非就地末端访问器的可移植技术。它们处理每个编译器、每个边界对齐和每个字节排序。这些未对齐的例程被补充或讨论,取决于本机的端序和对齐方式。部分列出,但你懂的。BO*是基于本机字节排序的常数值。

uint32_t sw_get_uint32_1234(pu32)
uint32_1234 *pu32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32_1234[0] = (*pu32)[BO32_0];
  bou32.u32_1234[1] = (*pu32)[BO32_1];
  bou32.u32_1234[2] = (*pu32)[BO32_2];
  bou32.u32_1234[3] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_1234(pu32, u32)
uint32_1234 *pu32;
uint32_t u32;
{
  union {
    uint32_1234 u32_1234;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_1234[0];
  (*pu32)[BO32_1] = bou32.u32_1234[1];
  (*pu32)[BO32_2] = bou32.u32_1234[2];
  (*pu32)[BO32_3] = bou32.u32_1234[3];
}

#if HAS_SW_INT64
int64 sw_get_int64_12345678(pi64)
int64_12345678 *pi64;
{
  union {
    int64_12345678 i64_12345678;
    int64 i64;
  } boi64;
  boi64.i64_12345678[0] = (*pi64)[BO64_0];
  boi64.i64_12345678[1] = (*pi64)[BO64_1];
  boi64.i64_12345678[2] = (*pi64)[BO64_2];
  boi64.i64_12345678[3] = (*pi64)[BO64_3];
  boi64.i64_12345678[4] = (*pi64)[BO64_4];
  boi64.i64_12345678[5] = (*pi64)[BO64_5];
  boi64.i64_12345678[6] = (*pi64)[BO64_6];
  boi64.i64_12345678[7] = (*pi64)[BO64_7];
  return(boi64.i64);
}
#endif

int32_t sw_get_int32_3412(pi32)
int32_3412 *pi32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32_3412[2] = (*pi32)[BO32_0];
  boi32.i32_3412[3] = (*pi32)[BO32_1];
  boi32.i32_3412[0] = (*pi32)[BO32_2];
  boi32.i32_3412[1] = (*pi32)[BO32_3];
  return(boi32.i32);
}

void sw_set_int32_3412(pi32, i32)
int32_3412 *pi32;
int32_t i32;
{
  union {
    int32_3412 i32_3412;
    int32_t i32;
  } boi32;
  boi32.i32 = i32;
  (*pi32)[BO32_0] = boi32.i32_3412[2];
  (*pi32)[BO32_1] = boi32.i32_3412[3];
  (*pi32)[BO32_2] = boi32.i32_3412[0];
  (*pi32)[BO32_3] = boi32.i32_3412[1];
}

uint32_t sw_get_uint32_3412(pu32)
uint32_3412 *pu32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32_3412[2] = (*pu32)[BO32_0];
  bou32.u32_3412[3] = (*pu32)[BO32_1];
  bou32.u32_3412[0] = (*pu32)[BO32_2];
  bou32.u32_3412[1] = (*pu32)[BO32_3];
  return(bou32.u32);
}

void sw_set_uint32_3412(pu32, u32)
uint32_3412 *pu32;
uint32_t u32;
{
  union {
    uint32_3412 u32_3412;
    uint32_t u32;
  } bou32;
  bou32.u32 = u32;
  (*pu32)[BO32_0] = bou32.u32_3412[2];
  (*pu32)[BO32_1] = bou32.u32_3412[3];
  (*pu32)[BO32_2] = bou32.u32_3412[0];
  (*pu32)[BO32_3] = bou32.u32_3412[1];
}

float sw_get_float_1234(pf)
float_1234 *pf;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f_1234[0] = (*pf)[BO32_0];
  bof.f_1234[1] = (*pf)[BO32_1];
  bof.f_1234[2] = (*pf)[BO32_2];
  bof.f_1234[3] = (*pf)[BO32_3];
  return(bof.f);
}

void sw_set_float_1234(pf, f)
float_1234 *pf;
float f;
{
  union {
    float_1234 f_1234;
    float f;
  } bof;
  bof.f = (float)f;
  (*pf)[BO32_0] = bof.f_1234[0];
  (*pf)[BO32_1] = bof.f_1234[1];
  (*pf)[BO32_2] = bof.f_1234[2];
  (*pf)[BO32_3] = bof.f_1234[3];
}

double sw_get_double_12345678(pd)
double_12345678 *pd;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d_12345678[0] = (*pd)[BO64_0];
  bod.d_12345678[1] = (*pd)[BO64_1];
  bod.d_12345678[2] = (*pd)[BO64_2];
  bod.d_12345678[3] = (*pd)[BO64_3];
  bod.d_12345678[4] = (*pd)[BO64_4];
  bod.d_12345678[5] = (*pd)[BO64_5];
  bod.d_12345678[6] = (*pd)[BO64_6];
  bod.d_12345678[7] = (*pd)[BO64_7];
  return(bod.d);
}

void sw_set_double_12345678(pd, d)
double_12345678 *pd;
double d;
{
  union {
    double_12345678 d_12345678;
    double d;
  } bod;
  bod.d = d;
  (*pd)[BO64_0] = bod.d_12345678[0];
  (*pd)[BO64_1] = bod.d_12345678[1];
  (*pd)[BO64_2] = bod.d_12345678[2];
  (*pd)[BO64_3] = bod.d_12345678[3];
  (*pd)[BO64_4] = bod.d_12345678[4];
  (*pd)[BO64_5] = bod.d_12345678[5];
  (*pd)[BO64_6] = bod.d_12345678[6];
  (*pd)[BO64_7] = bod.d_12345678[7];
}

如果不与访问器一起使用,这些类型def的好处是会引发编译器错误,从而减少被遗忘的访问器错误。

typedef char int8_1[1], uint8_1[1];

typedef char int16_12[2], uint16_12[2]; /* little endian */
typedef char int16_21[2], uint16_21[2]; /* big endian */

typedef char int24_321[3], uint24_321[3]; /* Alpha Micro, PDP-11 */

typedef char int32_1234[4], uint32_1234[4]; /* little endian */
typedef char int32_3412[4], uint32_3412[4]; /* Alpha Micro, PDP-11 */
typedef char int32_4321[4], uint32_4321[4]; /* big endian */

typedef char int64_12345678[8], uint64_12345678[8]; /* little endian */
typedef char int64_34128756[8], uint64_34128756[8]; /* Alpha Micro, PDP-11 */
typedef char int64_87654321[8], uint64_87654321[8]; /* big endian */

typedef char float_1234[4]; /* little endian */
typedef char float_3412[4]; /* Alpha Micro, PDP-11 */
typedef char float_4321[4]; /* big endian */

typedef char double_12345678[8]; /* little endian */
typedef char double_78563412[8]; /* Alpha Micro? */
typedef char double_87654321[8]; /* big endian */

如果您采用反转单词中位序的常见模式,并剔除每个字节中反转位的部分,那么您将只剩下反转单词中的字节的部分。对于64位:

x = ((x & 0x00000000ffffffff) << 32) ^ ((x >> 32) & 0x00000000ffffffff);
x = ((x & 0x0000ffff0000ffff) << 16) ^ ((x >> 16) & 0x0000ffff0000ffff);
x = ((x & 0x00ff00ff00ff00ff) <<  8) ^ ((x >>  8) & 0x00ff00ff00ff00ff);

编译器应该清除多余的位屏蔽操作(我把它们留在了突出显示模式),但如果它没有,你可以这样重写第一行:

x = ( x                       << 32) ^  (x >> 32);

在大多数架构上,这通常应该简化为一条旋转指令(忽略整个操作可能是一条指令)。

在RISC处理器上,大而复杂的常量可能会导致编译困难。不过,您可以简单地计算前一个的每个常数。像这样:

uint64_t k = 0x00000000ffffffff; /* compiler should know a trick for this */
x = ((x & k) << 32) ^ ((x >> 32) & k);
k ^= k << 16;
x = ((x & k) << 16) ^ ((x >> 16) & k);
k ^= k << 8;
x = ((x & k) <<  8) ^ ((x >>  8) & k);

如果你愿意,你可以把它写成一个循环。这样做效率不高,只是为了好玩:

int i = sizeof(x) * CHAR_BIT / 2;
uintmax_t k = (1 << i) - 1;
while (i >= 8)
{
    x = ((x & k) << i) ^ ((x >> i) & k);
    i >>= 1;
    k ^= k << i;
}

为了完整起见,这里是第一种形式的简化32位版本:

x = ( x               << 16) ^  (x >> 16);
x = ((x & 0x00ff00ff) <<  8) ^ ((x >>  8) & 0x00ff00ff);

我只是想在这里添加我自己的解,因为我在任何地方都没有看到它。它是一个小而可移植的c++模板函数,并且只使用比特操作。

template<typename T> inline static T swapByteOrder(const T& val) {
    int totalBytes = sizeof(val);
    T swapped = (T) 0;
    for (int i = 0; i < totalBytes; ++i) {
        swapped |= (val >> (8*(totalBytes-i-1)) & 0xFF) << (8*i);
    }
    return swapped;
}

我们已经用模板做到了这一点。你可以这样做:

// Specialization for 2-byte types.
template<>
inline void endian_byte_swapper< 2 >(char* dest, char const* src)
{
    // Use bit manipulations instead of accessing individual bytes from memory, much faster.
    ushort* p_dest = reinterpret_cast< ushort* >(dest);
    ushort const* const p_src = reinterpret_cast< ushort const* >(src);
    *p_dest = (*p_src >> 8) | (*p_src << 8);
}

// Specialization for 4-byte types.
template<>
inline void endian_byte_swapper< 4 >(char* dest, char const* src)
{
    // Use bit manipulations instead of accessing individual bytes from memory, much faster.
    uint* p_dest = reinterpret_cast< uint* >(dest);
    uint const* const p_src = reinterpret_cast< uint const* >(src);
    *p_dest = (*p_src >> 24) | ((*p_src & 0x00ff0000) >> 8) | ((*p_src & 0x0000ff00) << 8) | (*p_src << 24);
}

我从这篇文章中得到了一些建议,并把它们放在一起形成了这个:

#include <boost/type_traits.hpp>
#include <boost/static_assert.hpp>
#include <boost/detail/endian.hpp>
#include <stdexcept>
#include <cstdint>

enum endianness
{
    little_endian,
    big_endian,
    network_endian = big_endian,
    
    #if defined(BOOST_LITTLE_ENDIAN)
        host_endian = little_endian
    #elif defined(BOOST_BIG_ENDIAN)
        host_endian = big_endian
    #else
        #error "unable to determine system endianness"
    #endif
};

namespace detail {

template<typename T, size_t sz>
struct swap_bytes
{
    inline T operator()(T val)
    {
        throw std::out_of_range("data size");
    }
};

template<typename T>
struct swap_bytes<T, 1>
{
    inline T operator()(T val)
    {
        return val;
    }
};

template<typename T>
struct swap_bytes<T, 2>
{
    inline T operator()(T val)
    {
        return ((((val) >> 8) & 0xff) | (((val) & 0xff) << 8));
    }
};

template<typename T>
struct swap_bytes<T, 4>
{
    inline T operator()(T val)
    {
        return ((((val) & 0xff000000) >> 24) |
                (((val) & 0x00ff0000) >>  8) |
                (((val) & 0x0000ff00) <<  8) |
                (((val) & 0x000000ff) << 24));
    }
};

template<>
struct swap_bytes<float, 4>
{
    inline float operator()(float val)
    {
        uint32_t mem =swap_bytes<uint32_t, sizeof(uint32_t)>()(*(uint32_t*)&val);
        return *(float*)&mem;
    }
};

template<typename T>
struct swap_bytes<T, 8>
{
    inline T operator()(T val)
    {
        return ((((val) & 0xff00000000000000ull) >> 56) |
                (((val) & 0x00ff000000000000ull) >> 40) |
                (((val) & 0x0000ff0000000000ull) >> 24) |
                (((val) & 0x000000ff00000000ull) >> 8 ) |
                (((val) & 0x00000000ff000000ull) << 8 ) |
                (((val) & 0x0000000000ff0000ull) << 24) |
                (((val) & 0x000000000000ff00ull) << 40) |
                (((val) & 0x00000000000000ffull) << 56));
    }
};

template<>
struct swap_bytes<double, 8>
{
    inline double operator()(double val)
    {
        uint64_t mem =swap_bytes<uint64_t, sizeof(uint64_t)>()(*(uint64_t*)&val);
        return *(double*)&mem;
    }
};

template<endianness from, endianness to, class T>
struct do_byte_swap
{
    inline T operator()(T value)
    {
        return swap_bytes<T, sizeof(T)>()(value);
    }
};
// specialisations when attempting to swap to the same endianess
template<class T> struct do_byte_swap<little_endian, little_endian, T> { inline T operator()(T value) { return value; } };
template<class T> struct do_byte_swap<big_endian,    big_endian,    T> { inline T operator()(T value) { return value; } };

} // namespace detail

template<endianness from, endianness to, class T>
inline T byte_swap(T value)
{
    // ensure the data is only 1, 2, 4 or 8 bytes
    BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
    // ensure we're only swapping arithmetic types
    BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);

    return detail::do_byte_swap<from, to, T>()(value);
}

然后你可以这样使用它:

// swaps val from host-byte-order to network-byte-order
auto swapped = byte_swap<host_endian, network_endian>(val);

反之亦然

// swap a value received from the network into host-byte-order
auto val = byte_swap<network_endian, host_endian>(val_from_network);