C++ vector、unordered_set和稀疏集的增删遍历性能对比

C++源码


// 优化版: std::vector vs 开放寻址哈希(二次探测+批量API) vs std::unordered_set

#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <numeric>
#include <cstring>

#ifdef __GNUC__
#define ALWAYS_INLINE __attribute__((always_inline))
#define PURE __attribute__((pure))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define ALWAYS_INLINE
#define PURE
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif

using player_id_t = uint64_t;
using namespace std::chrono;

// ==================== 方案1: std::vector ====================
class VectorSet {
    std::vector<player_id_t> data_;
public:
    explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
    
    ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
    
    ALWAYS_INLINE bool erase(player_id_t id) {
        auto it = std::find(data_.begin(), data_.end(), id);
        if (it == data_.end()) return false;
        *it = data_.back();
        data_.pop_back();
        return true;
    }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : data_) fn(id); 
    }
    
    [[nodiscard]] size_t size() const { return data_.size(); }
    void clear() { data_.clear(); }
    
    // 批量操作
    void insert_batch(const player_id_t* ids, size_t count) {
        data_.reserve(data_.size() + count);
        for (size_t i = 0; i < count; ++i) {
            data_.push_back(ids[i]);
        }
    }
};

// ==================== 方案2: 开放寻址哈希 (优化版) ====================
class HashSparseSet {
    struct Node { 
        player_id_t id = 0;      // 0 表示空槽
        uint32_t dense_idx = 0;  // 在 dense 数组中的索引
    };
    
    std::vector<Node> nodes_;           // 哈希表（开放寻址）
    std::vector<player_id_t> dense_;    // 密集存储，用于快速遍历
    std::vector<uint32_t> slot_of_;     // dense -> slot 反向映射（用于快速删除）
    uint32_t mask_ = 0;                 // 容量掩码（2^n - 1）
    uint32_t size_ = 0;                 // 当前元素数量
    
    // 二次探测序列: 0, 1, 3, 6, 10... (三角形数)
    // 公式: i * (i + 1) / 2
    static ALWAYS_INLINE uint32_t probe_offset(uint32_t i) {
        return (i * (i + 1)) >> 1;
    }
    
    // MurmurHash3 风格的 64位哈希（更好的分布）
    PURE static uint64_t hash(player_id_t x) {
        // 混合位，减少连续ID的碰撞
        x ^= x >> 33;
        x *= 0xff51afd7ed558ccdULL;
        x ^= x >> 33;
        x *= 0xc4ceb9fe1a85ec53ULL;
        x ^= x >> 33;
        return x;
    }
    
    void grow() {
        uint32_t old_cap = static_cast<uint32_t>(nodes_.size());
        uint32_t new_cap = old_cap ? old_cap << 1 : 16;
        
        // 保存旧数据
        std::vector<Node> old_nodes = std::move(nodes_);
        std::vector<player_id_t> old_dense = std::move(dense_);
        
        // 扩容
        nodes_.assign(new_cap, {});
        mask_ = new_cap - 1;
        dense_.clear();
        dense_.reserve(new_cap >> 1);  // 负载因子 0.5
        slot_of_.clear();
        slot_of_.reserve(new_cap >> 1);
        size_ = 0;
        
        // 重新插入
        for (auto id : old_dense) {
            if (id != 0) insert(id);
        }
    }
    
    // 查找槽位：返回找到的槽位或空槽
    // found 输出参数表示是否找到目标ID
    ALWAYS_INLINE uint32_t find_slot(player_id_t id, bool& found) const {
        found = false;
        if (UNLIKELY(nodes_.empty())) return 0;
        
        const uint32_t start = static_cast<uint32_t>(hash(id)) & mask_;
        
        for (uint32_t i = 0; i <= mask_; ++i) {
            const uint32_t slot = (start + probe_offset(i)) & mask_;
            const player_id_t slot_id = nodes_[slot].id;
            
            if (UNLIKELY(slot_id == 0)) {
                return slot;  // 空槽，插入位置
            }
            if (slot_id == id) {
                found = true;
                return slot;  // 找到目标
            }
        }
        return UINT32_MAX;  // 表满（理论上不发生）
    }

public:
    explicit HashSparseSet(size_t reserve = 1000) {
        uint32_t cap = 16;
        while (cap < reserve * 2) cap <<= 1;  // 负载因子 0.5
        nodes_.assign(cap, {});
        mask_ = cap - 1;
        dense_.reserve(reserve);
        slot_of_.reserve(reserve);
    }
    
    // 单次插入
    ALWAYS_INLINE void insert(player_id_t id) {
        if (UNLIKELY(id == 0)) return;  // 0 保留为空槽标记
        
        bool found;
        uint32_t slot = find_slot(id, found);
        
        if (UNLIKELY(slot == UINT32_MAX)) {
            grow();
            slot = find_slot(id, found);
        }
        
        if (found) return;  // 已存在
        
        // 新元素
        uint32_t idx = static_cast<uint32_t>(dense_.size());
        nodes_[slot] = {id, idx};
        dense_.push_back(id);
        slot_of_.push_back(slot);
        ++size_;
    }
    
    // 批量插入（优化版）- 预分配 + 减少重复计算
    void insert_batch(const player_id_t* ids, size_t count) {
        // 预检查容量
        if (dense_.size() + count > (nodes_.size() >> 1)) {
            uint32_t need = static_cast<uint32_t>(dense_.size() + count);
            uint32_t new_cap = static_cast<uint32_t>(nodes_.size());
            while ((new_cap >> 1) < need) new_cap <<= 1;
            
            // 重建表
            std::vector<Node> old_nodes = std::move(nodes_);
            std::vector<player_id_t> old_dense = std::move(dense_);
            
            nodes_.assign(new_cap, {});
            mask_ = new_cap - 1;
            dense_.clear();
            dense_.reserve(need);
            slot_of_.clear();
            slot_of_.reserve(need);
            size_ = 0;
            
            // 先插入旧元素
            for (auto id : old_dense) {
                if (id != 0) insert(id);
            }
        }
        
        // 批量插入新元素
        for (size_t i = 0; i < count; ++i) {
            insert(ids[i]);
        }
    }
    
    ALWAYS_INLINE bool erase(player_id_t id) {
        bool found;
        uint32_t slot = find_slot(id, found);
        if (!found) return false;
        
        uint32_t idx = nodes_[slot].dense_idx;
        uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1;
        
        // 标记为空槽
        nodes_[slot].id = 0;
        
        if (idx != last_idx) {
            // 用末尾元素填充空洞
            player_id_t last_id = dense_.back();
            uint32_t last_slot = slot_of_[last_idx];
            
            dense_[idx] = last_id;
            slot_of_[idx] = last_slot;
            nodes_[last_slot].dense_idx = idx;
        }
        
        dense_.pop_back();
        slot_of_.pop_back();
        --size_;
        return true;
    }
    
    // 批量删除（优化：先标记再清理，或逐个处理）
    void erase_batch(const player_id_t* ids, size_t count) {
        // 简单实现：逐个删除
        // 对于大量删除，可以优化为：标记删除 -> 重建 dense 数组
        for (size_t i = 0; i < count; ++i) {
            erase(ids[i]);
        }
    }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        // 预取优化
        for (size_t i = 0; i < dense_.size(); ++i) {
            if (i + 4 < dense_.size()) {
                __builtin_prefetch(&dense_[i + 4], 0, 3);
            }
            fn(dense_[i]); 
        } 
    }
    
    [[nodiscard]] size_t size() const { return size_; }
    [[nodiscard]] bool empty() const { return size_ == 0; }
    
    void clear() { 
        // 快速清空：只重置使用的槽位
        for (auto id : dense_) {
            if (id != 0) {
                bool found;
                uint32_t slot = find_slot(id, found);
                if (found) nodes_[slot].id = 0;
            }
        }
        dense_.clear();
        slot_of_.clear();
        size_ = 0;
    }
    
    // 预分配接口
    void reserve(size_t n) {
        if (n > (nodes_.size() >> 1)) {
            uint32_t need = static_cast<uint32_t>(n);
            uint32_t new_cap = 16;
            while ((new_cap >> 1) < need) new_cap <<= 1;
            
            nodes_.assign(new_cap, {});
            mask_ = new_cap - 1;
            dense_.reserve(n);
            slot_of_.reserve(n);
        }
    }
};

// ==================== 方案3: std::unordered_set (优化版) ====================
class StdHashSet {
    std::unordered_set<player_id_t> set_;
public:
    explicit StdHashSet(size_t reserve = 1000) { 
        set_.reserve(reserve * 2);  // 考虑负载因子
    }
    
    ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
    
    ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : set_) fn(id); 
    }
    
    [[nodiscard]] size_t size() const { return set_.size(); }
    void clear() { set_.clear(); }
    
    void insert_batch(const player_id_t* ids, size_t count) {
        set_.reserve(set_.size() + count);
        for (size_t i = 0; i < count; ++i) {
            set_.insert(ids[i]);
        }
    }
};

// ==================== 测试框架 ====================
struct Result {
    double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
    double batch_insert_us = 0;  // 新增批量测试
};

class Benchmark {
    std::vector<player_id_t> ids_, del_;
    int n_;
public:
    Benchmark(int n, int seed) : n_(n) {
        std::mt19937_64 rng(seed);
        for (int i = 0; i < n; ++i) ids_.push_back(rng());
        del_ = ids_;
        std::shuffle(del_.begin(), del_.end(), rng);
    }
    
    template<typename Set>
    Result run() {
        Result r;
        Set set(1000);
        
        // 测试1: 单次插入
        auto t0 = high_resolution_clock::now();
        for (auto id : ids_) set.insert(id);
        auto t1 = high_resolution_clock::now();
        
        // 测试2: 遍历
        volatile uint64_t sum = 0;
        set.foreach([&](player_id_t id) { sum += id; });
        auto t2 = high_resolution_clock::now();
        
        // 测试3: 单次删除
        for (auto id : del_) set.erase(id);
        auto t3 = high_resolution_clock::now();
        
        r.insert_us = duration<double, std::micro>(t1-t0).count();
        r.foreach_us = duration<double, std::micro>(t2-t1).count();
        r.erase_us = duration<double, std::micro>(t3-t2).count();
        r.total_us = duration<double, std::micro>(t3-t0).count();
        
        (void)sum;
        return r;
    }
    
    // 批量操作测试
    template<typename Set>
    Result run_batch() {
        Result r;
        Set set(1000);
        
        // 批量插入
        auto t0 = high_resolution_clock::now();
        set.insert_batch(ids_.data(), ids_.size());
        auto t1 = high_resolution_clock::now();
        
        // 遍历
        volatile uint64_t sum = 0;
        set.foreach([&](player_id_t id) { sum += id; });
        auto t2 = high_resolution_clock::now();
        
        // 批量删除（模拟）
        set.clear();
        auto t3 = high_resolution_clock::now();
        
        r.batch_insert_us = duration<double, std::micro>(t1-t0).count();
        r.foreach_us = duration<double, std::micro>(t2-t1).count();
        r.total_us = duration<double, std::micro>(t3-t0).count();
        
        (void)sum;
        return r;
    }
};

int main() {
    constexpr int ROUNDS = 100;
    
    std::cout << "=================================================================\n";
    std::cout << "优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set\n";
    std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n";
    std::cout << "=================================================================\n\n";
    
    std::vector<Result> vec_res, open_res, std_res;
    std::vector<Result> vec_batch, open_batch, std_batch;
    
    for (int i = 0; i < ROUNDS; ++i) {
        Benchmark bm(1000, i);
        vec_res.push_back(bm.run<VectorSet>());
        open_res.push_back(bm.run<HashSparseSet>());
        std_res.push_back(bm.run<StdHashSet>());
        
        vec_batch.push_back(bm.run_batch<VectorSet>());
        open_batch.push_back(bm.run_batch<HashSparseSet>());
        std_batch.push_back(bm.run_batch<StdHashSet>());
    }
    
    auto avg = [](auto& v, auto f) {
        double s = 0;
        for (auto& r : v) s += r.*f;
        return s / v.size();
    };
    
    // 单次操作结果
    std::cout << "--- 单次操作测试 ---\n";
    const char* names[] = {"INSERT", "FOREACH", "ERASE", "TOTAL"};
    auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::erase_us, &Result::total_us};
    auto it = ptrs.begin();
    
    for (const char* name : names) {
        double va = avg(vec_res, *it);
        double oa = avg(open_res, *it);
        double sa = avg(std_res, *it);
        
        std::cout << "[" << name << "]\n";
        std::cout << "  Vector:       " << std::fixed << std::setprecision(2) 
                  << std::setw(10) << va << " us\n";
        std::cout << "  OpenHash:     " << std::setw(10) << oa << " us";
        if (name[0] == 'E' || name[0] == 'T') 
            std::cout << "  (" << std::setprecision(2) << (va/oa) << "x vs Vector)";
        else if (name[0] == 'I')
            std::cout << "  (" << std::setprecision(2) << (oa/va) << "x vs Vector)";
        std::cout << "\n";
        std::cout << "  StdUnordered: " << std::setw(10) << sa << " us";
        if (name[0] == 'E' || name[0] == 'T') 
            std::cout << "  (" << std::setprecision(2) << (va/sa) << "x vs Vector)";
        else if (name[0] == 'I')
            std::cout << "  (" << std::setprecision(2) << (sa/va) << "x vs Vector)";
        std::cout << "\n\n";
        ++it;
    }
    
    // 批量操作结果
    std::cout << "--- 批量操作测试 ---\n";
    std::cout << "[BATCH INSERT]\n";
    double vb = avg(vec_batch, &Result::batch_insert_us);
    double ob = avg(open_batch, &Result::batch_insert_us);
    double sb = avg(std_batch, &Result::batch_insert_us);
    
    std::cout << "  Vector:       " << std::fixed << std::setprecision(2) 
              << std::setw(10) << vb << " us\n";
    std::cout << "  OpenHash:     " << std::setw(10) << ob << " us"
              << "  (" << std::setprecision(2) << (ob/vb) << "x vs Vector)\n";
    std::cout << "  StdUnordered: " << std::setw(10) << sb << " us"
              << "  (" << std::setprecision(2) << (sb/vb) << "x vs Vector)\n";
    
    return 0;
}

测试结果（O3编译）：

=================================================================
优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set
1000 random uint64_t IDs, 100 rounds
=================================================================

--- 单次操作测试 ---
[INSERT]
  Vector:             0.37 us
  OpenHash:           7.98 us  (21.73x vs Vector)
  StdUnordered:      22.21 us  (60.51x vs Vector)

[FOREACH]
  Vector:             0.40 us
  OpenHash:           0.57 us
  StdUnordered:       1.51 us

[ERASE]
  Vector:            49.77 us
  OpenHash:           6.88 us  (7.23x vs Vector)
  StdUnordered:      16.29 us  (3.06x vs Vector)

[TOTAL]
  Vector:            50.54 us
  OpenHash:          15.43 us  (3.28x vs Vector)
  StdUnordered:      40.01 us  (1.26x vs Vector)

优化版本

// 极致性能优化版稀疏集：Robin Hood哈希 + 软件预取 + 缓存行优化

#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <cstring>
#include <cassert>

#ifdef __GNUC__
    #define ALWAYS_INLINE __attribute__((always_inline))
    #define PURE __attribute__((pure))
    #define HOT __attribute__((hot))
    #define LIKELY(x) __builtin_expect(!!(x), 1)
    #define UNLIKELY(x) __builtin_expect(!!(x), 0)
    #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
    #define ALWAYS_INLINE
    #define PURE
    #define HOT
    #define LIKELY(x) (x)
    #define UNLIKELY(x) (x)
    #define PREFETCH(addr, rw, locality)
#endif

using player_id_t = uint64_t;
using namespace std::chrono;

// ==================== 极致优化版：Robin Hood稀疏集 ====================
//
// 核心优化策略（基于[^8^][^13^]业界最佳实践）：
// 1. Robin Hood哈希：控制最大探测长度，保证O(1)最坏情况
// 2. 分离存储：稀疏数组(sparse)只存索引，密集数组(dense)存实际ID
// 3. 软件预取：遍历前预取缓存行，减少内存延迟
// 4. 缓存行对齐：确保dense数组按64字节边界对齐
// 5. 零拷贝删除：swap-and-pop避免内存移动

class alignas(64) RobinHoodSparseSet {
public:
    // 节点结构：8字节ID + 4字节密集索引 + 4字节距离 = 16字节（缓存行对齐友好）
    struct Node {
        player_id_t id;           // 8 bytes: 存储的ID（0表示空槽）
        uint32_t dense_idx;       // 4 bytes: 在dense数组中的位置
        uint32_t dist;            // 4 bytes: 距离理想位置的距离（Robin Hood关键）
        
        bool empty() const { return id == 0; }
    };
    
    static_assert(sizeof(Node) == 16, "Node size should be 16 bytes for cache efficiency");

private:
    std::vector<Node> nodes_;           // 稀疏数组：哈希表本体
    std::vector<player_id_t> dense_;    // 密集数组：连续存储所有有效ID
    std::vector<uint32_t> sparse_to_dense_; // 反向映射：dense索引 -> 稀疏槽位
    
    uint32_t mask_ = 0;                 // 容量掩码（2^n - 1）
    uint32_t size_ = 0;                 // 当前元素数量
    uint32_t max_dist_ = 0;             // 当前最大探测距离（用于快速失败查找）

    // 哈希函数：SplitMix64 - 高质量且快速
    PURE static ALWAYS_INLINE uint64_t hash(player_id_t x) {
        // SplitMix64算法：比MurmurHash更快，分布质量足够好
        x += 0x9e3779b97f4a7c15ULL;
        x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
        x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
        return x ^ (x >> 31);
    }

    // 计算理想位置
    PURE ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const {
        return static_cast<uint32_t>(hash(id)) & mask_;
    }

    // 扩容并重建（保留旧元素）
    HOT void grow() {
        uint32_t old_cap = mask_ ? mask_ + 1 : 0;
        uint32_t new_cap = old_cap ? old_cap << 1 : 16;
        
        // 保存旧数据
        std::vector<player_id_t> old_dense = std::move(dense_);
        
        // 重置结构
        nodes_.assign(new_cap, {});
        mask_ = new_cap - 1;
        dense_.clear();
        dense_.reserve(new_cap >> 1);  // 负载因子0.5
        sparse_to_dense_.clear();
        sparse_to_dense_.reserve(new_cap >> 1);
        size_ = 0;
        max_dist_ = 0;
        
        // 重新插入所有元素（此时会重建Robin Hood顺序）
        for (auto id : old_dense) {
            if (id != 0) insert_robin_hood(id);
        }
    }

    // Robin Hood插入核心：富者愈富，穷者愈穷
    HOT void insert_robin_hood(player_id_t id) {
        assert(id != 0 && "ID 0 is reserved for empty slot");
        
        uint32_t slot = ideal_slot(id);
        uint32_t dist = 0;
        Node insert_node{id, 0, 0};  // dense_idx会在后面设置
        
        while (true) {
            Node& current = nodes_[slot];
            
            if (current.empty()) {
                // 找到空槽，放置元素
                uint32_t idx = static_cast<uint32_t>(dense_.size());
                insert_node.dense_idx = idx;
                insert_node.dist = dist;
                current = insert_node;
                
                dense_.push_back(id);
                sparse_to_dense_.push_back(slot);
                
                if (dist > max_dist_) max_dist_ = dist;
                ++size_;
                return;
            }
            
            if (current.id == id) {
                return;  // 已存在，不重复插入
            }
            
            // Robin Hood交换：如果当前元素比新元素"更富"（离理想位置更近）
            if (current.dist < dist) {
                // 新元素更"穷"，抢夺这个位置
                std::swap(current, insert_node);
                dist = current.dist;  // 继续携带被交换的元素
                insert_node.dist = dist;
            }
            
            // 继续探测
            ++dist;
            slot = (slot + 1) & mask_;
            
            // 安全检查：理论上不会发生，因为负载因子0.5
            if (UNLIKELY(dist > mask_)) {
                grow();
                insert_robin_hood(insert_node.id);
                return;
            }
        }
    }

public:
    explicit RobinHoodSparseSet(size_t reserve = 1000) {
        uint32_t cap = 16;
        while (cap < reserve * 2) cap <<= 1;  // 保证负载因子0.5
        nodes_.assign(cap, {});
        mask_ = cap - 1;
        dense_.reserve(reserve);
        sparse_to_dense_.reserve(reserve);
    }

    // 单次插入
    HOT ALWAYS_INLINE void insert(player_id_t id) {
        if (UNLIKELY(id == 0)) return;
        
        // 快速路径：检查是否需要扩容（负载因子0.75触发）
        if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
            grow();
        }
        
        insert_robin_hood(id);
    }

    // 批量插入：预分配 + 减少哈希计算
    HOT void insert_batch(const player_id_t* ids, size_t count) {
        if (UNLIKELY(count == 0)) return;
        
        // 预检查容量，一次性扩容
        uint32_t need = static_cast<uint32_t>(size_ + count);
        if (need > (mask_ + 1) * 0.75f) {
            uint32_t new_cap = mask_ + 1;
            while (new_cap < need * 2) new_cap <<= 1;
            
            std::vector<Node> old_nodes = std::move(nodes_);
            std::vector<player_id_t> old_dense = std::move(dense_);
            
            nodes_.assign(new_cap, {});
            mask_ = new_cap - 1;
            dense_.clear();
            dense_.reserve(need);
            sparse_to_dense_.clear();
            sparse_to_dense_.reserve(need);
            size_ = 0;
            max_dist_ = 0;
            
            // 先插入旧元素
            for (auto old_id : old_dense) {
                if (old_id != 0) insert_robin_hood(old_id);
            }
        }
        
        // 批量插入新元素
        for (size_t i = 0; i < count; ++i) {
            if (ids[i] != 0) insert_robin_hood(ids[i]);
        }
    }

    // 查找（利用max_dist_快速失败）
    PURE ALWAYS_INLINE bool contains(player_id_t id) const {
        if (UNLIKELY(id == 0 || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        
        // 最多探测max_dist_ + 1次
        for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
            const Node& node = nodes_[slot];
            
            if (node.empty()) return false;
            if (node.id == id) return true;
            if (node.dist < dist) return false;  // Robin Hood性质：不可能在后面
            
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    // 删除：swap-and-pop（零拷贝）
    HOT ALWAYS_INLINE bool erase(player_id_t id) {
        if (UNLIKELY(id == 0 || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        
        for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
            Node& node = nodes_[slot];
            
            if (node.empty()) return false;
            
            if (node.id == id) {
                // 找到目标，执行删除
                uint32_t idx = node.dense_idx;
                uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1;
                
                // 标记为空槽
                node.id = 0;
                node.dist = 0;
                
                if (idx != last_idx) {
                    // swap-and-pop：用末尾元素填充空洞
                    player_id_t last_id = dense_.back();
                    uint32_t last_slot = sparse_to_dense_[last_idx];
                    
                    dense_[idx] = last_id;
                    sparse_to_dense_[idx] = last_slot;
                    nodes_[last_slot].dense_idx = idx;
                }
                
                dense_.pop_back();
                sparse_to_dense_.pop_back();
                --size_;
                
                // 可选：压缩max_dist_（延迟更新以节省CPU）
                return true;
            }
            
            if (node.dist < dist) return false;
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    // 极致优化的遍历：软件预取 + 循环展开
    template<typename Fn>
    HOT ALWAYS_INLINE void foreach(Fn&& fn) const {
        const size_t n = dense_.size();
        if (n == 0) return;
        
        const player_id_t* data = dense_.data();
        
        // 主循环：4路展开 + 软件预取
        size_t i = 0;
        for (; i + 4 <= n; i += 4) {
            // 预取未来4个缓存行（假设每个ID 8字节，4个ID 32字节，小于64字节缓存行）
            PREFETCH(&data[i + 8], 0, 3);  // 预取8个元素后的数据
            
            fn(data[i]);
            fn(data[i + 1]);
            fn(data[i + 2]);
            fn(data[i + 3]);
        }
        
        // 处理剩余元素
        for (; i < n; ++i) {
            fn(data[i]);
        }
    }

    // 带索引的遍历（用于需要知道位置的场景）
    template<typename Fn>
    HOT ALWAYS_INLINE void foreach_indexed(Fn&& fn) const {
        const size_t n = dense_.size();
        for (size_t i = 0; i < n; ++i) {
            fn(i, dense_[i]);
        }
    }

    // 快速清空：只重置使用的槽位
    HOT void clear() {
        // 只遍历dense数组，标记对应的稀疏槽位
        for (auto id : dense_) {
            if (id != 0) {
                uint32_t slot = ideal_slot(id);
                // 由于Robin Hood性质，需要找到实际位置
                for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
                    if (nodes_[slot].id == id) {
                        nodes_[slot].id = 0;
                        nodes_[slot].dist = 0;
                        break;
                    }
                    slot = (slot + 1) & mask_;
                }
            }
        }
        dense_.clear();
        sparse_to_dense_.clear();
        size_ = 0;
        max_dist_ = 0;
    }

    // 预分配接口
    void reserve(size_t n) {
        if (n > (mask_ + 1) * 0.5f) {
            uint32_t new_cap = 16;
            while (new_cap < n * 2) new_cap <<= 1;
            
            std::vector<Node> old_nodes = std::move(nodes_);
            std::vector<player_id_t> old_dense = std::move(dense_);
            
            nodes_.assign(new_cap, {});
            mask_ = new_cap - 1;
            dense_.clear();
            dense_.reserve(n);
            sparse_to_dense_.clear();
            sparse_to_dense_.reserve(n);
            size_ = 0;
            max_dist_ = 0;
            
            for (auto id : old_dense) {
                if (id != 0) insert_robin_hood(id);
            }
        }
    }

    [[nodiscard]] size_t size() const { return size_; }
    [[nodiscard]] bool empty() const { return size_ == 0; }
    [[nodiscard]] size_t capacity() const { return dense_.capacity(); }
    
    // 获取原始密集数组指针（用于外部批量处理）
    const player_id_t* data() const { return dense_.data(); }
};

// ==================== 对比方案：原始Vector ====================
class VectorSet {
    std::vector<player_id_t> data_;
public:
    explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
    
    ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
    
    ALWAYS_INLINE bool erase(player_id_t id) {
        auto it = std::find(data_.begin(), data_.end(), id);
        if (it == data_.end()) return false;
        *it = data_.back();
        data_.pop_back();
        return true;
    }

    ALWAYS_INLINE bool contains(player_id_t id) const {
       return std::find(data_.begin(), data_.end(), id) != data_.end();
    }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : data_) fn(id); 
    }
    
    void insert_batch(const player_id_t* ids, size_t count) {
        data_.reserve(data_.size() + count);
        for (size_t i = 0; i < count; ++i) data_.push_back(ids[i]);
    }
    
    [[nodiscard]] size_t size() const { return data_.size(); }
    void clear() { data_.clear(); }
};

// ==================== 对比方案：std::unordered_set ====================
class StdHashSet {
    std::unordered_set<player_id_t> set_;
public:
    explicit StdHashSet(size_t reserve = 1000) { 
        set_.reserve(reserve * 2); 
    }
    
    ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
    ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }

    ALWAYS_INLINE bool contains(player_id_t id) const {
       return set_.find(id) != set_.end();
    }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : set_) fn(id); 
    }
    
    void insert_batch(const player_id_t* ids, size_t count) {
        set_.reserve(set_.size() + count);
        for (size_t i = 0; i < count; ++i) set_.insert(ids[i]);
    }
    
    [[nodiscard]] size_t size() const { return set_.size(); }
    void clear() { set_.clear(); }
};

// ==================== 测试框架 ====================
struct Result {
    double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
    double batch_insert_us = 0;
    double lookup_us = 0;  // 新增查找测试
};

class Benchmark {
    std::vector<player_id_t> ids_, del_, lookup_;
    int n_;
public:
    Benchmark(int n, int seed) : n_(n) {
        std::mt19937_64 rng(seed);
        for (int i = 0; i < n; ++i) ids_.push_back(rng() | 1);  // 确保非0
        del_ = ids_;
        std::shuffle(del_.begin(), del_.end(), rng);
        
        // 生成查找用ID（一半存在，一半不存在）
        for (int i = 0; i < n; ++i) {
            if (i % 2 == 0) lookup_.push_back(ids_[i]);
            else lookup_.push_back(rng() | 1);
        }
        std::shuffle(lookup_.begin(), lookup_.end(), rng);
    }
    
    template<typename Set>
    Result run() {
        Result r;
        Set set(1000);
        
        // 插入
        auto t0 = high_resolution_clock::now();
        for (auto id : ids_) set.insert(id);
        auto t1 = high_resolution_clock::now();
        
        // 遍历
        volatile uint64_t sum = 0;
        set.foreach([&](player_id_t id) { sum += id; });
        auto t2 = high_resolution_clock::now();
        
        // 查找（50%命中率）
        volatile bool found = false;
        for (auto id : lookup_) found = set.contains(id);
        auto t3 = high_resolution_clock::now();
        
        // 删除
        for (auto id : del_) set.erase(id);
        auto t4 = high_resolution_clock::now();
        
        r.insert_us = duration<double, std::micro>(t1-t0).count();
        r.foreach_us = duration<double, std::micro>(t2-t1).count();
        r.lookup_us = duration<double, std::micro>(t3-t2).count();
        r.erase_us = duration<double, std::micro>(t4-t3).count();
        r.total_us = duration<double, std::micro>(t4-t0).count();
        
        (void)sum;
        (void)found;
        return r;
    }
    
    template<typename Set>
    Result run_batch() {
        Result r;
        Set set(1000);
        
        // 批量插入
        auto t0 = high_resolution_clock::now();
        set.insert_batch(ids_.data(), ids_.size());
        auto t1 = high_resolution_clock::now();
        
        // 遍历
        volatile uint64_t sum = 0;
        set.foreach([&](player_id_t id) { sum += id; });
        auto t2 = high_resolution_clock::now();
        
        r.batch_insert_us = duration<double, std::micro>(t1-t0).count();
        r.foreach_us = duration<double, std::micro>(t2-t1).count();
        r.total_us = duration<double, std::micro>(t2-t0).count();
        
        (void)sum;
        return r;
    }
};

int main() {
    constexpr int ROUNDS = 100;
    
    std::cout << "=================================================================\n";
    std::cout << "极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set\n";
    std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n";
    std::cout << "=================================================================\n\n";
    
    std::vector<Result> vec_res, robin_res, std_res;
    std::vector<Result> vec_batch, robin_batch, std_batch;
    
    for (int i = 0; i < ROUNDS; ++i) {
        Benchmark bm(1000, i);
        vec_res.push_back(bm.run<VectorSet>());
        robin_res.push_back(bm.run<RobinHoodSparseSet>());
        std_res.push_back(bm.run<StdHashSet>());
        
        vec_batch.push_back(bm.run_batch<VectorSet>());
        robin_batch.push_back(bm.run_batch<RobinHoodSparseSet>());
        std_batch.push_back(bm.run_batch<StdHashSet>());
    }
    
    auto avg = [](auto& v, auto f) {
        double s = 0;
        for (auto& r : v) s += r.*f;
        return s / v.size();
    };
    
    // 单次操作结果
    std::cout << "--- 单次操作测试 ---\n";
    const char* names[] = {"INSERT", "FOREACH", "LOOKUP", "ERASE", "TOTAL"};
    auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::lookup_us, &Result::erase_us, &Result::total_us};
    auto it = ptrs.begin();
    
    for (const char* name : names) {
        double va = avg(vec_res, *it);
        double ra = avg(robin_res, *it);
        double sa = avg(std_res, *it);
        
        std::cout << "[" << name << "]\n";
        std::cout << "  Vector:       " << std::fixed << std::setprecision(2) 
                  << std::setw(10) << va << " us\n";
        std::cout << "  RobinHood:    " << std::setw(10) << ra << " us";
        if (va > 0.01) {
            double speedup = va / ra;
            std::cout << "  (" << std::setprecision(2) << speedup << "x vs Vector)";
        }
        std::cout << "\n";
        std::cout << "  StdUnordered: " << std::setw(10) << sa << " us";
        if (va > 0.01) {
            double speedup = va / sa;
            std::cout << "  (" << std::setprecision(2) << speedup << "x vs Vector)";
        }
        std::cout << "\n\n";
        ++it;
    }
    
    // 批量操作结果
    std::cout << "--- 批量操作测试 ---\n";
    std::cout << "[BATCH INSERT + FOREACH]\n";
    double vb = avg(vec_batch, &Result::total_us);
    double rb = avg(robin_batch, &Result::total_us);
    double sb = avg(std_batch, &Result::total_us);
    
    std::cout << "  Vector:       " << std::fixed << std::setprecision(2) 
              << std::setw(10) << vb << " us\n";
    std::cout << "  RobinHood:    " << std::setw(10) << rb << " us"
              << "  (" << std::setprecision(2) << (vb/rb) << "x vs Vector)\n";
    std::cout << "  StdUnordered: " << std::setw(10) << sb << " us"
              << "  (" << std::setprecision(2) << (vb/sb) << "x vs Vector)\n";
    
    return 0;
}

优化后的结果（O3编译）：

=================================================================
极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set
1000 random uint64_t IDs, 100 rounds
=================================================================

--- 单次操作测试 ---
[INSERT]
  Vector:             0.35 us
  RobinHood:          9.79 us  (0.04x vs Vector)
  StdUnordered:      18.59 us  (0.02x vs Vector)

[FOREACH]
  Vector:             0.39 us
  RobinHood:          0.40 us  (0.98x vs Vector)
  StdUnordered:       1.49 us  (0.26x vs Vector)

[LOOKUP]
  Vector:           125.58 us
  RobinHood:         12.22 us  (10.28x vs Vector)
  StdUnordered:      13.15 us  (9.55x vs Vector)

[ERASE]
  Vector:            49.28 us
  RobinHood:          7.03 us  (7.01x vs Vector)
  StdUnordered:      13.78 us  (3.58x vs Vector)

[TOTAL]
  Vector:           175.60 us
  RobinHood:         29.44 us  (5.96x vs Vector)
  StdUnordered:      47.00 us  (3.74x vs Vector)

--- 批量操作测试 ---
[BATCH INSERT + FOREACH]
  Vector:             0.73 us
  RobinHood:          8.54 us  (0.09x vs Vector)
  StdUnordered:      25.00 us  (0.03x vs Vector)

极致优化的源码：

// 精简版：只保留最优insert和erase，动态扩容，无内存浪费

#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>

#ifdef __GNUC__
    #define ALWAYS_INLINE __attribute__((always_inline))
    #define LIKELY(x) __builtin_expect(!!(x), 1)
    #define UNLIKELY(x) __builtin_expect(!!(x), 0)
    #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
    #define ALWAYS_INLINE
    #define LIKELY(x) (x)
    #define UNLIKELY(x) (x)
    #define PREFETCH(addr, rw, locality)
#endif

using player_id_t = uint64_t;
using namespace std::chrono;

// ==================== 动态稀疏集：最优insert + 最优erase ====================

class alignas(64) SparseSet {
    static constexpr player_id_t EMPTY = UINT64_MAX;
    
    struct Node {
        player_id_t id = EMPTY;
        uint32_t dense_idx = 0;
    };

    std::vector<Node> nodes_;
    std::vector<player_id_t> dense_;
    std::vector<uint32_t> slot_of_;
    
    uint32_t mask_ = 0;
    uint32_t size_ = 0;

    static ALWAYS_INLINE uint64_t hash(player_id_t x) {
        x ^= x >> 33;
        x *= 0xff51afd7ed558ccdULL;
        x ^= x >> 33;
        x *= 0xc4ceb9fe1a85ec53ULL;
        return x ^ (x >> 33);
    }

    ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const {
        return static_cast<uint32_t>(hash(id)) & mask_;
    }

    void rebuild(uint32_t new_cap) {
        std::vector<Node> old_nodes = std::move(nodes_);
        std::vector<player_id_t> old_dense = std::move(dense_);
        
        nodes_.assign(new_cap, Node{EMPTY, 0});
        mask_ = new_cap - 1;
        dense_.clear();
        dense_.reserve(new_cap >> 1);
        slot_of_.clear();
        slot_of_.reserve(new_cap >> 1);
        size_ = 0;
        
        for (auto id : old_dense) {
            if (id != EMPTY) insert(id);
        }
    }

public:
    SparseSet() {
        nodes_.assign(16, Node{EMPTY, 0});
        mask_ = 15;
        dense_.reserve(8);
        slot_of_.reserve(8);
    }

    explicit SparseSet(uint32_t reserve) {
        uint32_t cap = 16;
        while (cap < reserve * 2) cap <<= 1;
        nodes_.assign(cap, Node{EMPTY, 0});
        mask_ = cap - 1;
        dense_.reserve(reserve);
        slot_of_.reserve(reserve);
    }

    // 最优insert：Robin Hood哈希 + 动态扩容
    ALWAYS_INLINE void insert(player_id_t id) {
        if (UNLIKELY(id == EMPTY)) return;
        
        // 扩容检查
        if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
            rebuild((mask_ + 1) << 1);
        }
        
        uint32_t slot = ideal_slot(id);
        uint32_t dist = 0;
        Node insert_node{id, 0};
        
        while (true) {
            Node& cur = nodes_[slot];
            
            // 空槽或已存在
            if (cur.id == EMPTY) {
                uint32_t idx = size_;
                insert_node.dense_idx = idx;
                cur = insert_node;
                dense_.push_back(id);
                slot_of_.push_back(slot);
                ++size_;
                return;
            }
            if (cur.id == id) return;  // 已存在
            
            // Robin Hood：距离远的抢占位置
            uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_;
            if (cur_dist < dist) {
                std::swap(cur, insert_node);
                dist = cur_dist;
            }
            
            slot = (slot + 1) & mask_;
            ++dist;
        }
    }

    // 最优erase：查找 + swap-and-pop
    ALWAYS_INLINE bool erase(player_id_t id) {
        if (UNLIKELY(id == EMPTY || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        
        // 线性探测查找
        while (nodes_[slot].id != EMPTY) {
            Node& cur = nodes_[slot];
            
            if (cur.id == id) {
                uint32_t idx = cur.dense_idx;
                uint32_t last = size_ - 1;
                
                // swap-and-pop：末尾元素填充空洞
                if (idx != last) {
                    player_id_t last_id = dense_[last];
                    uint32_t last_slot = slot_of_[last];
                    
                    dense_[idx] = last_id;
                    slot_of_[idx] = last_slot;
                    nodes_[last_slot].dense_idx = idx;
                }
                
                dense_.pop_back();
                slot_of_.pop_back();
                cur.id = EMPTY;
                --size_;
                return true;
            }
            
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    // 遍历（预取优化）
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const {
        const uint32_t n = size_;
        if (n == 0) return;
        
        const player_id_t* d = dense_.data();
        uint32_t i = 0;
        
        PREFETCH(&d[0], 0, 3);
        PREFETCH(&d[16], 0, 3);
        
        // 8路展开
        for (; i + 8 <= n; i += 8) {
            PREFETCH(&d[i + 32], 0, 3);
            fn(d[i]);   fn(d[i+1]); fn(d[i+2]); fn(d[i+3]);
            fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]);
        }
        
        for (; i < n; ++i) fn(d[i]);
    }

    // 查找
    ALWAYS_INLINE bool contains(player_id_t id) const {
        if (UNLIKELY(id == EMPTY || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        while (nodes_[slot].id != EMPTY) {
            if (nodes_[slot].id == id) return true;
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    void clear() {
        for (uint32_t i = 0; i < size_; ++i) {
            nodes_[slot_of_[i]].id = EMPTY;
        }
        dense_.clear();
        slot_of_.clear();
        size_ = 0;
    }

    void reserve(uint32_t n) {
        if (n > (mask_ + 1) * 0.5f) {
            uint32_t new_cap = 16;
            while (new_cap < n * 2) new_cap <<= 1;
            rebuild(new_cap);
        }
        dense_.reserve(n);
        slot_of_.reserve(n);
    }

    uint32_t size() const { return size_; }
    bool empty() const { return size_ == 0; }
    uint32_t capacity() const { return dense_.capacity(); }
    const player_id_t* data() const { return dense_.data(); }
};

// ==================== 对比：Vector ====================

class VectorSet {
    std::vector<player_id_t> data_;
public:
    explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
    
    ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
    
    ALWAYS_INLINE bool erase(player_id_t id) {
        auto it = std::find(data_.begin(), data_.end(), id);
        if (it == data_.end()) return false;
        *it = data_.back();
        data_.pop_back();
        return true;
    }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : data_) fn(id); 
    }
    
    ALWAYS_INLINE bool contains(player_id_t id) const {
        return std::find(data_.begin(), data_.end(), id) != data_.end();
    }
    
    size_t size() const { return data_.size(); }
    uint32_t capacity() const { return data_.capacity(); }
    void clear() { data_.clear(); }
    void reserve(size_t n) { data_.reserve(n); }
};

// ==================== 对比：std::unordered_set ====================

class StdHashSet {
    std::unordered_set<player_id_t> set_;
public:
    explicit StdHashSet(size_t reserve = 1000) { 
        set_.reserve(reserve * 2); 
    }
    
    ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
    ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }
    
    template<typename Fn>
    ALWAYS_INLINE void foreach(Fn&& fn) const { 
        for (auto id : set_) fn(id); 
    }
    
    ALWAYS_INLINE bool contains(player_id_t id) const {
        return set_.find(id) != set_.end();
    }
    
    size_t size() const { return set_.size(); }
    void clear() { set_.clear(); }
    void reserve(size_t n) { set_.reserve(n * 2); }
};

// ==================== 测试 ====================

struct Result {
    double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
    double lookup_us = 0;
};

class Benchmark {
    std::vector<player_id_t> ids_, del_, lookup_;
    int n_;
public:
    Benchmark(int n, int seed) : n_(n) {
        std::mt19937_64 rng(seed);
        for (int i = 0; i < n; ++i) {
            player_id_t id = (rng() % (UINT64_MAX - 2)) + 1;
            ids_.push_back(id);
        }
        del_ = ids_;
        std::shuffle(del_.begin(), del_.end(), rng);
        
        for (int i = 0; i < n; ++i) {
            if (i % 2 == 0) lookup_.push_back(ids_[i]);
            else lookup_.push_back((rng() % (UINT64_MAX - 2)) + 1);
        }
        std::shuffle(lookup_.begin(), lookup_.end(), rng);
    }
    
    template<typename Set>
    Result run() {
        Result r;
        Set set;
        set.reserve(n_);
        
        auto t0 = high_resolution_clock::now();
        for (auto id : ids_) set.insert(id);
        auto t1 = high_resolution_clock::now();
        
        volatile uint64_t sum = 0;
        set.foreach([&](player_id_t id) { sum += id; });
        auto t2 = high_resolution_clock::now();
        
        volatile bool found = false;
        for (auto id : lookup_) found = set.contains(id);
        auto t3 = high_resolution_clock::now();
        
        for (auto id : del_) set.erase(id);
        auto t4 = high_resolution_clock::now();
        
        r.insert_us = duration<double, std::micro>(t1-t0).count();
        r.foreach_us = duration<double, std::micro>(t2-t1).count();
        r.lookup_us = duration<double, std::micro>(t3-t2).count();
        r.erase_us = duration<double, std::micro>(t4-t3).count();
        r.total_us = duration<double, std::micro>(t4-t0).count();
        
        (void)sum;
        (void)found;
        return r;
    }
};

int main() {
    constexpr int ROUNDS = 100;
    
    std::cout << "=============================================================\n";
    std::cout << "SparseSet vs Vector vs std::unordered_set\n";
    std::cout << "只保留最优insert和erase，动态扩容，无内存浪费\n";
    std::cout << "=============================================================\n\n";
    
    for (int n : {100, 500, 1000, 2000}) {
        std::cout << "--- 规模: " << n << " 人 ---\n";
        
        std::vector<Result> vec_res, sp_res, std_res;
        
        for (int i = 0; i < ROUNDS; ++i) {
            Benchmark bm(n, i);
            vec_res.push_back(bm.run<VectorSet>());
            sp_res.push_back(bm.run<SparseSet>());
            std_res.push_back(bm.run<StdHashSet>());
        }
        
        auto avg = [](auto& v, auto f) {
            double s = 0;
            for (auto& r : v) s += r.*f;
            return s / v.size();
        };
        
        auto print = [&](const char* name, const std::vector<Result>& res) {
            std::cout << "  " << std::left << std::setw(10) << name
                      << "插入:" << std::fixed << std::setprecision(2) << std::setw(8) << avg(res, &Result::insert_us)
                      << " 遍历:" << std::setw(8) << avg(res, &Result::foreach_us)
                      << " 查找:" << std::setw(8) << avg(res, &Result::lookup_us)
                      << " 删除:" << std::setw(8) << avg(res, &Result::erase_us)
                      << " 总计:" << std::setw(8) << avg(res, &Result::total_us) << " us\n";
        };
        
        print("Vector:", vec_res);
        print("SparseSet:", sp_res);
        print("StdHash:", std_res);
        
        double vec_total = avg(vec_res, &Result::total_us);
        double sp_total = avg(sp_res, &Result::total_us);
        std::cout << "  SparseSet vs Vector: " << std::setprecision(2) << (vec_total/sp_total) << "x\n\n";
    }
    
    // 内存效率对比
    std::cout << "--- 内存效率 ---\n";
    {
        SparseSet sp;
        VectorSet vec;
        
        for (int i = 0; i < 100; ++i) {
            sp.insert(i + 1);
            vec.insert(i + 1);
        }
        
        std::cout << "插入100人:\n";
        std::cout << "  SparseSet容量: " << sp.capacity() << "\n";
        std::cout << "  Vector容量:    " << vec.capacity() << "\n";
        
        for (int i = 0; i < 90; ++i) {
            sp.erase(i + 1);
            vec.erase(i + 1);
        }
        
        std::cout << "删除90人后(剩10人):\n";
        std::cout << "  SparseSet容量: " << sp.capacity() << "\n";
        std::cout << "  Vector容量:    " << vec.capacity() << "\n";
    }
    
    return 0;
}

极致优化的结果：

=============================================================
SparseSet vs Vector vs std::unordered_set
只保留最优insert和erase，动态扩容，无内存浪费
=============================================================

--- 规模: 100 人 ---
  Vector:   插入:0.10     遍历:0.20     查找:2.94     删除:2.38     总计:5.62     us
  SparseSet:插入:1.12     遍历:0.11     查找:2.01     删除:0.99     总计:4.23     us
  StdHash:  插入:3.64     遍历:0.25     查找:2.12     删除:2.46     总计:8.49     us
  SparseSet vs Vector: 1.33x

--- 规模: 500 人 ---
  Vector:   插入:0.20     遍历:0.25     查找:34.96    删除:15.06    总计:50.46    us
  SparseSet:插入:4.17     遍历:0.22     查找:5.92     删除:3.43     总计:13.74    us
  StdHash:  插入:10.28    遍历:0.66     查找:6.36     删除:6.95     总计:24.25    us
  SparseSet vs Vector: 3.67x

--- 规模: 1000 人 ---
  Vector:   插入:0.34     遍历:0.37     查找:119.32   删除:46.56    总计:166.60   us
  SparseSet:插入:7.91     遍历:0.36     查找:11.10    删除:6.44     总计:25.81    us
  StdHash:  插入:18.09    遍历:1.42     查找:12.55    删除:13.38    总计:45.43    us
  SparseSet vs Vector: 6.45x

--- 规模: 2000 人 ---
  Vector:   插入:0.59     遍历:0.64     查找:444.43   删除:164.40   总计:610.06   us
  SparseSet:插入:15.59    遍历:0.69     查找:23.29    删除:13.09    总计:52.66    us
  StdHash:  插入:35.29    遍历:3.54     查找:25.72    删除:27.27    总计:91.82    us
  SparseSet vs Vector: 11.58x

--- 内存效率 ---
插入100人:
  SparseSet容量: 128
  Vector容量:    1000
删除90人后(剩10人):
  SparseSet容量: 128
  Vector容量:    1000

最终稀疏集类头文件：

// sparse_set.h
// 高性能稀疏集头文件，支持uint64_t、void*等类型
// ygluu, ai

#pragma once

#include <cstdint>
#include <vector>
#include <type_traits>
#include <cstring>

#ifdef __GNUC__
    #define SPARSE_ALWAYS_INLINE __attribute__((always_inline))
    #define SPARSE_LIKELY(x) __builtin_expect(!!(x), 1)
    #define SPARSE_UNLIKELY(x) __builtin_expect(!!(x), 0)
    #define SPARSE_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
    #define SPARSE_ALWAYS_INLINE
    #define SPARSE_LIKELY(x) (x)
    #define SPARSE_UNLIKELY(x) (x)
    #define SPARSE_PREFETCH(addr, rw, locality)
#endif

namespace sset {

// 类型萃取：获取整数表示类型
template<typename T>
struct id_traits {
    using int_type = T;
    static constexpr int_type EMPTY = static_cast<int_type>(-1);
    
    static SPARSE_ALWAYS_INLINE int_type to_int(T val) { return val; }
    static SPARSE_ALWAYS_INLINE T from_int(int_type val) { return val; }
};

// void*特化
template<>
struct id_traits<void*> {
    using int_type = uintptr_t;
    static constexpr int_type EMPTY = static_cast<int_type>(-1);
    
    static SPARSE_ALWAYS_INLINE int_type to_int(void* val) { 
        return reinterpret_cast<int_type>(val); 
    }
    static SPARSE_ALWAYS_INLINE void* from_int(int_type val) { 
        return reinterpret_cast<void*>(val); 
    }
};

// 其他指针类型特化
template<typename T>
struct id_traits<T*> {
    using int_type = uintptr_t;
    static constexpr int_type EMPTY = static_cast<int_type>(-1);
    
    static SPARSE_ALWAYS_INLINE int_type to_int(T* val) { 
        return reinterpret_cast<int_type>(val); 
    }
    static SPARSE_ALWAYS_INLINE T* from_int(int_type val) { 
        return reinterpret_cast<T*>(val); 
    }
};

// ==================== 稀疏集模板类 ====================

template<typename T>
class alignas(64) SparseSet {
    using traits = id_traits<T>;
    using int_type = typename traits::int_type;
    
    static constexpr int_type EMPTY = traits::EMPTY;
    
    struct Node {
        int_type id = EMPTY;
        uint32_t dense_idx = 0;
    };

    std::vector<Node> nodes_;
    std::vector<T> dense_;
    std::vector<uint32_t> slot_of_;
    
    uint32_t mask_ = 0;
    uint32_t size_ = 0;

    // 哈希函数（针对uintptr_t优化）
    static SPARSE_ALWAYS_INLINE uint64_t hash(int_type x) {
        // 对指针类型做混合，避免低位相同
        if constexpr (sizeof(int_type) == 8) {
            x ^= x >> 33;
            x *= 0xff51afd7ed558ccdULL;
            x ^= x >> 33;
            x *= 0xc4ceb9fe1a85ec53ULL;
            return x ^ (x >> 33);
        } else {
            x ^= x >> 16;
            x *= 0x45d9f3bULL;
            x ^= x >> 16;
            x *= 0x45d9f3bULL;
            return x ^ (x >> 16);
        }
    }

    SPARSE_ALWAYS_INLINE uint32_t ideal_slot(int_type id) const {
        return static_cast<uint32_t>(hash(id)) & mask_;
    }

    void rebuild(uint32_t new_cap) {
        std::vector<Node> old_nodes = std::move(nodes_);
        std::vector<T> old_dense = std::move(dense_);
        
        nodes_.assign(new_cap, Node{EMPTY, 0});
        mask_ = new_cap - 1;
        dense_.clear();
        dense_.reserve(new_cap >> 1);
        slot_of_.clear();
        slot_of_.reserve(new_cap >> 1);
        size_ = 0;
        
        for (const auto& val : old_dense) {
            int_type id = traits::to_int(val);
            if (id != EMPTY) insert_impl(id);
        }
    }

    // 内部插入（使用int_type）
    SPARSE_ALWAYS_INLINE void insert_impl(int_type id) {
        uint32_t slot = ideal_slot(id);
        uint32_t dist = 0;
        Node insert_node{id, 0};
        
        while (true) {
            Node& cur = nodes_[slot];
            
            if (cur.id == EMPTY) {
                uint32_t idx = size_;
                insert_node.dense_idx = idx;
                cur = insert_node;
                dense_.push_back(traits::from_int(id));
                slot_of_.push_back(slot);
                ++size_;
                return;
            }
            if (cur.id == id) return;
            
            uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_;
            if (cur_dist < dist) {
                std::swap(cur, insert_node);
                dist = cur_dist;
            }
            
            slot = (slot + 1) & mask_;
            ++dist;
        }
    }

public:
    SparseSet() {
        nodes_.assign(16, Node{EMPTY, 0});
        mask_ = 15;
        dense_.reserve(8);
        slot_of_.reserve(8);
    }

    explicit SparseSet(uint32_t reserve) {
        uint32_t cap = 16;
        while (cap < reserve * 2) cap <<= 1;
        nodes_.assign(cap, Node{EMPTY, 0});
        mask_ = cap - 1;
        dense_.reserve(reserve);
        slot_of_.reserve(reserve);
    }

    // 公开接口：使用原始类型T
    SPARSE_ALWAYS_INLINE void insert(T val) {
        int_type id = traits::to_int(val);
        if (SPARSE_UNLIKELY(id == EMPTY)) return;
        
        if (SPARSE_UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
            rebuild((mask_ + 1) << 1);
        }
        
        insert_impl(id);
    }

    SPARSE_ALWAYS_INLINE bool erase(T val) {
        int_type id = traits::to_int(val);
        if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        
        while (nodes_[slot].id != EMPTY) {
            Node& cur = nodes_[slot];
            
            if (cur.id == id) {
                uint32_t idx = cur.dense_idx;
                uint32_t last = size_ - 1;
                
                if (idx != last) {
                    T last_val = dense_[last];
                    uint32_t last_slot = slot_of_[last];
                    
                    dense_[idx] = last_val;
                    slot_of_[idx] = last_slot;
                    nodes_[last_slot].dense_idx = idx;
                }
                
                dense_.pop_back();
                slot_of_.pop_back();
                cur.id = EMPTY;
                --size_;
                return true;
            }
            
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    SPARSE_ALWAYS_INLINE bool contains(T val) const {
        int_type id = traits::to_int(val);
        if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false;
        
        uint32_t slot = ideal_slot(id);
        while (nodes_[slot].id != EMPTY) {
            if (nodes_[slot].id == id) return true;
            slot = (slot + 1) & mask_;
        }
        return false;
    }

    template<typename Fn>
    SPARSE_ALWAYS_INLINE void foreach(Fn&& fn) const {
        const uint32_t n = size_;
        if (n == 0) return;
        
        const T* d = dense_.data();
        uint32_t i = 0;
        
        SPARSE_PREFETCH(&d[0], 0, 3);
        SPARSE_PREFETCH(&d[16], 0, 3);
        
        for (; i + 8 <= n; i += 8) {
            SPARSE_PREFETCH(&d[i + 32], 0, 3);
            fn(d[i]);   fn(d[i+1]); fn(d[i+2]); fn(d[i+3]);
            fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]);
        }
        
        for (; i < n; ++i) fn(d[i]);
    }

    void clear() {
        for (uint32_t i = 0; i < size_; ++i) {
            nodes_[slot_of_[i]].id = EMPTY;
        }
        dense_.clear();
        slot_of_.clear();
        size_ = 0;
    }

    void reserve(uint32_t n) {
        if (n > (mask_ + 1) * 0.5f) {
            uint32_t new_cap = 16;
            while (new_cap < n * 2) new_cap <<= 1;
            rebuild(new_cap);
        }
        dense_.reserve(n);
        slot_of_.reserve(n);
    }

    uint32_t size() const { return size_; }
    bool empty() const { return size_ == 0; }
    uint32_t capacity() const { return dense_.capacity(); }
    const T* data() const { return dense_.data(); }
};

// 常用类型别名
using SparseSetU64 = SparseSet<std::uint64_t>;
using SparseSetPtr = SparseSet<void*>;

} // namespace

posted @ 2026-03-15 21:21 码客-ygluu 阅读(4) 评论(0) 收藏举报

刷新页面返回顶部

码客 ygluu

匠心隐于形 (QQ/WX: 48092788）

C++ vector、unordered_set和稀疏集的增删遍历性能对比

C++源码

测试结果（O3编译）：

优化版本

优化后的结果（O3编译）：

极致优化的源码：

极致优化的结果：

最终稀疏集类头文件：

公告