C++源码
// 优化版: std::vector vs 开放寻址哈希(二次探测+批量API) vs std::unordered_set
#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <numeric>
#include <cstring>
#ifdef __GNUC__
#define ALWAYS_INLINE __attribute__((always_inline))
#define PURE __attribute__((pure))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define ALWAYS_INLINE
#define PURE
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
using player_id_t = uint64_t;
using namespace std::chrono;
// ==================== 方案1: std::vector ====================
class VectorSet {
std::vector<player_id_t> data_;
public:
explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
ALWAYS_INLINE bool erase(player_id_t id) {
auto it = std::find(data_.begin(), data_.end(), id);
if (it == data_.end()) return false;
*it = data_.back();
data_.pop_back();
return true;
}
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : data_) fn(id);
}
[[nodiscard]] size_t size() const { return data_.size(); }
void clear() { data_.clear(); }
// 批量操作
void insert_batch(const player_id_t* ids, size_t count) {
data_.reserve(data_.size() + count);
for (size_t i = 0; i < count; ++i) {
data_.push_back(ids[i]);
}
}
};
// ==================== 方案2: 开放寻址哈希 (优化版) ====================
class HashSparseSet {
struct Node {
player_id_t id = 0; // 0 表示空槽
uint32_t dense_idx = 0; // 在 dense 数组中的索引
};
std::vector<Node> nodes_; // 哈希表(开放寻址)
std::vector<player_id_t> dense_; // 密集存储,用于快速遍历
std::vector<uint32_t> slot_of_; // dense -> slot 反向映射(用于快速删除)
uint32_t mask_ = 0; // 容量掩码(2^n - 1)
uint32_t size_ = 0; // 当前元素数量
// 二次探测序列: 0, 1, 3, 6, 10... (三角形数)
// 公式: i * (i + 1) / 2
static ALWAYS_INLINE uint32_t probe_offset(uint32_t i) {
return (i * (i + 1)) >> 1;
}
// MurmurHash3 风格的 64位哈希(更好的分布)
PURE static uint64_t hash(player_id_t x) {
// 混合位,减少连续ID的碰撞
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
x ^= x >> 33;
return x;
}
void grow() {
uint32_t old_cap = static_cast<uint32_t>(nodes_.size());
uint32_t new_cap = old_cap ? old_cap << 1 : 16;
// 保存旧数据
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<player_id_t> old_dense = std::move(dense_);
// 扩容
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(new_cap >> 1); // 负载因子 0.5
slot_of_.clear();
slot_of_.reserve(new_cap >> 1);
size_ = 0;
// 重新插入
for (auto id : old_dense) {
if (id != 0) insert(id);
}
}
// 查找槽位:返回找到的槽位或空槽
// found 输出参数表示是否找到目标ID
ALWAYS_INLINE uint32_t find_slot(player_id_t id, bool& found) const {
found = false;
if (UNLIKELY(nodes_.empty())) return 0;
const uint32_t start = static_cast<uint32_t>(hash(id)) & mask_;
for (uint32_t i = 0; i <= mask_; ++i) {
const uint32_t slot = (start + probe_offset(i)) & mask_;
const player_id_t slot_id = nodes_[slot].id;
if (UNLIKELY(slot_id == 0)) {
return slot; // 空槽,插入位置
}
if (slot_id == id) {
found = true;
return slot; // 找到目标
}
}
return UINT32_MAX; // 表满(理论上不发生)
}
public:
explicit HashSparseSet(size_t reserve = 1000) {
uint32_t cap = 16;
while (cap < reserve * 2) cap <<= 1; // 负载因子 0.5
nodes_.assign(cap, {});
mask_ = cap - 1;
dense_.reserve(reserve);
slot_of_.reserve(reserve);
}
// 单次插入
ALWAYS_INLINE void insert(player_id_t id) {
if (UNLIKELY(id == 0)) return; // 0 保留为空槽标记
bool found;
uint32_t slot = find_slot(id, found);
if (UNLIKELY(slot == UINT32_MAX)) {
grow();
slot = find_slot(id, found);
}
if (found) return; // 已存在
// 新元素
uint32_t idx = static_cast<uint32_t>(dense_.size());
nodes_[slot] = {id, idx};
dense_.push_back(id);
slot_of_.push_back(slot);
++size_;
}
// 批量插入(优化版)- 预分配 + 减少重复计算
void insert_batch(const player_id_t* ids, size_t count) {
// 预检查容量
if (dense_.size() + count > (nodes_.size() >> 1)) {
uint32_t need = static_cast<uint32_t>(dense_.size() + count);
uint32_t new_cap = static_cast<uint32_t>(nodes_.size());
while ((new_cap >> 1) < need) new_cap <<= 1;
// 重建表
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<player_id_t> old_dense = std::move(dense_);
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(need);
slot_of_.clear();
slot_of_.reserve(need);
size_ = 0;
// 先插入旧元素
for (auto id : old_dense) {
if (id != 0) insert(id);
}
}
// 批量插入新元素
for (size_t i = 0; i < count; ++i) {
insert(ids[i]);
}
}
ALWAYS_INLINE bool erase(player_id_t id) {
bool found;
uint32_t slot = find_slot(id, found);
if (!found) return false;
uint32_t idx = nodes_[slot].dense_idx;
uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1;
// 标记为空槽
nodes_[slot].id = 0;
if (idx != last_idx) {
// 用末尾元素填充空洞
player_id_t last_id = dense_.back();
uint32_t last_slot = slot_of_[last_idx];
dense_[idx] = last_id;
slot_of_[idx] = last_slot;
nodes_[last_slot].dense_idx = idx;
}
dense_.pop_back();
slot_of_.pop_back();
--size_;
return true;
}
// 批量删除(优化:先标记再清理,或逐个处理)
void erase_batch(const player_id_t* ids, size_t count) {
// 简单实现:逐个删除
// 对于大量删除,可以优化为:标记删除 -> 重建 dense 数组
for (size_t i = 0; i < count; ++i) {
erase(ids[i]);
}
}
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
// 预取优化
for (size_t i = 0; i < dense_.size(); ++i) {
if (i + 4 < dense_.size()) {
__builtin_prefetch(&dense_[i + 4], 0, 3);
}
fn(dense_[i]);
}
}
[[nodiscard]] size_t size() const { return size_; }
[[nodiscard]] bool empty() const { return size_ == 0; }
void clear() {
// 快速清空:只重置使用的槽位
for (auto id : dense_) {
if (id != 0) {
bool found;
uint32_t slot = find_slot(id, found);
if (found) nodes_[slot].id = 0;
}
}
dense_.clear();
slot_of_.clear();
size_ = 0;
}
// 预分配接口
void reserve(size_t n) {
if (n > (nodes_.size() >> 1)) {
uint32_t need = static_cast<uint32_t>(n);
uint32_t new_cap = 16;
while ((new_cap >> 1) < need) new_cap <<= 1;
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.reserve(n);
slot_of_.reserve(n);
}
}
};
// ==================== 方案3: std::unordered_set (优化版) ====================
class StdHashSet {
std::unordered_set<player_id_t> set_;
public:
explicit StdHashSet(size_t reserve = 1000) {
set_.reserve(reserve * 2); // 考虑负载因子
}
ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : set_) fn(id);
}
[[nodiscard]] size_t size() const { return set_.size(); }
void clear() { set_.clear(); }
void insert_batch(const player_id_t* ids, size_t count) {
set_.reserve(set_.size() + count);
for (size_t i = 0; i < count; ++i) {
set_.insert(ids[i]);
}
}
};
// ==================== 测试框架 ====================
struct Result {
double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
double batch_insert_us = 0; // 新增批量测试
};
class Benchmark {
std::vector<player_id_t> ids_, del_;
int n_;
public:
Benchmark(int n, int seed) : n_(n) {
std::mt19937_64 rng(seed);
for (int i = 0; i < n; ++i) ids_.push_back(rng());
del_ = ids_;
std::shuffle(del_.begin(), del_.end(), rng);
}
template<typename Set>
Result run() {
Result r;
Set set(1000);
// 测试1: 单次插入
auto t0 = high_resolution_clock::now();
for (auto id : ids_) set.insert(id);
auto t1 = high_resolution_clock::now();
// 测试2: 遍历
volatile uint64_t sum = 0;
set.foreach([&](player_id_t id) { sum += id; });
auto t2 = high_resolution_clock::now();
// 测试3: 单次删除
for (auto id : del_) set.erase(id);
auto t3 = high_resolution_clock::now();
r.insert_us = duration<double, std::micro>(t1-t0).count();
r.foreach_us = duration<double, std::micro>(t2-t1).count();
r.erase_us = duration<double, std::micro>(t3-t2).count();
r.total_us = duration<double, std::micro>(t3-t0).count();
(void)sum;
return r;
}
// 批量操作测试
template<typename Set>
Result run_batch() {
Result r;
Set set(1000);
// 批量插入
auto t0 = high_resolution_clock::now();
set.insert_batch(ids_.data(), ids_.size());
auto t1 = high_resolution_clock::now();
// 遍历
volatile uint64_t sum = 0;
set.foreach([&](player_id_t id) { sum += id; });
auto t2 = high_resolution_clock::now();
// 批量删除(模拟)
set.clear();
auto t3 = high_resolution_clock::now();
r.batch_insert_us = duration<double, std::micro>(t1-t0).count();
r.foreach_us = duration<double, std::micro>(t2-t1).count();
r.total_us = duration<double, std::micro>(t3-t0).count();
(void)sum;
return r;
}
};
int main() {
constexpr int ROUNDS = 100;
std::cout << "=================================================================\n";
std::cout << "优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set\n";
std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n";
std::cout << "=================================================================\n\n";
std::vector<Result> vec_res, open_res, std_res;
std::vector<Result> vec_batch, open_batch, std_batch;
for (int i = 0; i < ROUNDS; ++i) {
Benchmark bm(1000, i);
vec_res.push_back(bm.run<VectorSet>());
open_res.push_back(bm.run<HashSparseSet>());
std_res.push_back(bm.run<StdHashSet>());
vec_batch.push_back(bm.run_batch<VectorSet>());
open_batch.push_back(bm.run_batch<HashSparseSet>());
std_batch.push_back(bm.run_batch<StdHashSet>());
}
auto avg = [](auto& v, auto f) {
double s = 0;
for (auto& r : v) s += r.*f;
return s / v.size();
};
// 单次操作结果
std::cout << "--- 单次操作测试 ---\n";
const char* names[] = {"INSERT", "FOREACH", "ERASE", "TOTAL"};
auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::erase_us, &Result::total_us};
auto it = ptrs.begin();
for (const char* name : names) {
double va = avg(vec_res, *it);
double oa = avg(open_res, *it);
double sa = avg(std_res, *it);
std::cout << "[" << name << "]\n";
std::cout << " Vector: " << std::fixed << std::setprecision(2)
<< std::setw(10) << va << " us\n";
std::cout << " OpenHash: " << std::setw(10) << oa << " us";
if (name[0] == 'E' || name[0] == 'T')
std::cout << " (" << std::setprecision(2) << (va/oa) << "x vs Vector)";
else if (name[0] == 'I')
std::cout << " (" << std::setprecision(2) << (oa/va) << "x vs Vector)";
std::cout << "\n";
std::cout << " StdUnordered: " << std::setw(10) << sa << " us";
if (name[0] == 'E' || name[0] == 'T')
std::cout << " (" << std::setprecision(2) << (va/sa) << "x vs Vector)";
else if (name[0] == 'I')
std::cout << " (" << std::setprecision(2) << (sa/va) << "x vs Vector)";
std::cout << "\n\n";
++it;
}
// 批量操作结果
std::cout << "--- 批量操作测试 ---\n";
std::cout << "[BATCH INSERT]\n";
double vb = avg(vec_batch, &Result::batch_insert_us);
double ob = avg(open_batch, &Result::batch_insert_us);
double sb = avg(std_batch, &Result::batch_insert_us);
std::cout << " Vector: " << std::fixed << std::setprecision(2)
<< std::setw(10) << vb << " us\n";
std::cout << " OpenHash: " << std::setw(10) << ob << " us"
<< " (" << std::setprecision(2) << (ob/vb) << "x vs Vector)\n";
std::cout << " StdUnordered: " << std::setw(10) << sb << " us"
<< " (" << std::setprecision(2) << (sb/vb) << "x vs Vector)\n";
return 0;
}
测试结果(O3编译):
=================================================================
优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set
1000 random uint64_t IDs, 100 rounds
=================================================================
--- 单次操作测试 ---
[INSERT]
Vector: 0.37 us
OpenHash: 7.98 us (21.73x vs Vector)
StdUnordered: 22.21 us (60.51x vs Vector)
[FOREACH]
Vector: 0.40 us
OpenHash: 0.57 us
StdUnordered: 1.51 us
[ERASE]
Vector: 49.77 us
OpenHash: 6.88 us (7.23x vs Vector)
StdUnordered: 16.29 us (3.06x vs Vector)
[TOTAL]
Vector: 50.54 us
OpenHash: 15.43 us (3.28x vs Vector)
StdUnordered: 40.01 us (1.26x vs Vector)
优化版本
// 极致性能优化版稀疏集:Robin Hood哈希 + 软件预取 + 缓存行优化
#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>
#include <cstring>
#include <cassert>
#ifdef __GNUC__
#define ALWAYS_INLINE __attribute__((always_inline))
#define PURE __attribute__((pure))
#define HOT __attribute__((hot))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
#define ALWAYS_INLINE
#define PURE
#define HOT
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#define PREFETCH(addr, rw, locality)
#endif
using player_id_t = uint64_t;
using namespace std::chrono;
// ==================== 极致优化版:Robin Hood稀疏集 ====================
//
// 核心优化策略(基于[^8^][^13^]业界最佳实践):
// 1. Robin Hood哈希:控制最大探测长度,保证O(1)最坏情况
// 2. 分离存储:稀疏数组(sparse)只存索引,密集数组(dense)存实际ID
// 3. 软件预取:遍历前预取缓存行,减少内存延迟
// 4. 缓存行对齐:确保dense数组按64字节边界对齐
// 5. 零拷贝删除:swap-and-pop避免内存移动
class alignas(64) RobinHoodSparseSet {
public:
// 节点结构:8字节ID + 4字节密集索引 + 4字节距离 = 16字节(缓存行对齐友好)
struct Node {
player_id_t id; // 8 bytes: 存储的ID(0表示空槽)
uint32_t dense_idx; // 4 bytes: 在dense数组中的位置
uint32_t dist; // 4 bytes: 距离理想位置的距离(Robin Hood关键)
bool empty() const { return id == 0; }
};
static_assert(sizeof(Node) == 16, "Node size should be 16 bytes for cache efficiency");
private:
std::vector<Node> nodes_; // 稀疏数组:哈希表本体
std::vector<player_id_t> dense_; // 密集数组:连续存储所有有效ID
std::vector<uint32_t> sparse_to_dense_; // 反向映射:dense索引 -> 稀疏槽位
uint32_t mask_ = 0; // 容量掩码(2^n - 1)
uint32_t size_ = 0; // 当前元素数量
uint32_t max_dist_ = 0; // 当前最大探测距离(用于快速失败查找)
// 哈希函数:SplitMix64 - 高质量且快速
PURE static ALWAYS_INLINE uint64_t hash(player_id_t x) {
// SplitMix64算法:比MurmurHash更快,分布质量足够好
x += 0x9e3779b97f4a7c15ULL;
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
return x ^ (x >> 31);
}
// 计算理想位置
PURE ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const {
return static_cast<uint32_t>(hash(id)) & mask_;
}
// 扩容并重建(保留旧元素)
HOT void grow() {
uint32_t old_cap = mask_ ? mask_ + 1 : 0;
uint32_t new_cap = old_cap ? old_cap << 1 : 16;
// 保存旧数据
std::vector<player_id_t> old_dense = std::move(dense_);
// 重置结构
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(new_cap >> 1); // 负载因子0.5
sparse_to_dense_.clear();
sparse_to_dense_.reserve(new_cap >> 1);
size_ = 0;
max_dist_ = 0;
// 重新插入所有元素(此时会重建Robin Hood顺序)
for (auto id : old_dense) {
if (id != 0) insert_robin_hood(id);
}
}
// Robin Hood插入核心:富者愈富,穷者愈穷
HOT void insert_robin_hood(player_id_t id) {
assert(id != 0 && "ID 0 is reserved for empty slot");
uint32_t slot = ideal_slot(id);
uint32_t dist = 0;
Node insert_node{id, 0, 0}; // dense_idx会在后面设置
while (true) {
Node& current = nodes_[slot];
if (current.empty()) {
// 找到空槽,放置元素
uint32_t idx = static_cast<uint32_t>(dense_.size());
insert_node.dense_idx = idx;
insert_node.dist = dist;
current = insert_node;
dense_.push_back(id);
sparse_to_dense_.push_back(slot);
if (dist > max_dist_) max_dist_ = dist;
++size_;
return;
}
if (current.id == id) {
return; // 已存在,不重复插入
}
// Robin Hood交换:如果当前元素比新元素"更富"(离理想位置更近)
if (current.dist < dist) {
// 新元素更"穷",抢夺这个位置
std::swap(current, insert_node);
dist = current.dist; // 继续携带被交换的元素
insert_node.dist = dist;
}
// 继续探测
++dist;
slot = (slot + 1) & mask_;
// 安全检查:理论上不会发生,因为负载因子0.5
if (UNLIKELY(dist > mask_)) {
grow();
insert_robin_hood(insert_node.id);
return;
}
}
}
public:
explicit RobinHoodSparseSet(size_t reserve = 1000) {
uint32_t cap = 16;
while (cap < reserve * 2) cap <<= 1; // 保证负载因子0.5
nodes_.assign(cap, {});
mask_ = cap - 1;
dense_.reserve(reserve);
sparse_to_dense_.reserve(reserve);
}
// 单次插入
HOT ALWAYS_INLINE void insert(player_id_t id) {
if (UNLIKELY(id == 0)) return;
// 快速路径:检查是否需要扩容(负载因子0.75触发)
if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
grow();
}
insert_robin_hood(id);
}
// 批量插入:预分配 + 减少哈希计算
HOT void insert_batch(const player_id_t* ids, size_t count) {
if (UNLIKELY(count == 0)) return;
// 预检查容量,一次性扩容
uint32_t need = static_cast<uint32_t>(size_ + count);
if (need > (mask_ + 1) * 0.75f) {
uint32_t new_cap = mask_ + 1;
while (new_cap < need * 2) new_cap <<= 1;
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<player_id_t> old_dense = std::move(dense_);
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(need);
sparse_to_dense_.clear();
sparse_to_dense_.reserve(need);
size_ = 0;
max_dist_ = 0;
// 先插入旧元素
for (auto old_id : old_dense) {
if (old_id != 0) insert_robin_hood(old_id);
}
}
// 批量插入新元素
for (size_t i = 0; i < count; ++i) {
if (ids[i] != 0) insert_robin_hood(ids[i]);
}
}
// 查找(利用max_dist_快速失败)
PURE ALWAYS_INLINE bool contains(player_id_t id) const {
if (UNLIKELY(id == 0 || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
// 最多探测max_dist_ + 1次
for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
const Node& node = nodes_[slot];
if (node.empty()) return false;
if (node.id == id) return true;
if (node.dist < dist) return false; // Robin Hood性质:不可能在后面
slot = (slot + 1) & mask_;
}
return false;
}
// 删除:swap-and-pop(零拷贝)
HOT ALWAYS_INLINE bool erase(player_id_t id) {
if (UNLIKELY(id == 0 || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
Node& node = nodes_[slot];
if (node.empty()) return false;
if (node.id == id) {
// 找到目标,执行删除
uint32_t idx = node.dense_idx;
uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1;
// 标记为空槽
node.id = 0;
node.dist = 0;
if (idx != last_idx) {
// swap-and-pop:用末尾元素填充空洞
player_id_t last_id = dense_.back();
uint32_t last_slot = sparse_to_dense_[last_idx];
dense_[idx] = last_id;
sparse_to_dense_[idx] = last_slot;
nodes_[last_slot].dense_idx = idx;
}
dense_.pop_back();
sparse_to_dense_.pop_back();
--size_;
// 可选:压缩max_dist_(延迟更新以节省CPU)
return true;
}
if (node.dist < dist) return false;
slot = (slot + 1) & mask_;
}
return false;
}
// 极致优化的遍历:软件预取 + 循环展开
template<typename Fn>
HOT ALWAYS_INLINE void foreach(Fn&& fn) const {
const size_t n = dense_.size();
if (n == 0) return;
const player_id_t* data = dense_.data();
// 主循环:4路展开 + 软件预取
size_t i = 0;
for (; i + 4 <= n; i += 4) {
// 预取未来4个缓存行(假设每个ID 8字节,4个ID 32字节,小于64字节缓存行)
PREFETCH(&data[i + 8], 0, 3); // 预取8个元素后的数据
fn(data[i]);
fn(data[i + 1]);
fn(data[i + 2]);
fn(data[i + 3]);
}
// 处理剩余元素
for (; i < n; ++i) {
fn(data[i]);
}
}
// 带索引的遍历(用于需要知道位置的场景)
template<typename Fn>
HOT ALWAYS_INLINE void foreach_indexed(Fn&& fn) const {
const size_t n = dense_.size();
for (size_t i = 0; i < n; ++i) {
fn(i, dense_[i]);
}
}
// 快速清空:只重置使用的槽位
HOT void clear() {
// 只遍历dense数组,标记对应的稀疏槽位
for (auto id : dense_) {
if (id != 0) {
uint32_t slot = ideal_slot(id);
// 由于Robin Hood性质,需要找到实际位置
for (uint32_t dist = 0; dist <= max_dist_; ++dist) {
if (nodes_[slot].id == id) {
nodes_[slot].id = 0;
nodes_[slot].dist = 0;
break;
}
slot = (slot + 1) & mask_;
}
}
}
dense_.clear();
sparse_to_dense_.clear();
size_ = 0;
max_dist_ = 0;
}
// 预分配接口
void reserve(size_t n) {
if (n > (mask_ + 1) * 0.5f) {
uint32_t new_cap = 16;
while (new_cap < n * 2) new_cap <<= 1;
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<player_id_t> old_dense = std::move(dense_);
nodes_.assign(new_cap, {});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(n);
sparse_to_dense_.clear();
sparse_to_dense_.reserve(n);
size_ = 0;
max_dist_ = 0;
for (auto id : old_dense) {
if (id != 0) insert_robin_hood(id);
}
}
}
[[nodiscard]] size_t size() const { return size_; }
[[nodiscard]] bool empty() const { return size_ == 0; }
[[nodiscard]] size_t capacity() const { return dense_.capacity(); }
// 获取原始密集数组指针(用于外部批量处理)
const player_id_t* data() const { return dense_.data(); }
};
// ==================== 对比方案:原始Vector ====================
class VectorSet {
std::vector<player_id_t> data_;
public:
explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
ALWAYS_INLINE bool erase(player_id_t id) {
auto it = std::find(data_.begin(), data_.end(), id);
if (it == data_.end()) return false;
*it = data_.back();
data_.pop_back();
return true;
}
ALWAYS_INLINE bool contains(player_id_t id) const {
return std::find(data_.begin(), data_.end(), id) != data_.end();
}
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : data_) fn(id);
}
void insert_batch(const player_id_t* ids, size_t count) {
data_.reserve(data_.size() + count);
for (size_t i = 0; i < count; ++i) data_.push_back(ids[i]);
}
[[nodiscard]] size_t size() const { return data_.size(); }
void clear() { data_.clear(); }
};
// ==================== 对比方案:std::unordered_set ====================
class StdHashSet {
std::unordered_set<player_id_t> set_;
public:
explicit StdHashSet(size_t reserve = 1000) {
set_.reserve(reserve * 2);
}
ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }
ALWAYS_INLINE bool contains(player_id_t id) const {
return set_.find(id) != set_.end();
}
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : set_) fn(id);
}
void insert_batch(const player_id_t* ids, size_t count) {
set_.reserve(set_.size() + count);
for (size_t i = 0; i < count; ++i) set_.insert(ids[i]);
}
[[nodiscard]] size_t size() const { return set_.size(); }
void clear() { set_.clear(); }
};
// ==================== 测试框架 ====================
struct Result {
double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
double batch_insert_us = 0;
double lookup_us = 0; // 新增查找测试
};
class Benchmark {
std::vector<player_id_t> ids_, del_, lookup_;
int n_;
public:
Benchmark(int n, int seed) : n_(n) {
std::mt19937_64 rng(seed);
for (int i = 0; i < n; ++i) ids_.push_back(rng() | 1); // 确保非0
del_ = ids_;
std::shuffle(del_.begin(), del_.end(), rng);
// 生成查找用ID(一半存在,一半不存在)
for (int i = 0; i < n; ++i) {
if (i % 2 == 0) lookup_.push_back(ids_[i]);
else lookup_.push_back(rng() | 1);
}
std::shuffle(lookup_.begin(), lookup_.end(), rng);
}
template<typename Set>
Result run() {
Result r;
Set set(1000);
// 插入
auto t0 = high_resolution_clock::now();
for (auto id : ids_) set.insert(id);
auto t1 = high_resolution_clock::now();
// 遍历
volatile uint64_t sum = 0;
set.foreach([&](player_id_t id) { sum += id; });
auto t2 = high_resolution_clock::now();
// 查找(50%命中率)
volatile bool found = false;
for (auto id : lookup_) found = set.contains(id);
auto t3 = high_resolution_clock::now();
// 删除
for (auto id : del_) set.erase(id);
auto t4 = high_resolution_clock::now();
r.insert_us = duration<double, std::micro>(t1-t0).count();
r.foreach_us = duration<double, std::micro>(t2-t1).count();
r.lookup_us = duration<double, std::micro>(t3-t2).count();
r.erase_us = duration<double, std::micro>(t4-t3).count();
r.total_us = duration<double, std::micro>(t4-t0).count();
(void)sum;
(void)found;
return r;
}
template<typename Set>
Result run_batch() {
Result r;
Set set(1000);
// 批量插入
auto t0 = high_resolution_clock::now();
set.insert_batch(ids_.data(), ids_.size());
auto t1 = high_resolution_clock::now();
// 遍历
volatile uint64_t sum = 0;
set.foreach([&](player_id_t id) { sum += id; });
auto t2 = high_resolution_clock::now();
r.batch_insert_us = duration<double, std::micro>(t1-t0).count();
r.foreach_us = duration<double, std::micro>(t2-t1).count();
r.total_us = duration<double, std::micro>(t2-t0).count();
(void)sum;
return r;
}
};
int main() {
constexpr int ROUNDS = 100;
std::cout << "=================================================================\n";
std::cout << "极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set\n";
std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n";
std::cout << "=================================================================\n\n";
std::vector<Result> vec_res, robin_res, std_res;
std::vector<Result> vec_batch, robin_batch, std_batch;
for (int i = 0; i < ROUNDS; ++i) {
Benchmark bm(1000, i);
vec_res.push_back(bm.run<VectorSet>());
robin_res.push_back(bm.run<RobinHoodSparseSet>());
std_res.push_back(bm.run<StdHashSet>());
vec_batch.push_back(bm.run_batch<VectorSet>());
robin_batch.push_back(bm.run_batch<RobinHoodSparseSet>());
std_batch.push_back(bm.run_batch<StdHashSet>());
}
auto avg = [](auto& v, auto f) {
double s = 0;
for (auto& r : v) s += r.*f;
return s / v.size();
};
// 单次操作结果
std::cout << "--- 单次操作测试 ---\n";
const char* names[] = {"INSERT", "FOREACH", "LOOKUP", "ERASE", "TOTAL"};
auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::lookup_us, &Result::erase_us, &Result::total_us};
auto it = ptrs.begin();
for (const char* name : names) {
double va = avg(vec_res, *it);
double ra = avg(robin_res, *it);
double sa = avg(std_res, *it);
std::cout << "[" << name << "]\n";
std::cout << " Vector: " << std::fixed << std::setprecision(2)
<< std::setw(10) << va << " us\n";
std::cout << " RobinHood: " << std::setw(10) << ra << " us";
if (va > 0.01) {
double speedup = va / ra;
std::cout << " (" << std::setprecision(2) << speedup << "x vs Vector)";
}
std::cout << "\n";
std::cout << " StdUnordered: " << std::setw(10) << sa << " us";
if (va > 0.01) {
double speedup = va / sa;
std::cout << " (" << std::setprecision(2) << speedup << "x vs Vector)";
}
std::cout << "\n\n";
++it;
}
// 批量操作结果
std::cout << "--- 批量操作测试 ---\n";
std::cout << "[BATCH INSERT + FOREACH]\n";
double vb = avg(vec_batch, &Result::total_us);
double rb = avg(robin_batch, &Result::total_us);
double sb = avg(std_batch, &Result::total_us);
std::cout << " Vector: " << std::fixed << std::setprecision(2)
<< std::setw(10) << vb << " us\n";
std::cout << " RobinHood: " << std::setw(10) << rb << " us"
<< " (" << std::setprecision(2) << (vb/rb) << "x vs Vector)\n";
std::cout << " StdUnordered: " << std::setw(10) << sb << " us"
<< " (" << std::setprecision(2) << (vb/sb) << "x vs Vector)\n";
return 0;
}
优化后的结果(O3编译):
=================================================================
极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set
1000 random uint64_t IDs, 100 rounds
=================================================================
--- 单次操作测试 ---
[INSERT]
Vector: 0.35 us
RobinHood: 9.79 us (0.04x vs Vector)
StdUnordered: 18.59 us (0.02x vs Vector)
[FOREACH]
Vector: 0.39 us
RobinHood: 0.40 us (0.98x vs Vector)
StdUnordered: 1.49 us (0.26x vs Vector)
[LOOKUP]
Vector: 125.58 us
RobinHood: 12.22 us (10.28x vs Vector)
StdUnordered: 13.15 us (9.55x vs Vector)
[ERASE]
Vector: 49.28 us
RobinHood: 7.03 us (7.01x vs Vector)
StdUnordered: 13.78 us (3.58x vs Vector)
[TOTAL]
Vector: 175.60 us
RobinHood: 29.44 us (5.96x vs Vector)
StdUnordered: 47.00 us (3.74x vs Vector)
--- 批量操作测试 ---
[BATCH INSERT + FOREACH]
Vector: 0.73 us
RobinHood: 8.54 us (0.09x vs Vector)
StdUnordered: 25.00 us (0.03x vs Vector)
极致优化的源码:
// 精简版:只保留最优insert和erase,动态扩容,无内存浪费
#include <cstdint>
#include <vector>
#include <algorithm>
#include <random>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <unordered_set>
#ifdef __GNUC__
#define ALWAYS_INLINE __attribute__((always_inline))
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
#define ALWAYS_INLINE
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#define PREFETCH(addr, rw, locality)
#endif
using player_id_t = uint64_t;
using namespace std::chrono;
// ==================== 动态稀疏集:最优insert + 最优erase ====================
class alignas(64) SparseSet {
static constexpr player_id_t EMPTY = UINT64_MAX;
struct Node {
player_id_t id = EMPTY;
uint32_t dense_idx = 0;
};
std::vector<Node> nodes_;
std::vector<player_id_t> dense_;
std::vector<uint32_t> slot_of_;
uint32_t mask_ = 0;
uint32_t size_ = 0;
static ALWAYS_INLINE uint64_t hash(player_id_t x) {
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
return x ^ (x >> 33);
}
ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const {
return static_cast<uint32_t>(hash(id)) & mask_;
}
void rebuild(uint32_t new_cap) {
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<player_id_t> old_dense = std::move(dense_);
nodes_.assign(new_cap, Node{EMPTY, 0});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(new_cap >> 1);
slot_of_.clear();
slot_of_.reserve(new_cap >> 1);
size_ = 0;
for (auto id : old_dense) {
if (id != EMPTY) insert(id);
}
}
public:
SparseSet() {
nodes_.assign(16, Node{EMPTY, 0});
mask_ = 15;
dense_.reserve(8);
slot_of_.reserve(8);
}
explicit SparseSet(uint32_t reserve) {
uint32_t cap = 16;
while (cap < reserve * 2) cap <<= 1;
nodes_.assign(cap, Node{EMPTY, 0});
mask_ = cap - 1;
dense_.reserve(reserve);
slot_of_.reserve(reserve);
}
// 最优insert:Robin Hood哈希 + 动态扩容
ALWAYS_INLINE void insert(player_id_t id) {
if (UNLIKELY(id == EMPTY)) return;
// 扩容检查
if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
rebuild((mask_ + 1) << 1);
}
uint32_t slot = ideal_slot(id);
uint32_t dist = 0;
Node insert_node{id, 0};
while (true) {
Node& cur = nodes_[slot];
// 空槽或已存在
if (cur.id == EMPTY) {
uint32_t idx = size_;
insert_node.dense_idx = idx;
cur = insert_node;
dense_.push_back(id);
slot_of_.push_back(slot);
++size_;
return;
}
if (cur.id == id) return; // 已存在
// Robin Hood:距离远的抢占位置
uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_;
if (cur_dist < dist) {
std::swap(cur, insert_node);
dist = cur_dist;
}
slot = (slot + 1) & mask_;
++dist;
}
}
// 最优erase:查找 + swap-and-pop
ALWAYS_INLINE bool erase(player_id_t id) {
if (UNLIKELY(id == EMPTY || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
// 线性探测查找
while (nodes_[slot].id != EMPTY) {
Node& cur = nodes_[slot];
if (cur.id == id) {
uint32_t idx = cur.dense_idx;
uint32_t last = size_ - 1;
// swap-and-pop:末尾元素填充空洞
if (idx != last) {
player_id_t last_id = dense_[last];
uint32_t last_slot = slot_of_[last];
dense_[idx] = last_id;
slot_of_[idx] = last_slot;
nodes_[last_slot].dense_idx = idx;
}
dense_.pop_back();
slot_of_.pop_back();
cur.id = EMPTY;
--size_;
return true;
}
slot = (slot + 1) & mask_;
}
return false;
}
// 遍历(预取优化)
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
const uint32_t n = size_;
if (n == 0) return;
const player_id_t* d = dense_.data();
uint32_t i = 0;
PREFETCH(&d[0], 0, 3);
PREFETCH(&d[16], 0, 3);
// 8路展开
for (; i + 8 <= n; i += 8) {
PREFETCH(&d[i + 32], 0, 3);
fn(d[i]); fn(d[i+1]); fn(d[i+2]); fn(d[i+3]);
fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]);
}
for (; i < n; ++i) fn(d[i]);
}
// 查找
ALWAYS_INLINE bool contains(player_id_t id) const {
if (UNLIKELY(id == EMPTY || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
while (nodes_[slot].id != EMPTY) {
if (nodes_[slot].id == id) return true;
slot = (slot + 1) & mask_;
}
return false;
}
void clear() {
for (uint32_t i = 0; i < size_; ++i) {
nodes_[slot_of_[i]].id = EMPTY;
}
dense_.clear();
slot_of_.clear();
size_ = 0;
}
void reserve(uint32_t n) {
if (n > (mask_ + 1) * 0.5f) {
uint32_t new_cap = 16;
while (new_cap < n * 2) new_cap <<= 1;
rebuild(new_cap);
}
dense_.reserve(n);
slot_of_.reserve(n);
}
uint32_t size() const { return size_; }
bool empty() const { return size_ == 0; }
uint32_t capacity() const { return dense_.capacity(); }
const player_id_t* data() const { return dense_.data(); }
};
// ==================== 对比:Vector ====================
class VectorSet {
std::vector<player_id_t> data_;
public:
explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); }
ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); }
ALWAYS_INLINE bool erase(player_id_t id) {
auto it = std::find(data_.begin(), data_.end(), id);
if (it == data_.end()) return false;
*it = data_.back();
data_.pop_back();
return true;
}
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : data_) fn(id);
}
ALWAYS_INLINE bool contains(player_id_t id) const {
return std::find(data_.begin(), data_.end(), id) != data_.end();
}
size_t size() const { return data_.size(); }
uint32_t capacity() const { return data_.capacity(); }
void clear() { data_.clear(); }
void reserve(size_t n) { data_.reserve(n); }
};
// ==================== 对比:std::unordered_set ====================
class StdHashSet {
std::unordered_set<player_id_t> set_;
public:
explicit StdHashSet(size_t reserve = 1000) {
set_.reserve(reserve * 2);
}
ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); }
ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; }
template<typename Fn>
ALWAYS_INLINE void foreach(Fn&& fn) const {
for (auto id : set_) fn(id);
}
ALWAYS_INLINE bool contains(player_id_t id) const {
return set_.find(id) != set_.end();
}
size_t size() const { return set_.size(); }
void clear() { set_.clear(); }
void reserve(size_t n) { set_.reserve(n * 2); }
};
// ==================== 测试 ====================
struct Result {
double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0;
double lookup_us = 0;
};
class Benchmark {
std::vector<player_id_t> ids_, del_, lookup_;
int n_;
public:
Benchmark(int n, int seed) : n_(n) {
std::mt19937_64 rng(seed);
for (int i = 0; i < n; ++i) {
player_id_t id = (rng() % (UINT64_MAX - 2)) + 1;
ids_.push_back(id);
}
del_ = ids_;
std::shuffle(del_.begin(), del_.end(), rng);
for (int i = 0; i < n; ++i) {
if (i % 2 == 0) lookup_.push_back(ids_[i]);
else lookup_.push_back((rng() % (UINT64_MAX - 2)) + 1);
}
std::shuffle(lookup_.begin(), lookup_.end(), rng);
}
template<typename Set>
Result run() {
Result r;
Set set;
set.reserve(n_);
auto t0 = high_resolution_clock::now();
for (auto id : ids_) set.insert(id);
auto t1 = high_resolution_clock::now();
volatile uint64_t sum = 0;
set.foreach([&](player_id_t id) { sum += id; });
auto t2 = high_resolution_clock::now();
volatile bool found = false;
for (auto id : lookup_) found = set.contains(id);
auto t3 = high_resolution_clock::now();
for (auto id : del_) set.erase(id);
auto t4 = high_resolution_clock::now();
r.insert_us = duration<double, std::micro>(t1-t0).count();
r.foreach_us = duration<double, std::micro>(t2-t1).count();
r.lookup_us = duration<double, std::micro>(t3-t2).count();
r.erase_us = duration<double, std::micro>(t4-t3).count();
r.total_us = duration<double, std::micro>(t4-t0).count();
(void)sum;
(void)found;
return r;
}
};
int main() {
constexpr int ROUNDS = 100;
std::cout << "=============================================================\n";
std::cout << "SparseSet vs Vector vs std::unordered_set\n";
std::cout << "只保留最优insert和erase,动态扩容,无内存浪费\n";
std::cout << "=============================================================\n\n";
for (int n : {100, 500, 1000, 2000}) {
std::cout << "--- 规模: " << n << " 人 ---\n";
std::vector<Result> vec_res, sp_res, std_res;
for (int i = 0; i < ROUNDS; ++i) {
Benchmark bm(n, i);
vec_res.push_back(bm.run<VectorSet>());
sp_res.push_back(bm.run<SparseSet>());
std_res.push_back(bm.run<StdHashSet>());
}
auto avg = [](auto& v, auto f) {
double s = 0;
for (auto& r : v) s += r.*f;
return s / v.size();
};
auto print = [&](const char* name, const std::vector<Result>& res) {
std::cout << " " << std::left << std::setw(10) << name
<< "插入:" << std::fixed << std::setprecision(2) << std::setw(8) << avg(res, &Result::insert_us)
<< " 遍历:" << std::setw(8) << avg(res, &Result::foreach_us)
<< " 查找:" << std::setw(8) << avg(res, &Result::lookup_us)
<< " 删除:" << std::setw(8) << avg(res, &Result::erase_us)
<< " 总计:" << std::setw(8) << avg(res, &Result::total_us) << " us\n";
};
print("Vector:", vec_res);
print("SparseSet:", sp_res);
print("StdHash:", std_res);
double vec_total = avg(vec_res, &Result::total_us);
double sp_total = avg(sp_res, &Result::total_us);
std::cout << " SparseSet vs Vector: " << std::setprecision(2) << (vec_total/sp_total) << "x\n\n";
}
// 内存效率对比
std::cout << "--- 内存效率 ---\n";
{
SparseSet sp;
VectorSet vec;
for (int i = 0; i < 100; ++i) {
sp.insert(i + 1);
vec.insert(i + 1);
}
std::cout << "插入100人:\n";
std::cout << " SparseSet容量: " << sp.capacity() << "\n";
std::cout << " Vector容量: " << vec.capacity() << "\n";
for (int i = 0; i < 90; ++i) {
sp.erase(i + 1);
vec.erase(i + 1);
}
std::cout << "删除90人后(剩10人):\n";
std::cout << " SparseSet容量: " << sp.capacity() << "\n";
std::cout << " Vector容量: " << vec.capacity() << "\n";
}
return 0;
}
极致优化的结果:
=============================================================
SparseSet vs Vector vs std::unordered_set
只保留最优insert和erase,动态扩容,无内存浪费
=============================================================
--- 规模: 100 人 ---
Vector: 插入:0.10 遍历:0.20 查找:2.94 删除:2.38 总计:5.62 us
SparseSet:插入:1.12 遍历:0.11 查找:2.01 删除:0.99 总计:4.23 us
StdHash: 插入:3.64 遍历:0.25 查找:2.12 删除:2.46 总计:8.49 us
SparseSet vs Vector: 1.33x
--- 规模: 500 人 ---
Vector: 插入:0.20 遍历:0.25 查找:34.96 删除:15.06 总计:50.46 us
SparseSet:插入:4.17 遍历:0.22 查找:5.92 删除:3.43 总计:13.74 us
StdHash: 插入:10.28 遍历:0.66 查找:6.36 删除:6.95 总计:24.25 us
SparseSet vs Vector: 3.67x
--- 规模: 1000 人 ---
Vector: 插入:0.34 遍历:0.37 查找:119.32 删除:46.56 总计:166.60 us
SparseSet:插入:7.91 遍历:0.36 查找:11.10 删除:6.44 总计:25.81 us
StdHash: 插入:18.09 遍历:1.42 查找:12.55 删除:13.38 总计:45.43 us
SparseSet vs Vector: 6.45x
--- 规模: 2000 人 ---
Vector: 插入:0.59 遍历:0.64 查找:444.43 删除:164.40 总计:610.06 us
SparseSet:插入:15.59 遍历:0.69 查找:23.29 删除:13.09 总计:52.66 us
StdHash: 插入:35.29 遍历:3.54 查找:25.72 删除:27.27 总计:91.82 us
SparseSet vs Vector: 11.58x
--- 内存效率 ---
插入100人:
SparseSet容量: 128
Vector容量: 1000
删除90人后(剩10人):
SparseSet容量: 128
Vector容量: 1000
最终稀疏集类头文件:
// sparse_set.h
// 高性能稀疏集头文件,支持uint64_t、void*等类型
// ygluu, ai
#pragma once
#include <cstdint>
#include <vector>
#include <type_traits>
#include <cstring>
#ifdef __GNUC__
#define SPARSE_ALWAYS_INLINE __attribute__((always_inline))
#define SPARSE_LIKELY(x) __builtin_expect(!!(x), 1)
#define SPARSE_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define SPARSE_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
#else
#define SPARSE_ALWAYS_INLINE
#define SPARSE_LIKELY(x) (x)
#define SPARSE_UNLIKELY(x) (x)
#define SPARSE_PREFETCH(addr, rw, locality)
#endif
namespace sset {
// 类型萃取:获取整数表示类型
template<typename T>
struct id_traits {
using int_type = T;
static constexpr int_type EMPTY = static_cast<int_type>(-1);
static SPARSE_ALWAYS_INLINE int_type to_int(T val) { return val; }
static SPARSE_ALWAYS_INLINE T from_int(int_type val) { return val; }
};
// void*特化
template<>
struct id_traits<void*> {
using int_type = uintptr_t;
static constexpr int_type EMPTY = static_cast<int_type>(-1);
static SPARSE_ALWAYS_INLINE int_type to_int(void* val) {
return reinterpret_cast<int_type>(val);
}
static SPARSE_ALWAYS_INLINE void* from_int(int_type val) {
return reinterpret_cast<void*>(val);
}
};
// 其他指针类型特化
template<typename T>
struct id_traits<T*> {
using int_type = uintptr_t;
static constexpr int_type EMPTY = static_cast<int_type>(-1);
static SPARSE_ALWAYS_INLINE int_type to_int(T* val) {
return reinterpret_cast<int_type>(val);
}
static SPARSE_ALWAYS_INLINE T* from_int(int_type val) {
return reinterpret_cast<T*>(val);
}
};
// ==================== 稀疏集模板类 ====================
template<typename T>
class alignas(64) SparseSet {
using traits = id_traits<T>;
using int_type = typename traits::int_type;
static constexpr int_type EMPTY = traits::EMPTY;
struct Node {
int_type id = EMPTY;
uint32_t dense_idx = 0;
};
std::vector<Node> nodes_;
std::vector<T> dense_;
std::vector<uint32_t> slot_of_;
uint32_t mask_ = 0;
uint32_t size_ = 0;
// 哈希函数(针对uintptr_t优化)
static SPARSE_ALWAYS_INLINE uint64_t hash(int_type x) {
// 对指针类型做混合,避免低位相同
if constexpr (sizeof(int_type) == 8) {
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
x ^= x >> 33;
x *= 0xc4ceb9fe1a85ec53ULL;
return x ^ (x >> 33);
} else {
x ^= x >> 16;
x *= 0x45d9f3bULL;
x ^= x >> 16;
x *= 0x45d9f3bULL;
return x ^ (x >> 16);
}
}
SPARSE_ALWAYS_INLINE uint32_t ideal_slot(int_type id) const {
return static_cast<uint32_t>(hash(id)) & mask_;
}
void rebuild(uint32_t new_cap) {
std::vector<Node> old_nodes = std::move(nodes_);
std::vector<T> old_dense = std::move(dense_);
nodes_.assign(new_cap, Node{EMPTY, 0});
mask_ = new_cap - 1;
dense_.clear();
dense_.reserve(new_cap >> 1);
slot_of_.clear();
slot_of_.reserve(new_cap >> 1);
size_ = 0;
for (const auto& val : old_dense) {
int_type id = traits::to_int(val);
if (id != EMPTY) insert_impl(id);
}
}
// 内部插入(使用int_type)
SPARSE_ALWAYS_INLINE void insert_impl(int_type id) {
uint32_t slot = ideal_slot(id);
uint32_t dist = 0;
Node insert_node{id, 0};
while (true) {
Node& cur = nodes_[slot];
if (cur.id == EMPTY) {
uint32_t idx = size_;
insert_node.dense_idx = idx;
cur = insert_node;
dense_.push_back(traits::from_int(id));
slot_of_.push_back(slot);
++size_;
return;
}
if (cur.id == id) return;
uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_;
if (cur_dist < dist) {
std::swap(cur, insert_node);
dist = cur_dist;
}
slot = (slot + 1) & mask_;
++dist;
}
}
public:
SparseSet() {
nodes_.assign(16, Node{EMPTY, 0});
mask_ = 15;
dense_.reserve(8);
slot_of_.reserve(8);
}
explicit SparseSet(uint32_t reserve) {
uint32_t cap = 16;
while (cap < reserve * 2) cap <<= 1;
nodes_.assign(cap, Node{EMPTY, 0});
mask_ = cap - 1;
dense_.reserve(reserve);
slot_of_.reserve(reserve);
}
// 公开接口:使用原始类型T
SPARSE_ALWAYS_INLINE void insert(T val) {
int_type id = traits::to_int(val);
if (SPARSE_UNLIKELY(id == EMPTY)) return;
if (SPARSE_UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) {
rebuild((mask_ + 1) << 1);
}
insert_impl(id);
}
SPARSE_ALWAYS_INLINE bool erase(T val) {
int_type id = traits::to_int(val);
if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
while (nodes_[slot].id != EMPTY) {
Node& cur = nodes_[slot];
if (cur.id == id) {
uint32_t idx = cur.dense_idx;
uint32_t last = size_ - 1;
if (idx != last) {
T last_val = dense_[last];
uint32_t last_slot = slot_of_[last];
dense_[idx] = last_val;
slot_of_[idx] = last_slot;
nodes_[last_slot].dense_idx = idx;
}
dense_.pop_back();
slot_of_.pop_back();
cur.id = EMPTY;
--size_;
return true;
}
slot = (slot + 1) & mask_;
}
return false;
}
SPARSE_ALWAYS_INLINE bool contains(T val) const {
int_type id = traits::to_int(val);
if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false;
uint32_t slot = ideal_slot(id);
while (nodes_[slot].id != EMPTY) {
if (nodes_[slot].id == id) return true;
slot = (slot + 1) & mask_;
}
return false;
}
template<typename Fn>
SPARSE_ALWAYS_INLINE void foreach(Fn&& fn) const {
const uint32_t n = size_;
if (n == 0) return;
const T* d = dense_.data();
uint32_t i = 0;
SPARSE_PREFETCH(&d[0], 0, 3);
SPARSE_PREFETCH(&d[16], 0, 3);
for (; i + 8 <= n; i += 8) {
SPARSE_PREFETCH(&d[i + 32], 0, 3);
fn(d[i]); fn(d[i+1]); fn(d[i+2]); fn(d[i+3]);
fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]);
}
for (; i < n; ++i) fn(d[i]);
}
void clear() {
for (uint32_t i = 0; i < size_; ++i) {
nodes_[slot_of_[i]].id = EMPTY;
}
dense_.clear();
slot_of_.clear();
size_ = 0;
}
void reserve(uint32_t n) {
if (n > (mask_ + 1) * 0.5f) {
uint32_t new_cap = 16;
while (new_cap < n * 2) new_cap <<= 1;
rebuild(new_cap);
}
dense_.reserve(n);
slot_of_.reserve(n);
}
uint32_t size() const { return size_; }
bool empty() const { return size_ == 0; }
uint32_t capacity() const { return dense_.capacity(); }
const T* data() const { return dense_.data(); }
};
// 常用类型别名
using SparseSetU64 = SparseSet<std::uint64_t>;
using SparseSetPtr = SparseSet<void*>;
} // namespace