C++中vector、unordered_set和稀疏集增删遍历性能差异如何?

摘要:C++源码优化版: std::vector vs 开放寻址哈希(二次探测+批量API) vs std::unordered_set #include <cs
C++源码 // 优化版: std::vector vs 开放寻址哈希(二次探测+批量API) vs std::unordered_set #include <cstdint> #include <vector> #include <algorithm> #include <random> #include <chrono> #include <iostream> #include <iomanip> #include <unordered_set> #include <numeric> #include <cstring> #ifdef __GNUC__ #define ALWAYS_INLINE __attribute__((always_inline)) #define PURE __attribute__((pure)) #define LIKELY(x) __builtin_expect(!!(x), 1) #define UNLIKELY(x) __builtin_expect(!!(x), 0) #else #define ALWAYS_INLINE #define PURE #define LIKELY(x) (x) #define UNLIKELY(x) (x) #endif using player_id_t = uint64_t; using namespace std::chrono; // ==================== 方案1: std::vector ==================== class VectorSet { std::vector<player_id_t> data_; public: explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); } ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); } ALWAYS_INLINE bool erase(player_id_t id) { auto it = std::find(data_.begin(), data_.end(), id); if (it == data_.end()) return false; *it = data_.back(); data_.pop_back(); return true; } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : data_) fn(id); } [[nodiscard]] size_t size() const { return data_.size(); } void clear() { data_.clear(); } // 批量操作 void insert_batch(const player_id_t* ids, size_t count) { data_.reserve(data_.size() + count); for (size_t i = 0; i < count; ++i) { data_.push_back(ids[i]); } } }; // ==================== 方案2: 开放寻址哈希 (优化版) ==================== class HashSparseSet { struct Node { player_id_t id = 0; // 0 表示空槽 uint32_t dense_idx = 0; // 在 dense 数组中的索引 }; std::vector<Node> nodes_; // 哈希表(开放寻址) std::vector<player_id_t> dense_; // 密集存储,用于快速遍历 std::vector<uint32_t> slot_of_; // dense -> slot 反向映射(用于快速删除) uint32_t mask_ = 0; // 容量掩码(2^n - 1) uint32_t size_ = 0; // 当前元素数量 // 二次探测序列: 0, 1, 3, 6, 10... (三角形数) // 公式: i * (i + 1) / 2 static ALWAYS_INLINE uint32_t probe_offset(uint32_t i) { return (i * (i + 1)) >> 1; } // MurmurHash3 风格的 64位哈希(更好的分布) PURE static uint64_t hash(player_id_t x) { // 混合位,减少连续ID的碰撞 x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; x ^= x >> 33; return x; } void grow() { uint32_t old_cap = static_cast<uint32_t>(nodes_.size()); uint32_t new_cap = old_cap ? old_cap << 1 : 16; // 保存旧数据 std::vector<Node> old_nodes = std::move(nodes_); std::vector<player_id_t> old_dense = std::move(dense_); // 扩容 nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(new_cap >> 1); // 负载因子 0.5 slot_of_.clear(); slot_of_.reserve(new_cap >> 1); size_ = 0; // 重新插入 for (auto id : old_dense) { if (id != 0) insert(id); } } // 查找槽位:返回找到的槽位或空槽 // found 输出参数表示是否找到目标ID ALWAYS_INLINE uint32_t find_slot(player_id_t id, bool& found) const { found = false; if (UNLIKELY(nodes_.empty())) return 0; const uint32_t start = static_cast<uint32_t>(hash(id)) & mask_; for (uint32_t i = 0; i <= mask_; ++i) { const uint32_t slot = (start + probe_offset(i)) & mask_; const player_id_t slot_id = nodes_[slot].id; if (UNLIKELY(slot_id == 0)) { return slot; // 空槽,插入位置 } if (slot_id == id) { found = true; return slot; // 找到目标 } } return UINT32_MAX; // 表满(理论上不发生) } public: explicit HashSparseSet(size_t reserve = 1000) { uint32_t cap = 16; while (cap < reserve * 2) cap <<= 1; // 负载因子 0.5 nodes_.assign(cap, {}); mask_ = cap - 1; dense_.reserve(reserve); slot_of_.reserve(reserve); } // 单次插入 ALWAYS_INLINE void insert(player_id_t id) { if (UNLIKELY(id == 0)) return; // 0 保留为空槽标记 bool found; uint32_t slot = find_slot(id, found); if (UNLIKELY(slot == UINT32_MAX)) { grow(); slot = find_slot(id, found); } if (found) return; // 已存在 // 新元素 uint32_t idx = static_cast<uint32_t>(dense_.size()); nodes_[slot] = {id, idx}; dense_.push_back(id); slot_of_.push_back(slot); ++size_; } // 批量插入(优化版)- 预分配 + 减少重复计算 void insert_batch(const player_id_t* ids, size_t count) { // 预检查容量 if (dense_.size() + count > (nodes_.size() >> 1)) { uint32_t need = static_cast<uint32_t>(dense_.size() + count); uint32_t new_cap = static_cast<uint32_t>(nodes_.size()); while ((new_cap >> 1) < need) new_cap <<= 1; // 重建表 std::vector<Node> old_nodes = std::move(nodes_); std::vector<player_id_t> old_dense = std::move(dense_); nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(need); slot_of_.clear(); slot_of_.reserve(need); size_ = 0; // 先插入旧元素 for (auto id : old_dense) { if (id != 0) insert(id); } } // 批量插入新元素 for (size_t i = 0; i < count; ++i) { insert(ids[i]); } } ALWAYS_INLINE bool erase(player_id_t id) { bool found; uint32_t slot = find_slot(id, found); if (!found) return false; uint32_t idx = nodes_[slot].dense_idx; uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1; // 标记为空槽 nodes_[slot].id = 0; if (idx != last_idx) { // 用末尾元素填充空洞 player_id_t last_id = dense_.back(); uint32_t last_slot = slot_of_[last_idx]; dense_[idx] = last_id; slot_of_[idx] = last_slot; nodes_[last_slot].dense_idx = idx; } dense_.pop_back(); slot_of_.pop_back(); --size_; return true; } // 批量删除(优化:先标记再清理,或逐个处理) void erase_batch(const player_id_t* ids, size_t count) { // 简单实现:逐个删除 // 对于大量删除,可以优化为:标记删除 -> 重建 dense 数组 for (size_t i = 0; i < count; ++i) { erase(ids[i]); } } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { // 预取优化 for (size_t i = 0; i < dense_.size(); ++i) { if (i + 4 < dense_.size()) { __builtin_prefetch(&dense_[i + 4], 0, 3); } fn(dense_[i]); } } [[nodiscard]] size_t size() const { return size_; } [[nodiscard]] bool empty() const { return size_ == 0; } void clear() { // 快速清空:只重置使用的槽位 for (auto id : dense_) { if (id != 0) { bool found; uint32_t slot = find_slot(id, found); if (found) nodes_[slot].id = 0; } } dense_.clear(); slot_of_.clear(); size_ = 0; } // 预分配接口 void reserve(size_t n) { if (n > (nodes_.size() >> 1)) { uint32_t need = static_cast<uint32_t>(n); uint32_t new_cap = 16; while ((new_cap >> 1) < need) new_cap <<= 1; nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.reserve(n); slot_of_.reserve(n); } } }; // ==================== 方案3: std::unordered_set (优化版) ==================== class StdHashSet { std::unordered_set<player_id_t> set_; public: explicit StdHashSet(size_t reserve = 1000) { set_.reserve(reserve * 2); // 考虑负载因子 } ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); } ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : set_) fn(id); } [[nodiscard]] size_t size() const { return set_.size(); } void clear() { set_.clear(); } void insert_batch(const player_id_t* ids, size_t count) { set_.reserve(set_.size() + count); for (size_t i = 0; i < count; ++i) { set_.insert(ids[i]); } } }; // ==================== 测试框架 ==================== struct Result { double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0; double batch_insert_us = 0; // 新增批量测试 }; class Benchmark { std::vector<player_id_t> ids_, del_; int n_; public: Benchmark(int n, int seed) : n_(n) { std::mt19937_64 rng(seed); for (int i = 0; i < n; ++i) ids_.push_back(rng()); del_ = ids_; std::shuffle(del_.begin(), del_.end(), rng); } template<typename Set> Result run() { Result r; Set set(1000); // 测试1: 单次插入 auto t0 = high_resolution_clock::now(); for (auto id : ids_) set.insert(id); auto t1 = high_resolution_clock::now(); // 测试2: 遍历 volatile uint64_t sum = 0; set.foreach([&](player_id_t id) { sum += id; }); auto t2 = high_resolution_clock::now(); // 测试3: 单次删除 for (auto id : del_) set.erase(id); auto t3 = high_resolution_clock::now(); r.insert_us = duration<double, std::micro>(t1-t0).count(); r.foreach_us = duration<double, std::micro>(t2-t1).count(); r.erase_us = duration<double, std::micro>(t3-t2).count(); r.total_us = duration<double, std::micro>(t3-t0).count(); (void)sum; return r; } // 批量操作测试 template<typename Set> Result run_batch() { Result r; Set set(1000); // 批量插入 auto t0 = high_resolution_clock::now(); set.insert_batch(ids_.data(), ids_.size()); auto t1 = high_resolution_clock::now(); // 遍历 volatile uint64_t sum = 0; set.foreach([&](player_id_t id) { sum += id; }); auto t2 = high_resolution_clock::now(); // 批量删除(模拟) set.clear(); auto t3 = high_resolution_clock::now(); r.batch_insert_us = duration<double, std::micro>(t1-t0).count(); r.foreach_us = duration<double, std::micro>(t2-t1).count(); r.total_us = duration<double, std::micro>(t3-t0).count(); (void)sum; return r; } }; int main() { constexpr int ROUNDS = 100; std::cout << "=================================================================\n"; std::cout << "优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set\n"; std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n"; std::cout << "=================================================================\n\n"; std::vector<Result> vec_res, open_res, std_res; std::vector<Result> vec_batch, open_batch, std_batch; for (int i = 0; i < ROUNDS; ++i) { Benchmark bm(1000, i); vec_res.push_back(bm.run<VectorSet>()); open_res.push_back(bm.run<HashSparseSet>()); std_res.push_back(bm.run<StdHashSet>()); vec_batch.push_back(bm.run_batch<VectorSet>()); open_batch.push_back(bm.run_batch<HashSparseSet>()); std_batch.push_back(bm.run_batch<StdHashSet>()); } auto avg = [](auto& v, auto f) { double s = 0; for (auto& r : v) s += r.*f; return s / v.size(); }; // 单次操作结果 std::cout << "--- 单次操作测试 ---\n"; const char* names[] = {"INSERT", "FOREACH", "ERASE", "TOTAL"}; auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::erase_us, &Result::total_us}; auto it = ptrs.begin(); for (const char* name : names) { double va = avg(vec_res, *it); double oa = avg(open_res, *it); double sa = avg(std_res, *it); std::cout << "[" << name << "]\n"; std::cout << " Vector: " << std::fixed << std::setprecision(2) << std::setw(10) << va << " us\n"; std::cout << " OpenHash: " << std::setw(10) << oa << " us"; if (name[0] == 'E' || name[0] == 'T') std::cout << " (" << std::setprecision(2) << (va/oa) << "x vs Vector)"; else if (name[0] == 'I') std::cout << " (" << std::setprecision(2) << (oa/va) << "x vs Vector)"; std::cout << "\n"; std::cout << " StdUnordered: " << std::setw(10) << sa << " us"; if (name[0] == 'E' || name[0] == 'T') std::cout << " (" << std::setprecision(2) << (va/sa) << "x vs Vector)"; else if (name[0] == 'I') std::cout << " (" << std::setprecision(2) << (sa/va) << "x vs Vector)"; std::cout << "\n\n"; ++it; } // 批量操作结果 std::cout << "--- 批量操作测试 ---\n"; std::cout << "[BATCH INSERT]\n"; double vb = avg(vec_batch, &Result::batch_insert_us); double ob = avg(open_batch, &Result::batch_insert_us); double sb = avg(std_batch, &Result::batch_insert_us); std::cout << " Vector: " << std::fixed << std::setprecision(2) << std::setw(10) << vb << " us\n"; std::cout << " OpenHash: " << std::setw(10) << ob << " us" << " (" << std::setprecision(2) << (ob/vb) << "x vs Vector)\n"; std::cout << " StdUnordered: " << std::setw(10) << sb << " us" << " (" << std::setprecision(2) << (sb/vb) << "x vs Vector)\n"; return 0; } 测试结果(O3编译): ================================================================= 优化版对比: Vector vs 开放寻址哈希(二次探测) vs std::unordered_set 1000 random uint64_t IDs, 100 rounds ================================================================= --- 单次操作测试 --- [INSERT] Vector: 0.37 us OpenHash: 7.98 us (21.73x vs Vector) StdUnordered: 22.21 us (60.51x vs Vector) [FOREACH] Vector: 0.40 us OpenHash: 0.57 us StdUnordered: 1.51 us [ERASE] Vector: 49.77 us OpenHash: 6.88 us (7.23x vs Vector) StdUnordered: 16.29 us (3.06x vs Vector) [TOTAL] Vector: 50.54 us OpenHash: 15.43 us (3.28x vs Vector) StdUnordered: 40.01 us (1.26x vs Vector) 优化版本 // 极致性能优化版稀疏集:Robin Hood哈希 + 软件预取 + 缓存行优化 #include <cstdint> #include <vector> #include <algorithm> #include <random> #include <chrono> #include <iostream> #include <iomanip> #include <unordered_set> #include <cstring> #include <cassert> #ifdef __GNUC__ #define ALWAYS_INLINE __attribute__((always_inline)) #define PURE __attribute__((pure)) #define HOT __attribute__((hot)) #define LIKELY(x) __builtin_expect(!!(x), 1) #define UNLIKELY(x) __builtin_expect(!!(x), 0) #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) #else #define ALWAYS_INLINE #define PURE #define HOT #define LIKELY(x) (x) #define UNLIKELY(x) (x) #define PREFETCH(addr, rw, locality) #endif using player_id_t = uint64_t; using namespace std::chrono; // ==================== 极致优化版:Robin Hood稀疏集 ==================== // // 核心优化策略(基于[^8^][^13^]业界最佳实践): // 1. Robin Hood哈希:控制最大探测长度,保证O(1)最坏情况 // 2. 分离存储:稀疏数组(sparse)只存索引,密集数组(dense)存实际ID // 3. 软件预取:遍历前预取缓存行,减少内存延迟 // 4. 缓存行对齐:确保dense数组按64字节边界对齐 // 5. 零拷贝删除:swap-and-pop避免内存移动 class alignas(64) RobinHoodSparseSet { public: // 节点结构:8字节ID + 4字节密集索引 + 4字节距离 = 16字节(缓存行对齐友好) struct Node { player_id_t id; // 8 bytes: 存储的ID(0表示空槽) uint32_t dense_idx; // 4 bytes: 在dense数组中的位置 uint32_t dist; // 4 bytes: 距离理想位置的距离(Robin Hood关键) bool empty() const { return id == 0; } }; static_assert(sizeof(Node) == 16, "Node size should be 16 bytes for cache efficiency"); private: std::vector<Node> nodes_; // 稀疏数组:哈希表本体 std::vector<player_id_t> dense_; // 密集数组:连续存储所有有效ID std::vector<uint32_t> sparse_to_dense_; // 反向映射:dense索引 -> 稀疏槽位 uint32_t mask_ = 0; // 容量掩码(2^n - 1) uint32_t size_ = 0; // 当前元素数量 uint32_t max_dist_ = 0; // 当前最大探测距离(用于快速失败查找) // 哈希函数:SplitMix64 - 高质量且快速 PURE static ALWAYS_INLINE uint64_t hash(player_id_t x) { // SplitMix64算法:比MurmurHash更快,分布质量足够好 x += 0x9e3779b97f4a7c15ULL; x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL; x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL; return x ^ (x >> 31); } // 计算理想位置 PURE ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const { return static_cast<uint32_t>(hash(id)) & mask_; } // 扩容并重建(保留旧元素) HOT void grow() { uint32_t old_cap = mask_ ? mask_ + 1 : 0; uint32_t new_cap = old_cap ? old_cap << 1 : 16; // 保存旧数据 std::vector<player_id_t> old_dense = std::move(dense_); // 重置结构 nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(new_cap >> 1); // 负载因子0.5 sparse_to_dense_.clear(); sparse_to_dense_.reserve(new_cap >> 1); size_ = 0; max_dist_ = 0; // 重新插入所有元素(此时会重建Robin Hood顺序) for (auto id : old_dense) { if (id != 0) insert_robin_hood(id); } } // Robin Hood插入核心:富者愈富,穷者愈穷 HOT void insert_robin_hood(player_id_t id) { assert(id != 0 && "ID 0 is reserved for empty slot"); uint32_t slot = ideal_slot(id); uint32_t dist = 0; Node insert_node{id, 0, 0}; // dense_idx会在后面设置 while (true) { Node& current = nodes_[slot]; if (current.empty()) { // 找到空槽,放置元素 uint32_t idx = static_cast<uint32_t>(dense_.size()); insert_node.dense_idx = idx; insert_node.dist = dist; current = insert_node; dense_.push_back(id); sparse_to_dense_.push_back(slot); if (dist > max_dist_) max_dist_ = dist; ++size_; return; } if (current.id == id) { return; // 已存在,不重复插入 } // Robin Hood交换:如果当前元素比新元素"更富"(离理想位置更近) if (current.dist < dist) { // 新元素更"穷",抢夺这个位置 std::swap(current, insert_node); dist = current.dist; // 继续携带被交换的元素 insert_node.dist = dist; } // 继续探测 ++dist; slot = (slot + 1) & mask_; // 安全检查:理论上不会发生,因为负载因子0.5 if (UNLIKELY(dist > mask_)) { grow(); insert_robin_hood(insert_node.id); return; } } } public: explicit RobinHoodSparseSet(size_t reserve = 1000) { uint32_t cap = 16; while (cap < reserve * 2) cap <<= 1; // 保证负载因子0.5 nodes_.assign(cap, {}); mask_ = cap - 1; dense_.reserve(reserve); sparse_to_dense_.reserve(reserve); } // 单次插入 HOT ALWAYS_INLINE void insert(player_id_t id) { if (UNLIKELY(id == 0)) return; // 快速路径:检查是否需要扩容(负载因子0.75触发) if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) { grow(); } insert_robin_hood(id); } // 批量插入:预分配 + 减少哈希计算 HOT void insert_batch(const player_id_t* ids, size_t count) { if (UNLIKELY(count == 0)) return; // 预检查容量,一次性扩容 uint32_t need = static_cast<uint32_t>(size_ + count); if (need > (mask_ + 1) * 0.75f) { uint32_t new_cap = mask_ + 1; while (new_cap < need * 2) new_cap <<= 1; std::vector<Node> old_nodes = std::move(nodes_); std::vector<player_id_t> old_dense = std::move(dense_); nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(need); sparse_to_dense_.clear(); sparse_to_dense_.reserve(need); size_ = 0; max_dist_ = 0; // 先插入旧元素 for (auto old_id : old_dense) { if (old_id != 0) insert_robin_hood(old_id); } } // 批量插入新元素 for (size_t i = 0; i < count; ++i) { if (ids[i] != 0) insert_robin_hood(ids[i]); } } // 查找(利用max_dist_快速失败) PURE ALWAYS_INLINE bool contains(player_id_t id) const { if (UNLIKELY(id == 0 || size_ == 0)) return false; uint32_t slot = ideal_slot(id); // 最多探测max_dist_ + 1次 for (uint32_t dist = 0; dist <= max_dist_; ++dist) { const Node& node = nodes_[slot]; if (node.empty()) return false; if (node.id == id) return true; if (node.dist < dist) return false; // Robin Hood性质:不可能在后面 slot = (slot + 1) & mask_; } return false; } // 删除:swap-and-pop(零拷贝) HOT ALWAYS_INLINE bool erase(player_id_t id) { if (UNLIKELY(id == 0 || size_ == 0)) return false; uint32_t slot = ideal_slot(id); for (uint32_t dist = 0; dist <= max_dist_; ++dist) { Node& node = nodes_[slot]; if (node.empty()) return false; if (node.id == id) { // 找到目标,执行删除 uint32_t idx = node.dense_idx; uint32_t last_idx = static_cast<uint32_t>(dense_.size()) - 1; // 标记为空槽 node.id = 0; node.dist = 0; if (idx != last_idx) { // swap-and-pop:用末尾元素填充空洞 player_id_t last_id = dense_.back(); uint32_t last_slot = sparse_to_dense_[last_idx]; dense_[idx] = last_id; sparse_to_dense_[idx] = last_slot; nodes_[last_slot].dense_idx = idx; } dense_.pop_back(); sparse_to_dense_.pop_back(); --size_; // 可选:压缩max_dist_(延迟更新以节省CPU) return true; } if (node.dist < dist) return false; slot = (slot + 1) & mask_; } return false; } // 极致优化的遍历:软件预取 + 循环展开 template<typename Fn> HOT ALWAYS_INLINE void foreach(Fn&& fn) const { const size_t n = dense_.size(); if (n == 0) return; const player_id_t* data = dense_.data(); // 主循环:4路展开 + 软件预取 size_t i = 0; for (; i + 4 <= n; i += 4) { // 预取未来4个缓存行(假设每个ID 8字节,4个ID 32字节,小于64字节缓存行) PREFETCH(&data[i + 8], 0, 3); // 预取8个元素后的数据 fn(data[i]); fn(data[i + 1]); fn(data[i + 2]); fn(data[i + 3]); } // 处理剩余元素 for (; i < n; ++i) { fn(data[i]); } } // 带索引的遍历(用于需要知道位置的场景) template<typename Fn> HOT ALWAYS_INLINE void foreach_indexed(Fn&& fn) const { const size_t n = dense_.size(); for (size_t i = 0; i < n; ++i) { fn(i, dense_[i]); } } // 快速清空:只重置使用的槽位 HOT void clear() { // 只遍历dense数组,标记对应的稀疏槽位 for (auto id : dense_) { if (id != 0) { uint32_t slot = ideal_slot(id); // 由于Robin Hood性质,需要找到实际位置 for (uint32_t dist = 0; dist <= max_dist_; ++dist) { if (nodes_[slot].id == id) { nodes_[slot].id = 0; nodes_[slot].dist = 0; break; } slot = (slot + 1) & mask_; } } } dense_.clear(); sparse_to_dense_.clear(); size_ = 0; max_dist_ = 0; } // 预分配接口 void reserve(size_t n) { if (n > (mask_ + 1) * 0.5f) { uint32_t new_cap = 16; while (new_cap < n * 2) new_cap <<= 1; std::vector<Node> old_nodes = std::move(nodes_); std::vector<player_id_t> old_dense = std::move(dense_); nodes_.assign(new_cap, {}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(n); sparse_to_dense_.clear(); sparse_to_dense_.reserve(n); size_ = 0; max_dist_ = 0; for (auto id : old_dense) { if (id != 0) insert_robin_hood(id); } } } [[nodiscard]] size_t size() const { return size_; } [[nodiscard]] bool empty() const { return size_ == 0; } [[nodiscard]] size_t capacity() const { return dense_.capacity(); } // 获取原始密集数组指针(用于外部批量处理) const player_id_t* data() const { return dense_.data(); } }; // ==================== 对比方案:原始Vector ==================== class VectorSet { std::vector<player_id_t> data_; public: explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); } ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); } ALWAYS_INLINE bool erase(player_id_t id) { auto it = std::find(data_.begin(), data_.end(), id); if (it == data_.end()) return false; *it = data_.back(); data_.pop_back(); return true; } ALWAYS_INLINE bool contains(player_id_t id) const { return std::find(data_.begin(), data_.end(), id) != data_.end(); } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : data_) fn(id); } void insert_batch(const player_id_t* ids, size_t count) { data_.reserve(data_.size() + count); for (size_t i = 0; i < count; ++i) data_.push_back(ids[i]); } [[nodiscard]] size_t size() const { return data_.size(); } void clear() { data_.clear(); } }; // ==================== 对比方案:std::unordered_set ==================== class StdHashSet { std::unordered_set<player_id_t> set_; public: explicit StdHashSet(size_t reserve = 1000) { set_.reserve(reserve * 2); } ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); } ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; } ALWAYS_INLINE bool contains(player_id_t id) const { return set_.find(id) != set_.end(); } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : set_) fn(id); } void insert_batch(const player_id_t* ids, size_t count) { set_.reserve(set_.size() + count); for (size_t i = 0; i < count; ++i) set_.insert(ids[i]); } [[nodiscard]] size_t size() const { return set_.size(); } void clear() { set_.clear(); } }; // ==================== 测试框架 ==================== struct Result { double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0; double batch_insert_us = 0; double lookup_us = 0; // 新增查找测试 }; class Benchmark { std::vector<player_id_t> ids_, del_, lookup_; int n_; public: Benchmark(int n, int seed) : n_(n) { std::mt19937_64 rng(seed); for (int i = 0; i < n; ++i) ids_.push_back(rng() | 1); // 确保非0 del_ = ids_; std::shuffle(del_.begin(), del_.end(), rng); // 生成查找用ID(一半存在,一半不存在) for (int i = 0; i < n; ++i) { if (i % 2 == 0) lookup_.push_back(ids_[i]); else lookup_.push_back(rng() | 1); } std::shuffle(lookup_.begin(), lookup_.end(), rng); } template<typename Set> Result run() { Result r; Set set(1000); // 插入 auto t0 = high_resolution_clock::now(); for (auto id : ids_) set.insert(id); auto t1 = high_resolution_clock::now(); // 遍历 volatile uint64_t sum = 0; set.foreach([&](player_id_t id) { sum += id; }); auto t2 = high_resolution_clock::now(); // 查找(50%命中率) volatile bool found = false; for (auto id : lookup_) found = set.contains(id); auto t3 = high_resolution_clock::now(); // 删除 for (auto id : del_) set.erase(id); auto t4 = high_resolution_clock::now(); r.insert_us = duration<double, std::micro>(t1-t0).count(); r.foreach_us = duration<double, std::micro>(t2-t1).count(); r.lookup_us = duration<double, std::micro>(t3-t2).count(); r.erase_us = duration<double, std::micro>(t4-t3).count(); r.total_us = duration<double, std::micro>(t4-t0).count(); (void)sum; (void)found; return r; } template<typename Set> Result run_batch() { Result r; Set set(1000); // 批量插入 auto t0 = high_resolution_clock::now(); set.insert_batch(ids_.data(), ids_.size()); auto t1 = high_resolution_clock::now(); // 遍历 volatile uint64_t sum = 0; set.foreach([&](player_id_t id) { sum += id; }); auto t2 = high_resolution_clock::now(); r.batch_insert_us = duration<double, std::micro>(t1-t0).count(); r.foreach_us = duration<double, std::micro>(t2-t1).count(); r.total_us = duration<double, std::micro>(t2-t0).count(); (void)sum; return r; } }; int main() { constexpr int ROUNDS = 100; std::cout << "=================================================================\n"; std::cout << "极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set\n"; std::cout << "1000 random uint64_t IDs, " << ROUNDS << " rounds\n"; std::cout << "=================================================================\n\n"; std::vector<Result> vec_res, robin_res, std_res; std::vector<Result> vec_batch, robin_batch, std_batch; for (int i = 0; i < ROUNDS; ++i) { Benchmark bm(1000, i); vec_res.push_back(bm.run<VectorSet>()); robin_res.push_back(bm.run<RobinHoodSparseSet>()); std_res.push_back(bm.run<StdHashSet>()); vec_batch.push_back(bm.run_batch<VectorSet>()); robin_batch.push_back(bm.run_batch<RobinHoodSparseSet>()); std_batch.push_back(bm.run_batch<StdHashSet>()); } auto avg = [](auto& v, auto f) { double s = 0; for (auto& r : v) s += r.*f; return s / v.size(); }; // 单次操作结果 std::cout << "--- 单次操作测试 ---\n"; const char* names[] = {"INSERT", "FOREACH", "LOOKUP", "ERASE", "TOTAL"}; auto ptrs = {&Result::insert_us, &Result::foreach_us, &Result::lookup_us, &Result::erase_us, &Result::total_us}; auto it = ptrs.begin(); for (const char* name : names) { double va = avg(vec_res, *it); double ra = avg(robin_res, *it); double sa = avg(std_res, *it); std::cout << "[" << name << "]\n"; std::cout << " Vector: " << std::fixed << std::setprecision(2) << std::setw(10) << va << " us\n"; std::cout << " RobinHood: " << std::setw(10) << ra << " us"; if (va > 0.01) { double speedup = va / ra; std::cout << " (" << std::setprecision(2) << speedup << "x vs Vector)"; } std::cout << "\n"; std::cout << " StdUnordered: " << std::setw(10) << sa << " us"; if (va > 0.01) { double speedup = va / sa; std::cout << " (" << std::setprecision(2) << speedup << "x vs Vector)"; } std::cout << "\n\n"; ++it; } // 批量操作结果 std::cout << "--- 批量操作测试 ---\n"; std::cout << "[BATCH INSERT + FOREACH]\n"; double vb = avg(vec_batch, &Result::total_us); double rb = avg(robin_batch, &Result::total_us); double sb = avg(std_batch, &Result::total_us); std::cout << " Vector: " << std::fixed << std::setprecision(2) << std::setw(10) << vb << " us\n"; std::cout << " RobinHood: " << std::setw(10) << rb << " us" << " (" << std::setprecision(2) << (vb/rb) << "x vs Vector)\n"; std::cout << " StdUnordered: " << std::setw(10) << sb << " us" << " (" << std::setprecision(2) << (vb/sb) << "x vs Vector)\n"; return 0; } 优化后的结果(O3编译): ================================================================= 极致优化版对比: Vector vs RobinHood稀疏集 vs std::unordered_set 1000 random uint64_t IDs, 100 rounds ================================================================= --- 单次操作测试 --- [INSERT] Vector: 0.35 us RobinHood: 9.79 us (0.04x vs Vector) StdUnordered: 18.59 us (0.02x vs Vector) [FOREACH] Vector: 0.39 us RobinHood: 0.40 us (0.98x vs Vector) StdUnordered: 1.49 us (0.26x vs Vector) [LOOKUP] Vector: 125.58 us RobinHood: 12.22 us (10.28x vs Vector) StdUnordered: 13.15 us (9.55x vs Vector) [ERASE] Vector: 49.28 us RobinHood: 7.03 us (7.01x vs Vector) StdUnordered: 13.78 us (3.58x vs Vector) [TOTAL] Vector: 175.60 us RobinHood: 29.44 us (5.96x vs Vector) StdUnordered: 47.00 us (3.74x vs Vector) --- 批量操作测试 --- [BATCH INSERT + FOREACH] Vector: 0.73 us RobinHood: 8.54 us (0.09x vs Vector) StdUnordered: 25.00 us (0.03x vs Vector) 极致优化的源码: // 精简版:只保留最优insert和erase,动态扩容,无内存浪费 #include <cstdint> #include <vector> #include <algorithm> #include <random> #include <chrono> #include <iostream> #include <iomanip> #include <unordered_set> #ifdef __GNUC__ #define ALWAYS_INLINE __attribute__((always_inline)) #define LIKELY(x) __builtin_expect(!!(x), 1) #define UNLIKELY(x) __builtin_expect(!!(x), 0) #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) #else #define ALWAYS_INLINE #define LIKELY(x) (x) #define UNLIKELY(x) (x) #define PREFETCH(addr, rw, locality) #endif using player_id_t = uint64_t; using namespace std::chrono; // ==================== 动态稀疏集:最优insert + 最优erase ==================== class alignas(64) SparseSet { static constexpr player_id_t EMPTY = UINT64_MAX; struct Node { player_id_t id = EMPTY; uint32_t dense_idx = 0; }; std::vector<Node> nodes_; std::vector<player_id_t> dense_; std::vector<uint32_t> slot_of_; uint32_t mask_ = 0; uint32_t size_ = 0; static ALWAYS_INLINE uint64_t hash(player_id_t x) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; return x ^ (x >> 33); } ALWAYS_INLINE uint32_t ideal_slot(player_id_t id) const { return static_cast<uint32_t>(hash(id)) & mask_; } void rebuild(uint32_t new_cap) { std::vector<Node> old_nodes = std::move(nodes_); std::vector<player_id_t> old_dense = std::move(dense_); nodes_.assign(new_cap, Node{EMPTY, 0}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(new_cap >> 1); slot_of_.clear(); slot_of_.reserve(new_cap >> 1); size_ = 0; for (auto id : old_dense) { if (id != EMPTY) insert(id); } } public: SparseSet() { nodes_.assign(16, Node{EMPTY, 0}); mask_ = 15; dense_.reserve(8); slot_of_.reserve(8); } explicit SparseSet(uint32_t reserve) { uint32_t cap = 16; while (cap < reserve * 2) cap <<= 1; nodes_.assign(cap, Node{EMPTY, 0}); mask_ = cap - 1; dense_.reserve(reserve); slot_of_.reserve(reserve); } // 最优insert:Robin Hood哈希 + 动态扩容 ALWAYS_INLINE void insert(player_id_t id) { if (UNLIKELY(id == EMPTY)) return; // 扩容检查 if (UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) { rebuild((mask_ + 1) << 1); } uint32_t slot = ideal_slot(id); uint32_t dist = 0; Node insert_node{id, 0}; while (true) { Node& cur = nodes_[slot]; // 空槽或已存在 if (cur.id == EMPTY) { uint32_t idx = size_; insert_node.dense_idx = idx; cur = insert_node; dense_.push_back(id); slot_of_.push_back(slot); ++size_; return; } if (cur.id == id) return; // 已存在 // Robin Hood:距离远的抢占位置 uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_; if (cur_dist < dist) { std::swap(cur, insert_node); dist = cur_dist; } slot = (slot + 1) & mask_; ++dist; } } // 最优erase:查找 + swap-and-pop ALWAYS_INLINE bool erase(player_id_t id) { if (UNLIKELY(id == EMPTY || size_ == 0)) return false; uint32_t slot = ideal_slot(id); // 线性探测查找 while (nodes_[slot].id != EMPTY) { Node& cur = nodes_[slot]; if (cur.id == id) { uint32_t idx = cur.dense_idx; uint32_t last = size_ - 1; // swap-and-pop:末尾元素填充空洞 if (idx != last) { player_id_t last_id = dense_[last]; uint32_t last_slot = slot_of_[last]; dense_[idx] = last_id; slot_of_[idx] = last_slot; nodes_[last_slot].dense_idx = idx; } dense_.pop_back(); slot_of_.pop_back(); cur.id = EMPTY; --size_; return true; } slot = (slot + 1) & mask_; } return false; } // 遍历(预取优化) template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { const uint32_t n = size_; if (n == 0) return; const player_id_t* d = dense_.data(); uint32_t i = 0; PREFETCH(&d[0], 0, 3); PREFETCH(&d[16], 0, 3); // 8路展开 for (; i + 8 <= n; i += 8) { PREFETCH(&d[i + 32], 0, 3); fn(d[i]); fn(d[i+1]); fn(d[i+2]); fn(d[i+3]); fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]); } for (; i < n; ++i) fn(d[i]); } // 查找 ALWAYS_INLINE bool contains(player_id_t id) const { if (UNLIKELY(id == EMPTY || size_ == 0)) return false; uint32_t slot = ideal_slot(id); while (nodes_[slot].id != EMPTY) { if (nodes_[slot].id == id) return true; slot = (slot + 1) & mask_; } return false; } void clear() { for (uint32_t i = 0; i < size_; ++i) { nodes_[slot_of_[i]].id = EMPTY; } dense_.clear(); slot_of_.clear(); size_ = 0; } void reserve(uint32_t n) { if (n > (mask_ + 1) * 0.5f) { uint32_t new_cap = 16; while (new_cap < n * 2) new_cap <<= 1; rebuild(new_cap); } dense_.reserve(n); slot_of_.reserve(n); } uint32_t size() const { return size_; } bool empty() const { return size_ == 0; } uint32_t capacity() const { return dense_.capacity(); } const player_id_t* data() const { return dense_.data(); } }; // ==================== 对比:Vector ==================== class VectorSet { std::vector<player_id_t> data_; public: explicit VectorSet(size_t reserve = 1000) { data_.reserve(reserve); } ALWAYS_INLINE void insert(player_id_t id) { data_.push_back(id); } ALWAYS_INLINE bool erase(player_id_t id) { auto it = std::find(data_.begin(), data_.end(), id); if (it == data_.end()) return false; *it = data_.back(); data_.pop_back(); return true; } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : data_) fn(id); } ALWAYS_INLINE bool contains(player_id_t id) const { return std::find(data_.begin(), data_.end(), id) != data_.end(); } size_t size() const { return data_.size(); } uint32_t capacity() const { return data_.capacity(); } void clear() { data_.clear(); } void reserve(size_t n) { data_.reserve(n); } }; // ==================== 对比:std::unordered_set ==================== class StdHashSet { std::unordered_set<player_id_t> set_; public: explicit StdHashSet(size_t reserve = 1000) { set_.reserve(reserve * 2); } ALWAYS_INLINE void insert(player_id_t id) { set_.insert(id); } ALWAYS_INLINE bool erase(player_id_t id) { return set_.erase(id) > 0; } template<typename Fn> ALWAYS_INLINE void foreach(Fn&& fn) const { for (auto id : set_) fn(id); } ALWAYS_INLINE bool contains(player_id_t id) const { return set_.find(id) != set_.end(); } size_t size() const { return set_.size(); } void clear() { set_.clear(); } void reserve(size_t n) { set_.reserve(n * 2); } }; // ==================== 测试 ==================== struct Result { double insert_us = 0, foreach_us = 0, erase_us = 0, total_us = 0; double lookup_us = 0; }; class Benchmark { std::vector<player_id_t> ids_, del_, lookup_; int n_; public: Benchmark(int n, int seed) : n_(n) { std::mt19937_64 rng(seed); for (int i = 0; i < n; ++i) { player_id_t id = (rng() % (UINT64_MAX - 2)) + 1; ids_.push_back(id); } del_ = ids_; std::shuffle(del_.begin(), del_.end(), rng); for (int i = 0; i < n; ++i) { if (i % 2 == 0) lookup_.push_back(ids_[i]); else lookup_.push_back((rng() % (UINT64_MAX - 2)) + 1); } std::shuffle(lookup_.begin(), lookup_.end(), rng); } template<typename Set> Result run() { Result r; Set set; set.reserve(n_); auto t0 = high_resolution_clock::now(); for (auto id : ids_) set.insert(id); auto t1 = high_resolution_clock::now(); volatile uint64_t sum = 0; set.foreach([&](player_id_t id) { sum += id; }); auto t2 = high_resolution_clock::now(); volatile bool found = false; for (auto id : lookup_) found = set.contains(id); auto t3 = high_resolution_clock::now(); for (auto id : del_) set.erase(id); auto t4 = high_resolution_clock::now(); r.insert_us = duration<double, std::micro>(t1-t0).count(); r.foreach_us = duration<double, std::micro>(t2-t1).count(); r.lookup_us = duration<double, std::micro>(t3-t2).count(); r.erase_us = duration<double, std::micro>(t4-t3).count(); r.total_us = duration<double, std::micro>(t4-t0).count(); (void)sum; (void)found; return r; } }; int main() { constexpr int ROUNDS = 100; std::cout << "=============================================================\n"; std::cout << "SparseSet vs Vector vs std::unordered_set\n"; std::cout << "只保留最优insert和erase,动态扩容,无内存浪费\n"; std::cout << "=============================================================\n\n"; for (int n : {100, 500, 1000, 2000}) { std::cout << "--- 规模: " << n << " 人 ---\n"; std::vector<Result> vec_res, sp_res, std_res; for (int i = 0; i < ROUNDS; ++i) { Benchmark bm(n, i); vec_res.push_back(bm.run<VectorSet>()); sp_res.push_back(bm.run<SparseSet>()); std_res.push_back(bm.run<StdHashSet>()); } auto avg = [](auto& v, auto f) { double s = 0; for (auto& r : v) s += r.*f; return s / v.size(); }; auto print = [&](const char* name, const std::vector<Result>& res) { std::cout << " " << std::left << std::setw(10) << name << "插入:" << std::fixed << std::setprecision(2) << std::setw(8) << avg(res, &Result::insert_us) << " 遍历:" << std::setw(8) << avg(res, &Result::foreach_us) << " 查找:" << std::setw(8) << avg(res, &Result::lookup_us) << " 删除:" << std::setw(8) << avg(res, &Result::erase_us) << " 总计:" << std::setw(8) << avg(res, &Result::total_us) << " us\n"; }; print("Vector:", vec_res); print("SparseSet:", sp_res); print("StdHash:", std_res); double vec_total = avg(vec_res, &Result::total_us); double sp_total = avg(sp_res, &Result::total_us); std::cout << " SparseSet vs Vector: " << std::setprecision(2) << (vec_total/sp_total) << "x\n\n"; } // 内存效率对比 std::cout << "--- 内存效率 ---\n"; { SparseSet sp; VectorSet vec; for (int i = 0; i < 100; ++i) { sp.insert(i + 1); vec.insert(i + 1); } std::cout << "插入100人:\n"; std::cout << " SparseSet容量: " << sp.capacity() << "\n"; std::cout << " Vector容量: " << vec.capacity() << "\n"; for (int i = 0; i < 90; ++i) { sp.erase(i + 1); vec.erase(i + 1); } std::cout << "删除90人后(剩10人):\n"; std::cout << " SparseSet容量: " << sp.capacity() << "\n"; std::cout << " Vector容量: " << vec.capacity() << "\n"; } return 0; } 极致优化的结果: ============================================================= SparseSet vs Vector vs std::unordered_set 只保留最优insert和erase,动态扩容,无内存浪费 ============================================================= --- 规模: 100 人 --- Vector: 插入:0.10 遍历:0.20 查找:2.94 删除:2.38 总计:5.62 us SparseSet:插入:1.12 遍历:0.11 查找:2.01 删除:0.99 总计:4.23 us StdHash: 插入:3.64 遍历:0.25 查找:2.12 删除:2.46 总计:8.49 us SparseSet vs Vector: 1.33x --- 规模: 500 人 --- Vector: 插入:0.20 遍历:0.25 查找:34.96 删除:15.06 总计:50.46 us SparseSet:插入:4.17 遍历:0.22 查找:5.92 删除:3.43 总计:13.74 us StdHash: 插入:10.28 遍历:0.66 查找:6.36 删除:6.95 总计:24.25 us SparseSet vs Vector: 3.67x --- 规模: 1000 人 --- Vector: 插入:0.34 遍历:0.37 查找:119.32 删除:46.56 总计:166.60 us SparseSet:插入:7.91 遍历:0.36 查找:11.10 删除:6.44 总计:25.81 us StdHash: 插入:18.09 遍历:1.42 查找:12.55 删除:13.38 总计:45.43 us SparseSet vs Vector: 6.45x --- 规模: 2000 人 --- Vector: 插入:0.59 遍历:0.64 查找:444.43 删除:164.40 总计:610.06 us SparseSet:插入:15.59 遍历:0.69 查找:23.29 删除:13.09 总计:52.66 us StdHash: 插入:35.29 遍历:3.54 查找:25.72 删除:27.27 总计:91.82 us SparseSet vs Vector: 11.58x --- 内存效率 --- 插入100人: SparseSet容量: 128 Vector容量: 1000 删除90人后(剩10人): SparseSet容量: 128 Vector容量: 1000 最终稀疏集类头文件: // sparse_set.h // 高性能稀疏集头文件,支持uint64_t、void*等类型 // ygluu, ai #pragma once #include <cstdint> #include <vector> #include <type_traits> #include <cstring> #ifdef __GNUC__ #define SPARSE_ALWAYS_INLINE __attribute__((always_inline)) #define SPARSE_LIKELY(x) __builtin_expect(!!(x), 1) #define SPARSE_UNLIKELY(x) __builtin_expect(!!(x), 0) #define SPARSE_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) #else #define SPARSE_ALWAYS_INLINE #define SPARSE_LIKELY(x) (x) #define SPARSE_UNLIKELY(x) (x) #define SPARSE_PREFETCH(addr, rw, locality) #endif namespace sset { // 类型萃取:获取整数表示类型 template<typename T> struct id_traits { using int_type = T; static constexpr int_type EMPTY = static_cast<int_type>(-1); static SPARSE_ALWAYS_INLINE int_type to_int(T val) { return val; } static SPARSE_ALWAYS_INLINE T from_int(int_type val) { return val; } }; // void*特化 template<> struct id_traits<void*> { using int_type = uintptr_t; static constexpr int_type EMPTY = static_cast<int_type>(-1); static SPARSE_ALWAYS_INLINE int_type to_int(void* val) { return reinterpret_cast<int_type>(val); } static SPARSE_ALWAYS_INLINE void* from_int(int_type val) { return reinterpret_cast<void*>(val); } }; // 其他指针类型特化 template<typename T> struct id_traits<T*> { using int_type = uintptr_t; static constexpr int_type EMPTY = static_cast<int_type>(-1); static SPARSE_ALWAYS_INLINE int_type to_int(T* val) { return reinterpret_cast<int_type>(val); } static SPARSE_ALWAYS_INLINE T* from_int(int_type val) { return reinterpret_cast<T*>(val); } }; // ==================== 稀疏集模板类 ==================== template<typename T> class alignas(64) SparseSet { using traits = id_traits<T>; using int_type = typename traits::int_type; static constexpr int_type EMPTY = traits::EMPTY; struct Node { int_type id = EMPTY; uint32_t dense_idx = 0; }; std::vector<Node> nodes_; std::vector<T> dense_; std::vector<uint32_t> slot_of_; uint32_t mask_ = 0; uint32_t size_ = 0; // 哈希函数(针对uintptr_t优化) static SPARSE_ALWAYS_INLINE uint64_t hash(int_type x) { // 对指针类型做混合,避免低位相同 if constexpr (sizeof(int_type) == 8) { x ^= x >> 33; x *= 0xff51afd7ed558ccdULL; x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL; return x ^ (x >> 33); } else { x ^= x >> 16; x *= 0x45d9f3bULL; x ^= x >> 16; x *= 0x45d9f3bULL; return x ^ (x >> 16); } } SPARSE_ALWAYS_INLINE uint32_t ideal_slot(int_type id) const { return static_cast<uint32_t>(hash(id)) & mask_; } void rebuild(uint32_t new_cap) { std::vector<Node> old_nodes = std::move(nodes_); std::vector<T> old_dense = std::move(dense_); nodes_.assign(new_cap, Node{EMPTY, 0}); mask_ = new_cap - 1; dense_.clear(); dense_.reserve(new_cap >> 1); slot_of_.clear(); slot_of_.reserve(new_cap >> 1); size_ = 0; for (const auto& val : old_dense) { int_type id = traits::to_int(val); if (id != EMPTY) insert_impl(id); } } // 内部插入(使用int_type) SPARSE_ALWAYS_INLINE void insert_impl(int_type id) { uint32_t slot = ideal_slot(id); uint32_t dist = 0; Node insert_node{id, 0}; while (true) { Node& cur = nodes_[slot]; if (cur.id == EMPTY) { uint32_t idx = size_; insert_node.dense_idx = idx; cur = insert_node; dense_.push_back(traits::from_int(id)); slot_of_.push_back(slot); ++size_; return; } if (cur.id == id) return; uint32_t cur_dist = (slot - ideal_slot(cur.id)) & mask_; if (cur_dist < dist) { std::swap(cur, insert_node); dist = cur_dist; } slot = (slot + 1) & mask_; ++dist; } } public: SparseSet() { nodes_.assign(16, Node{EMPTY, 0}); mask_ = 15; dense_.reserve(8); slot_of_.reserve(8); } explicit SparseSet(uint32_t reserve) { uint32_t cap = 16; while (cap < reserve * 2) cap <<= 1; nodes_.assign(cap, Node{EMPTY, 0}); mask_ = cap - 1; dense_.reserve(reserve); slot_of_.reserve(reserve); } // 公开接口:使用原始类型T SPARSE_ALWAYS_INLINE void insert(T val) { int_type id = traits::to_int(val); if (SPARSE_UNLIKELY(id == EMPTY)) return; if (SPARSE_UNLIKELY(size_ >= (mask_ + 1) * 0.75f)) { rebuild((mask_ + 1) << 1); } insert_impl(id); } SPARSE_ALWAYS_INLINE bool erase(T val) { int_type id = traits::to_int(val); if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false; uint32_t slot = ideal_slot(id); while (nodes_[slot].id != EMPTY) { Node& cur = nodes_[slot]; if (cur.id == id) { uint32_t idx = cur.dense_idx; uint32_t last = size_ - 1; if (idx != last) { T last_val = dense_[last]; uint32_t last_slot = slot_of_[last]; dense_[idx] = last_val; slot_of_[idx] = last_slot; nodes_[last_slot].dense_idx = idx; } dense_.pop_back(); slot_of_.pop_back(); cur.id = EMPTY; --size_; return true; } slot = (slot + 1) & mask_; } return false; } SPARSE_ALWAYS_INLINE bool contains(T val) const { int_type id = traits::to_int(val); if (SPARSE_UNLIKELY(id == EMPTY || size_ == 0)) return false; uint32_t slot = ideal_slot(id); while (nodes_[slot].id != EMPTY) { if (nodes_[slot].id == id) return true; slot = (slot + 1) & mask_; } return false; } template<typename Fn> SPARSE_ALWAYS_INLINE void foreach(Fn&& fn) const { const uint32_t n = size_; if (n == 0) return; const T* d = dense_.data(); uint32_t i = 0; SPARSE_PREFETCH(&d[0], 0, 3); SPARSE_PREFETCH(&d[16], 0, 3); for (; i + 8 <= n; i += 8) { SPARSE_PREFETCH(&d[i + 32], 0, 3); fn(d[i]); fn(d[i+1]); fn(d[i+2]); fn(d[i+3]); fn(d[i+4]); fn(d[i+5]); fn(d[i+6]); fn(d[i+7]); } for (; i < n; ++i) fn(d[i]); } void clear() { for (uint32_t i = 0; i < size_; ++i) { nodes_[slot_of_[i]].id = EMPTY; } dense_.clear(); slot_of_.clear(); size_ = 0; } void reserve(uint32_t n) { if (n > (mask_ + 1) * 0.5f) { uint32_t new_cap = 16; while (new_cap < n * 2) new_cap <<= 1; rebuild(new_cap); } dense_.reserve(n); slot_of_.reserve(n); } uint32_t size() const { return size_; } bool empty() const { return size_ == 0; } uint32_t capacity() const { return dense_.capacity(); } const T* data() const { return dense_.data(); } }; // 常用类型别名 using SparseSetU64 = SparseSet<std::uint64_t>; using SparseSetPtr = SparseSet<void*>; } // namespace