性能分析
Software | Data Structure | Space [MiB] | Insert [ns/key] | Lookup [ns/key] |
---|---|---|---|---|
cedar | Double-array trie | 1173.02 | 631.06 | 50.40 |
cedar ORDERED=false | Double-array prefix trie | 671.66 | 786.02 | 49.99 |
libdatrie 0.2.8 | Double-array prefix trie | n/a | n/a | n/a |
libtrie 0.1.1 | Double-array two-trie | 2756.30 | 8116.16 | 185.85 |
dary | Double-array trie | 1119.04 | 1786.93 | 79.96 |
doar 0.0.13 | Compacted double-array trie | 2285.21 | 17687.60 | 83.41 |
critbit | Crit-bit (patricia) tree | 1457.02 | 1713.69 | 752.49 |
libdict | Splay tree | 1823.12 | 1541.48 | 229.34 |
libdict | Treap | 1823.13 | 1682.26 | 902.43 |
libdict | Skip list | 1852.86 | 1907.25 | 1265.79 |
Andersson tree library | AA tree | 1457.02 | 2100.03 | 337.14 |
C Containers library | Scapegoat tree | 1891.74 | 2380.65 | 254.34 |
tst_vanilla | ternary search tree | 3318.75 | 1109.25 | 129.12 |
Judy 1.0.5 | Judy trie SL | 897.59 | 580.67 | 142.64 |
hat-trie 0.1.0 | HAT-trie | 695.49 | 916.02 | 75.51 |
std::map | Red-black tree | 2506.27 | 1617.60 | 851.33 |
std::unordered_map | Hash table | 2471.60 | 615.30 | 170.41 |
array hash | Array Hash | 1725.56 | 17273.22 | 330.76 |
CMPH 2.0 | Hash table | 2741.03 | 2744.92 | 285.11 |
cpp-btree 1.0.1 | B-tree | 1744.96 | 1749.96 | 1080.04 |
sparsetable 2.0.2 | Sparse hash table | 1685.41 | 2635.32 | 157.63 |
sparsetable 2.0.2 (dense) | Hash table | 2335.04 | 502.66 | 123.3 |
Software | Data Structure | Space [MiB] | Size [MiB] | Build [ns/key] | Lookup [ns/key] |
---|---|---|---|---|---|
cedar | Double-array trie | 832.82 | 816.54 | 183.57 | 38.95 |
cedar ORDERED=false | Double-array prefix trie | 490.59 | 488.35 | 221.87 | 39.07 |
libdatrie 0.2.8 | Double-array prefix trie | 1229.12 | 644.97 | 209955.04 | 124.66 |
libtrie 0.1.1 | Double-array two-trie | 2312.11 | 654.39 | 5401.59 | 181.95 |
dary | Double-array trie | 897.75 | 895.54 | 51144.92 | 57.90 |
doar 0.0.13 | Compacted double-array trie | 1937.25 | 334.59 | 990.51 | 48.00 |
Darts 0.32 | Double-array trie | 4306.02 | 858.93 | 2387.87 | 40.89 |
Darts-clone 0.32g | Directed-acyclic word graph | 2311.39 | 409.17 | 1339.14 | 36.39 |
Darts-clone 0.32e5 | Compacted double-array trie | 2779.10 | 309.31 | 1011.92 | 59.42 |
DASTrie 1.0 | Compacted double-array trie | 2626.16 | 383.37 | 92634.88 | 85.02 |
tx-trie 0.18 | LOUDS trie | 1791.10 | 113.11 | 626.90 | 972.32 |
ux-trie 0.1.9 | LOUDS two-trie | 2223.80 | 92.39 | 1229.11 | 1975.28 |
marisa-trie 0.2.4 | LOUDS nested patricia trie | 2036.49 | 87.27 | 698.76 | 194.87 |
使用
template <typename value_type,
const int NO_VALUE = nan<value_type>::N1,
const int NO_PATH = nan<value_type>::N2,
const bool ORDERED = true,
const int MAX_TRIAL = 1,
const size_t NUM_TRACKING_NODES = >
class da;
NO_VALUE的值是-1,NO_PATH的值是-2
因为其他的模版参数都有默认值,一般只特化value_type
即可。
cedar::da<int> trie;
trie.update("hello", strlen("hello"), 1);
接口
template <...>
class da {
size_t capacity() const;
size_t size() const;
size_t total_size() const;
size_t unit_size() const;
size_t nonzero_size() const; // warning: O(size)
size_t num_keys() const; // warning: O(size)
template <typename T>
T exactMatchSearch(const char* key) const;
template <typename T>
T exactMatchSearch(const char* key, size_t len, size_t from=) const;
template <typename T>
size_t commonPrefixSearch(const char* str, T* result, size_t result_len) const;
template <typename T>
size_t commonPrefixSearch(const char* str, T* result, size_t result_len, size_t len,
size_t from=) const;
template <typename T>
size_t commonPrefixPredict(const char* str, T* result, size_t result_len);
template <typename T>
size_t commonPrefixPredict(const char* str, T* result, size_t result_len, size_t len,
size_t from = );
void suffix(char* key, size_t len, size_t to) const;
value_type traverse(const char* key, size_t& from, size_t& pos) const;
value_type traverse(const char* key, size_t& from, size_t& pos, size_t end_pos) const;
value_type& update(const char* key);
value_type& update(const char* key, size_t len, value_type val=value_type());
value_type& update(const char* key, size_t& from, size_t& pos, size_t len,
value_type val=value_type());
template <typename T>
value_type& update(const char* key, size_t& from, size_t& pos, size_t len,
value_type val, T& cf)
int erase(const char* key);
int erase(const char* key, size_t len, size_t from = );
void erase(size_t from);
int build(size_t num, const char** key, const size_t* len = , const value_type* val = );
template <typename T>
void dump(T* result, const size_t result_len);
int save(const char* fn, const char* mode = "wb") const;
int open(const char* fn, const char* mode = "rb",
const size_t offset = , size_t size_ = );
void restore()
void set_array(void* p, size_t size_ = );
const void* array() const;
void clear(const bool reuse = true);
int begin(size_t& from, size_t& len);
int next(size_t& from, size_t& len, const size_t root=);
void test(const size_t from=) const;
};
update
value_type& update(const char* key);
// update(key, from=, len=strlen(key), val=)
value_type& update(const char* key, size_t len, value_type val=value_type());
// update(key, from=, len, val)
value_type& update(const char* key, size_t& from, size_t& pos, size_t len,
value_type val=value_type());
erase
int erase(const char* key);
int erase(const char* key, size_t len, size_t from = );
void erase(size_t from);
build
int build(size_t num, const char** key, const size_t* len=NULL,
const value_type* val=NULL);
exactMatchSearch
template <typename T>
T exactMatchSearch(const char* key) const;
// exactMatchSearch(key, len=strlen(key), from=0);
template <typename T>
T exactMatchSearch(const char* key, size_t len, size_t from=0) const;
commonPrefixSearch
template <typename T>
size_t commonPrefixSearch(const char* str, T* result, size_t result_len) const;
// commonPrefixSearch(str, result, result_len, len=strlen(key), from=0);
template <typename T>
size_t commonPrefixSearch(const char* str, T* result, size_t result_len, size_t len,
size_t from=0) const;
commonPrefixPredict
template <typename T>
size_t commonPrefixPredict(const char* str, T* result, size_t result_len);
// commonPrefixPredict(str, result, result_len, len=strlen(key), from=0);
template <typename T>
size_t commonPrefixPredict(const char* str, T* result, size_t result_len, size_t len,
size_t from = 0);
traverse
value_type traverse(const char* key, size_t& from, size_t& pos) const;
// traverse(key, form, pos, end_pos=strlen(key))
value_type traverse(const char* key, size_t& from, size_t& pos, size_t end_pos) const;
trie中重要的函数,可以灵活的查找trie
重点是依据返回值来判定traverse的结果
如果返回NO_VALUE(N1),说明有key的前缀是当前[pos, end_pos)子串,但没有匹配。
如果返回NO_PATH(N2),说明当前子串对应的路径在trie中不存在。
如果返回其他值,说明当前子串对应表示的key恰好在trie中。
来源 https://blog.csdn.net/weixin_34111819/article/details/89444083?ops_request_misc=&request_id=&biz_id=102&utm_term=Cedar&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-1-89444083.142^v7^control,157^v4^new_style&spm=1018.2226.3001.4187