// hash32/murmur3.rs

use core::slice;

use byteorder::{ByteOrder, LE};

/// 32-bit MurmurHash3 hasher
pub struct Hasher {
    buf: Buffer,      // scratch space for the 0-3 bytes of an incomplete block
    index: Index,     // how many bytes of `buf` are currently filled
    processed: u32,   // total number of bytes hashed, used in finalization
    state: State,     // running hash state
}

struct State(u32);

#[derive(Clone, Copy)]
// 4-byte alignment lets a full buffer be handed to `process_block` as a
// single aligned `u32`-sized block
#[repr(align(4))]
struct Buffer {
    bytes: [u8; 4],
}

#[derive(Clone, Copy, PartialEq)]
enum Index {
    _0,
    _1,
    _2,
    _3,
}
impl Index {
    fn usize(&self) -> usize {
        match *self {
            Index::_0 => 0,
            Index::_1 => 1,
            Index::_2 => 2,
            Index::_3 => 3,
        }
    }
}

impl From<usize> for Index {
    fn from(x: usize) -> Self {
        // wrap the raw cursor into 0..=3
        match x % 4 {
            0 => Index::_0,
            1 => Index::_1,
            2 => Index::_2,
            3 => Index::_3,
            _ => unreachable!(),
        }
    }
}

impl Hasher {
    // Buffer up to 3 pending bytes. Callers uphold `start + len < 4`.
    fn push(&mut self, buf: &[u8]) {
        let start = self.index.usize();
        let len = buf.len();
        // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
        // self.buf.bytes[start..start+len].copy_from_slice(buf);
        for i in 0..len {
            unsafe {
                *self.buf.bytes.get_unchecked_mut(start + i) = *buf.get_unchecked(i);
            }
        }
        self.index = Index::from(start + len);
    }
}

impl Default for Hasher {
    fn default() -> Self {
        Hasher {
            // zero-initialize instead of the deprecated `mem::uninitialized`,
            // which is undefined behavior for integer buffers; the bytes are
            // never read before being written anyway
            buf: Buffer { bytes: [0; 4] },
            index: Index::_0,
            processed: 0,
            state: State(0),
        }
    }
}

impl ::Hasher for Hasher {
    fn finish(&self) -> u32 {
        // tail: fold the 0-3 buffered bytes that never filled a block into
        // the state, assembled little-endian as in the reference
        // implementation
        let mut state = match self.index {
            Index::_3 => {
                let mut block = 0;
                block ^= u32::from(self.buf.bytes[2]) << 16;
                block ^= u32::from(self.buf.bytes[1]) << 8;
                block ^= u32::from(self.buf.bytes[0]);
                self.state.0 ^ pre_mix(block)
            }
            Index::_2 => {
                let mut block = 0;
                block ^= u32::from(self.buf.bytes[1]) << 8;
                block ^= u32::from(self.buf.bytes[0]);
                self.state.0 ^ pre_mix(block)
            }
            Index::_1 => {
                let mut block = 0;
                block ^= u32::from(self.buf.bytes[0]);
                self.state.0 ^ pre_mix(block)
            }
            Index::_0 => self.state.0,
        };

        // finalization mix (fmix32): xor in the total length, then force the
        // bits to avalanche
        state ^= self.processed;
        state ^= state >> 16;
        state = state.wrapping_mul(0x85ebca6b);
        state ^= state >> 13;
        state = state.wrapping_mul(0xc2b2ae35);
        state ^= state >> 16;

        state
    }

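    // Worked example (illustrative, not from the original source): writing
    // the 5-byte input b"hello" pushes one full block (b"hell") through
    // `process_block` and leaves b"o" buffered; `finish` then mixes that
    // single-byte tail (`Index::_1`) and finalizes with `processed = 5`.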
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let len = bytes.len();
        self.processed += len as u32;

        let body = if self.index == Index::_0 {
            bytes
        } else {
            let index = self.index.usize();
            if len + index >= 4 {
                // we can complete a block using the data left in the buffer
                // NOTE(unsafe) avoid panicking branch (`slice_index_len_fail`)
                // let (head, body) = bytes.split_at(4 - index);
                let mid = 4 - index;
                let head = unsafe { slice::from_raw_parts(bytes.as_ptr(), mid) };
                let body = unsafe { slice::from_raw_parts(bytes.as_ptr().add(mid), len - mid) };

                // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
                // self.buf.bytes[index..].copy_from_slice(head);
                for i in 0..4 - index {
                    unsafe {
                        *self.buf.bytes.get_unchecked_mut(index + i) = *head.get_unchecked(i);
                    }
                }

                self.index = Index::_0;

                self.state.process_block(&self.buf.bytes);

                body
            } else {
                bytes
            }
        };

        for block in body.chunks(4) {
            if block.len() == 4 {
                // NOTE(unsafe) the chunk is exactly 4 bytes long, so the
                // cast to `&[u8; 4]` is sound
                self.state
                    .process_block(unsafe { &*(block.as_ptr() as *const _) });
            } else {
                self.push(block);
            }
        }

        // XXX is this faster?
        // for block in body.chunks_exact(4) {
        //     self.state
        //         .process_block(unsafe { &*(block.as_ptr() as *const _) });
        // }

        // let tail = body.split_at(body.len() / 4 * 4).1;

        // self.push(tail);
    }
}
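
// A convenience sketch (an addition, not part of the original file): one-shot
// hashing of a byte slice with the streaming hasher above. Fully qualified
// syntax is used so the crate-root `Hasher` trait need not be imported here.
#[allow(dead_code)]
fn murmur3_32(bytes: &[u8]) -> u32 {
    let mut hasher = Hasher::default();
    <Hasher as ::Hasher>::write(&mut hasher, bytes);
    <Hasher as ::Hasher>::finish(&hasher)
}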

// block constants (c1, c2, r1) from the reference MurmurHash3 x86 32-bit
// implementation
const C1: u32 = 0xcc9e2d51;
const C2: u32 = 0x1b873593;
const R1: u32 = 15;

impl State {
    // one round of the MurmurHash3 body: mix in a 4-byte block, rotate, and
    // apply the `h = h * 5 + 0xe6546b64` update
    fn process_block(&mut self, block: &[u8; 4]) {
        self.0 ^= pre_mix(LE::read_u32(block));
        self.0 = self.0.rotate_left(13);
        self.0 = 5u32.wrapping_mul(self.0).wrapping_add(0xe6546b64);
    }
}

// per-block scramble from the reference implementation:
// k *= c1; k = rotl(k, r1); k *= c2
fn pre_mix(mut block: u32) -> u32 {
    block = block.wrapping_mul(C1);
    block = block.rotate_left(R1);
    block = block.wrapping_mul(C2);
    block
}
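
// A minimal sanity-check sketch (an addition, assuming the crate-root
// `Hasher` trait shown above and a toolchain that supports anonymous trait
// imports). These are illustrative tests, not the crate's official suite.
#[cfg(test)]
mod tests {
    use super::Hasher;

    // bring `write`/`finish` into scope without shadowing `super::Hasher`
    use ::Hasher as _;

    // An empty input hashes to 0: the tail is empty, `processed` is 0, and
    // the finalization mix maps 0 to 0.
    #[test]
    fn empty_input() {
        let hasher = Hasher::default();
        assert_eq!(hasher.finish(), 0);
    }

    // Streaming writes must agree with a single one-shot write; the buffering
    // in `write` exists precisely to guarantee this.
    #[test]
    fn chunked_equals_one_shot() {
        let mut one_shot = Hasher::default();
        one_shot.write(b"hello world");

        let mut chunked = Hasher::default();
        chunked.write(b"hel");
        chunked.write(b"lo wor");
        chunked.write(b"ld");

        assert_eq!(one_shot.finish(), chunked.finish());
    }
}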