Skip to main content

maxminddb/
reader.rs

1//! MaxMind DB reader implementation.
2
3use std::collections::HashSet;
4use std::fs;
5use std::net::IpAddr;
6use std::path::Path;
7
8use ipnetwork::IpNetwork;
9use serde::Deserialize;
10
11#[cfg(feature = "mmap")]
12pub use memmap2::Mmap;
13#[cfg(feature = "mmap")]
14use memmap2::MmapOptions;
15#[cfg(feature = "mmap")]
16use std::fs::File;
17
18use crate::decoder;
19use crate::error::MaxMindDbError;
20use crate::metadata::Metadata;
21use crate::result::{LookupResult, LookupSource, NetworkKind};
22use crate::within::{IpInt, Within, WithinNode, WithinOptions};
23
/// Size of the data section separator (16 zero bytes).
const DATA_SECTION_SEPARATOR_SIZE: usize = 16;
/// Byte sequence that marks where the metadata section begins.
// NOTE(review): presumably the needle searched for by `find_metadata_start`;
// that function is outside this view — confirm.
const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com";
27
28/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the
29/// lifetime of the underlying buffer holding the contents of the database file.
30///
31/// The `Reader` supports both file-based and memory-mapped access to MaxMind
32/// DB files, including GeoIP2 and GeoLite2 databases.
33///
34/// # Features
35///
36/// - **`mmap`**: Enable memory-mapped file access for better performance
37/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string
38///   decoding)
39/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but
40///   ~20% faster)
41pub struct Reader<S: AsRef<[u8]>> {
42    pub(crate) buf: S,
43    /// Database metadata.
44    metadata: Metadata,
45    record_size: u16,
46    /// Cached `Metadata::node_count` for `Reader` search-tree traversal.
47    /// Use this instead of `metadata.node_count` for traversal invariants.
48    node_count: usize,
49    /// Cached bytes per node derived from `Metadata::record_size` for `Reader`.
50    /// Use this instead of `metadata.record_size` in lookup hot paths.
51    node_byte_size: usize,
52    pub(crate) ipv4_start: usize,
53    /// Bit depth at which ipv4_start was found (0-96). Used to calculate
54    /// correct prefix lengths for IPv4 lookups in IPv6 databases.
55    pub(crate) ipv4_start_bit_depth: usize,
56    pub(crate) pointer_base: usize,
57    pub(crate) data_section_len: usize,
58    pub(crate) metadata_start: usize,
59}
60
61impl<S: AsRef<[u8]>> std::fmt::Debug for Reader<S> {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        f.debug_struct("Reader")
64            .field("buf_len", &self.buf.as_ref().len())
65            .field("metadata", &self.metadata)
66            .field("ipv4_start", &self.ipv4_start)
67            .field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth)
68            .field("pointer_base", &self.pointer_base)
69            .field("data_section_len", &self.data_section_len)
70            .field("metadata_start", &self.metadata_start)
71            .finish_non_exhaustive()
72    }
73}
74
#[cfg(feature = "mmap")]
impl Reader<Mmap> {
    /// Opens a MaxMind DB database file through a memory map instead of
    /// reading it into an owned buffer.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the database file is not modified or
    /// truncated while the `Reader` exists. Modifying or truncating the
    /// file while it is memory-mapped will result in undefined behavior.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, if it cannot be mapped, or if
    /// [`Reader::from_source`] rejects the mapped contents.
    ///
    /// # Example
    ///
    /// ```
    /// # #[cfg(feature = "mmap")]
    /// # {
    /// // SAFETY: The database file will not be modified while the reader exists.
    /// let reader = unsafe {
    ///     maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb")
    /// }.unwrap();
    /// # }
    /// ```
    pub unsafe fn open_mmap<P: AsRef<Path>>(database: P) -> Result<Reader<Mmap>, MaxMindDbError> {
        let file = File::open(database)?;
        let mapping = MmapOptions::new()
            .map(&file)
            .map_err(MaxMindDbError::Mmap)?;
        Self::from_source(mapping)
    }
}
104
105impl Reader<Vec<u8>> {
106    /// Open a MaxMind DB database file by loading it into memory.
107    ///
108    /// # Example
109    ///
110    /// ```
111    /// let reader = maxminddb::Reader::open_readfile(
112    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
113    /// ```
114    pub fn open_readfile<P: AsRef<Path>>(database: P) -> Result<Reader<Vec<u8>>, MaxMindDbError> {
115        let buf: Vec<u8> = fs::read(&database)?; // IO error converted via #[from]
116        Reader::from_source(buf)
117    }
118}
119
120impl<'de, S: AsRef<[u8]>> Reader<S> {
121    /// Open a MaxMind DB database from anything that implements AsRef<[u8]>
122    ///
123    /// # Example
124    ///
125    /// ```
126    /// use std::fs;
127    /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
128    /// let reader = maxminddb::Reader::from_source(buf).unwrap();
129    /// ```
130    pub fn from_source(buf: S) -> Result<Reader<S>, MaxMindDbError> {
131        let metadata_start = find_metadata_start(buf.as_ref())?;
132        // find_metadata_start returns the offset after the marker; the marker
133        // bytes are not part of the data section and must stay out of limits.
134        let data_section_end = metadata_marker_start(metadata_start)?;
135        let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0);
136        let metadata = Metadata::deserialize(&mut type_decoder)?;
137        validate_metadata_for_reader(&metadata)?;
138
139        let search_tree_size =
140            search_tree_size_bytes(metadata.node_count as usize, metadata.record_size as usize)?;
141        let record_size = metadata.record_size;
142        let node_count = metadata.node_count as usize;
143        let node_byte_size = record_size as usize / 4;
144        let pointer_base = search_tree_size
145            .checked_add(DATA_SECTION_SEPARATOR_SIZE)
146            .ok_or_else(|| {
147                MaxMindDbError::invalid_database(
148                    "the MaxMind DB file's search tree extends beyond the file",
149                )
150            })?;
151        validate_search_tree_layout(pointer_base, data_section_end)?;
152        let data_section_len = data_section_end - pointer_base;
153
154        let mut reader = Reader {
155            buf,
156            record_size,
157            node_count,
158            node_byte_size,
159            pointer_base,
160            data_section_len,
161            metadata_start,
162            metadata,
163            ipv4_start: 0,
164            ipv4_start_bit_depth: 0,
165        };
166        let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start();
167        reader.ipv4_start = ipv4_start;
168        reader.ipv4_start_bit_depth = ipv4_start_bit_depth;
169
170        Ok(reader)
171    }
172
173    /// Returns database metadata.
174    ///
175    /// Metadata is validated when the reader is created and exposed by
176    /// reference so it cannot be mutated independently of cached reader state.
177    #[inline]
178    pub fn metadata(&self) -> &Metadata {
179        &self.metadata
180    }
181
182    /// Lookup an IP address in the database.
183    ///
184    /// Returns a [`LookupResult`] that can be used to:
185    /// - Check if data exists with [`has_data()`](LookupResult::has_data)
186    /// - Get the network containing the IP with [`network()`](LookupResult::network)
187    /// - Decode the full record with [`decode()`](LookupResult::decode)
188    /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path)
189    ///
190    /// # Examples
191    ///
192    /// Basic city lookup:
193    /// ```
194    /// # use maxminddb::geoip2;
195    /// # use std::net::IpAddr;
196    /// # fn main() -> Result<(), maxminddb::MaxMindDbError> {
197    /// let reader = maxminddb::Reader::open_readfile(
198    ///     "test-data/test-data/GeoIP2-City-Test.mmdb")?;
199    ///
200    /// let ip: IpAddr = "89.160.20.128".parse().unwrap();
201    /// let result = reader.lookup(ip)?;
202    ///
203    /// if let Some(city) = result.decode::<geoip2::City>()? {
204    ///     // Access nested structs directly - no Option unwrapping needed
205    ///     if let Some(name) = city.city.names.english {
206    ///         println!("City: {}", name);
207    ///     }
208    /// } else {
209    ///     println!("No data found for IP {}", ip);
210    /// }
211    /// # Ok(())
212    /// # }
213    /// ```
214    ///
215    /// Selective field access:
216    /// ```
217    /// # use maxminddb::{path, Reader};
218    /// # use std::net::IpAddr;
219    /// # fn main() -> Result<(), maxminddb::MaxMindDbError> {
220    /// let reader = Reader::open_readfile(
221    ///     "test-data/test-data/GeoIP2-City-Test.mmdb")?;
222    /// let ip: IpAddr = "89.160.20.128".parse().unwrap();
223    ///
224    /// let result = reader.lookup(ip)?;
225    /// let country_code: Option<String> = result.decode_path(&path!["country", "iso_code"])?;
226    ///
227    /// println!("Country: {:?}", country_code);
228    /// # Ok(())
229    /// # }
230    /// ```
231    pub fn lookup(&'de self, address: IpAddr) -> Result<LookupResult<'de, S>, MaxMindDbError> {
232        match address {
233            IpAddr::V4(v4) => {
234                let (pointer, prefix_len) = self.find_address_in_tree_v4(v4.into());
235
236                // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect
237                // the actual bit depth in the tree. The ipv4_start_bit_depth tells us
238                // how deep in the IPv6 tree we were when we found the IPv4 subtree.
239                let prefix_len = if self.metadata.ip_version == 6 {
240                    self.ipv4_start_bit_depth + prefix_len
241                } else {
242                    prefix_len
243                };
244
245                self.lookup_result(pointer, prefix_len as u8, address)
246            }
247            IpAddr::V6(v6) => {
248                if self.metadata.ip_version == 4 {
249                    return Err(MaxMindDbError::invalid_input(
250                        "cannot look up IPv6 address in IPv4-only database",
251                    ));
252                }
253
254                let (pointer, prefix_len) = self.find_address_in_tree_v6(v6.into());
255                self.lookup_result(pointer, prefix_len as u8, address)
256            }
257        }
258    }
259
260    /// Iterate over all networks in the database.
261    ///
262    /// This is a convenience method equivalent to calling [`within()`](Self::within)
263    /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases.
264    ///
265    /// # Arguments
266    ///
267    /// * `options` - Controls which networks are yielded. Use [`Default::default()`]
268    ///   for standard behavior.
269    ///
270    /// # Examples
271    ///
272    /// Iterate over all networks with default options:
273    /// ```
274    /// use maxminddb::{geoip2, Reader};
275    ///
276    /// let reader = Reader::open_readfile(
277    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
278    ///
279    /// let mut count = 0;
280    /// for result in reader.networks(Default::default()).unwrap() {
281    ///     let lookup = result.unwrap();
282    ///     count += 1;
283    ///     if count >= 10 { break; }
284    /// }
285    /// ```
286    pub fn networks(&'de self, options: WithinOptions) -> Result<Within<'de, S>, MaxMindDbError> {
287        let cidr = if self.metadata.ip_version == 6 {
288            IpNetwork::V6("::/0".parse().unwrap())
289        } else {
290            IpNetwork::V4("0.0.0.0/0".parse().unwrap())
291        };
292        self.within(cidr, options)
293    }
294
295    /// Iterate over IP networks within a CIDR range.
296    ///
297    /// Returns an iterator that yields [`LookupResult`] for each network in the
298    /// database that falls within the specified CIDR range.
299    ///
300    /// # Arguments
301    ///
302    /// * `cidr` - The CIDR range to iterate over.
303    /// * `options` - Controls which networks are yielded. Use [`Default::default()`]
304    ///   for standard behavior (skip aliases, skip networks without data, include
305    ///   empty values).
306    ///
307    /// # Examples
308    ///
309    /// Iterate over all IPv4 networks:
310    /// ```
311    /// use ipnetwork::IpNetwork;
312    /// use maxminddb::{geoip2, Reader};
313    ///
314    /// let reader = Reader::open_readfile(
315    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
316    ///
317    /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap());
318    /// let mut count = 0;
319    /// for result in reader.within(ipv4_all, Default::default()).unwrap() {
320    ///     let lookup = result.unwrap();
321    ///     let network = lookup.network().unwrap();
322    ///     let city: geoip2::City = lookup.decode().unwrap().unwrap();
323    ///     let city_name = city.city.names.english;
324    ///     println!("Network: {}, City: {:?}", network, city_name);
325    ///     count += 1;
326    ///     if count >= 10 { break; } // Limit output for example
327    /// }
328    /// ```
329    ///
330    /// Search within a specific subnet:
331    /// ```
332    /// use ipnetwork::IpNetwork;
333    /// use maxminddb::{geoip2, Reader};
334    ///
335    /// let reader = Reader::open_readfile(
336    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
337    ///
338    /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap());
339    /// for result in reader.within(subnet, Default::default()).unwrap() {
340    ///     match result {
341    ///         Ok(lookup) => {
342    ///             let network = lookup.network().unwrap();
343    ///             println!("Found: {}", network);
344    ///         }
345    ///         Err(e) => eprintln!("Error: {}", e),
346    ///     }
347    /// }
348    /// ```
349    ///
350    /// Include networks without data:
351    /// ```
352    /// use ipnetwork::IpNetwork;
353    /// use maxminddb::{Reader, WithinOptions};
354    ///
355    /// let reader = Reader::open_readfile(
356    ///     "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap();
357    ///
358    /// let opts = WithinOptions::default().include_networks_without_data();
359    /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() {
360    ///     let lookup = result.unwrap();
361    ///     if !lookup.has_data() {
362    ///         println!("Network {} has no data", lookup.network().unwrap());
363    ///     }
364    /// }
365    /// ```
366    pub fn within(
367        &'de self,
368        cidr: IpNetwork,
369        options: WithinOptions,
370    ) -> Result<Within<'de, S>, MaxMindDbError> {
371        if self.metadata.ip_version == 4 && matches!(cidr, IpNetwork::V6(_)) {
372            return Err(MaxMindDbError::invalid_input(
373                "cannot iterate IPv6 network in IPv4-only database",
374            ));
375        }
376        let ip_address = cidr.network();
377        let prefix_len = cidr.prefix() as usize;
378        let ip_int = IpInt::new(ip_address);
379        let bit_count = ip_int.bit_count();
380
381        let mut node = self.start_node(bit_count);
382        let node_count = self.node_count;
383        let has_ipv4_subtree = self.has_ipv4_subtree();
384
385        let mut stack: Vec<WithinNode> = Vec::with_capacity(bit_count - prefix_len);
386
387        // `bit_count == 32` means the caller requested an IPv4 CIDR. In an
388        // IPv6 database with no IPv4 subtree, `start_node(32)` can already be a
389        // terminal IPv6 record reached by walking the all-zero prefix. Do not
390        // read that terminal value as a tree node; yield the containing IPv6
391        // network instead, matching lookup behavior.
392        if bit_count == 32
393            && self.metadata.ip_version == 6
394            && !has_ipv4_subtree
395            && node >= node_count
396        {
397            stack.push(WithinNode {
398                node,
399                ip_int: IpInt::V6(0),
400                prefix_len: self.ipv4_start_bit_depth,
401            });
402
403            return Ok(Within {
404                reader: self,
405                node_count,
406                has_ipv4_subtree,
407                stack,
408                options,
409            });
410        }
411
412        // Traverse down the tree to the level that matches the cidr mark
413        let mut depth = 0_usize;
414        for i in 0..prefix_len {
415            // `read_node` is only valid for internal search-tree nodes.
416            if node >= node_count {
417                // We've hit a data node or dead end before we exhausted our prefix.
418                // This means the requested CIDR is contained in a single record.
419                break;
420            }
421
422            let bit = ip_int.get_bit(i);
423            node = self.read_node(node, bit as usize);
424            depth = i + 1; // We've now traversed i+1 bits (bits 0 through i)
425
426            if node >= node_count {
427                // We've hit a data node or dead end before we exhausted our prefix.
428                // This means the requested CIDR is contained in a single record.
429                break;
430            }
431        }
432
433        // Always push the node - it could be:
434        // - A data node (> node_count): will be yielded as a single record
435        // - The empty node (== node_count): will be skipped unless include_networks_without_data
436        // - An internal node (< node_count): will be traversed to find all contained records
437        stack.push(WithinNode {
438            node,
439            ip_int,
440            prefix_len: depth,
441        });
442
443        let within = Within {
444            reader: self,
445            node_count,
446            has_ipv4_subtree,
447            stack,
448            options,
449        };
450
451        Ok(within)
452    }
453
454    // Pointer 0 means "not found" because normalize_lookup_result collapses both
455    // the placeholder empty node (`node == node_count`) and an unfinished internal
456    // terminal (`node < node_count`, i.e. bits exhausted while still on a tree
457    // node) into 0, so neither path reaches resolve_data_pointer with a non-data
458    // value.
459    #[inline(always)]
460    fn lookup_result(
461        &'de self,
462        pointer: usize,
463        prefix_len: u8,
464        address: IpAddr,
465    ) -> Result<LookupResult<'de, S>, MaxMindDbError> {
466        let network_kind = match address {
467            IpAddr::V4(_) if self.metadata.ip_version == 6 && self.has_ipv4_subtree() => {
468                NetworkKind::V4InV6Subtree
469            }
470            IpAddr::V4(_) if self.metadata.ip_version == 6 => NetworkKind::V6,
471            IpAddr::V4(_) => NetworkKind::V4,
472            IpAddr::V6(_) => NetworkKind::V6,
473        };
474        if pointer == 0 {
475            Ok(LookupResult::new_not_found(
476                self,
477                prefix_len,
478                address,
479                LookupSource::Lookup,
480                network_kind,
481            ))
482        } else {
483            let data_offset = self.resolve_data_pointer(pointer)?;
484            Ok(LookupResult::new_found(
485                self,
486                data_offset,
487                prefix_len,
488                address,
489                LookupSource::Lookup,
490                network_kind,
491            ))
492        }
493    }
494
495    #[inline(always)]
496    fn find_address_in_tree_v4(&self, ip: u32) -> (usize, usize) {
497        let buf = self.buf.as_ref();
498        let node_count = self.node_count;
499
500        match self.record_size {
501            24 => find_address_in_tree_v4::<RecordSize24>(buf, self.ipv4_start, node_count, ip),
502            28 => find_address_in_tree_v4::<RecordSize28>(buf, self.ipv4_start, node_count, ip),
503            32 => find_address_in_tree_v4::<RecordSize32>(buf, self.ipv4_start, node_count, ip),
504            _ => unreachable!("record_size is validated in Reader::from_source"),
505        }
506    }
507
508    #[inline(always)]
509    fn find_address_in_tree_v6(&self, ip: u128) -> (usize, usize) {
510        let buf = self.buf.as_ref();
511        let node_count = self.node_count;
512
513        match self.record_size {
514            24 => find_address_in_tree_v6::<RecordSize24>(buf, node_count, ip),
515            28 => find_address_in_tree_v6::<RecordSize28>(buf, node_count, ip),
516            32 => find_address_in_tree_v6::<RecordSize32>(buf, node_count, ip),
517            _ => unreachable!("record_size is validated in Reader::from_source"),
518        }
519    }
520
521    #[inline]
522    fn start_node(&self, length: usize) -> usize {
523        if length == 128 {
524            0
525        } else {
526            self.ipv4_start
527        }
528    }
529
530    #[inline]
531    pub(crate) fn has_ipv4_subtree(&self) -> bool {
532        self.metadata.ip_version == 6 && self.ipv4_start < self.node_count
533    }
534
535    /// Find the IPv4 start node and the bit depth at which it was found.
536    /// Returns (node, depth) where depth is how far into the tree we traversed.
537    fn find_ipv4_start(&self) -> (usize, usize) {
538        if self.metadata.ip_version != 6 {
539            return (0, 0);
540        }
541
542        // We are looking up an IPv4 address in an IPv6 tree. Skip over the
543        // first 96 nodes.
544        let mut node: usize = 0;
545        let mut depth: usize = 0;
546        for i in 0_u8..96 {
547            if node >= self.node_count {
548                depth = i as usize;
549                break;
550            }
551            node = self.read_node(node, 0);
552            depth = (i + 1) as usize;
553        }
554        (node, depth)
555    }
556
557    #[inline(always)]
558    pub(crate) fn read_node(&self, node_number: usize, index: usize) -> usize {
559        let buf = self.buf.as_ref();
560        let base_offset = node_number * self.node_byte_size;
561
562        match self.record_size {
563            24 => {
564                let offset = base_offset + index * 3;
565                (buf[offset] as usize) << 16
566                    | (buf[offset + 1] as usize) << 8
567                    | buf[offset + 2] as usize
568            }
569            28 => {
570                let middle = if index != 0 {
571                    buf[base_offset + 3] & 0x0F
572                } else {
573                    (buf[base_offset + 3] & 0xF0) >> 4
574                };
575                let offset = base_offset + index * 4;
576                (middle as usize) << 24
577                    | (buf[offset] as usize) << 16
578                    | (buf[offset + 1] as usize) << 8
579                    | buf[offset + 2] as usize
580            }
581            32 => {
582                let offset = base_offset + index * 4;
583                (buf[offset] as usize) << 24
584                    | (buf[offset + 1] as usize) << 16
585                    | (buf[offset + 2] as usize) << 8
586                    | buf[offset + 3] as usize
587            }
588            _ => unreachable!("record_size is validated in Reader::from_source"),
589        }
590    }
591
592    /// Resolves a pointer from the search tree to an offset in the data section.
593    #[inline]
594    pub(crate) fn resolve_data_pointer(&self, pointer: usize) -> Result<usize, MaxMindDbError> {
595        let resolved = pointer
596            .checked_sub(self.node_count)
597            .and_then(|p| p.checked_sub(DATA_SECTION_SEPARATOR_SIZE))
598            .ok_or_else(|| {
599                MaxMindDbError::invalid_database(
600                    "the MaxMind DB file's data pointer resolves to an invalid location",
601                )
602            })?;
603        // Reject offsets at or beyond the marker-excluding data section length.
604        if resolved >= self.data_section_len {
605            return Err(MaxMindDbError::invalid_database(
606                "the MaxMind DB file's data pointer resolves to an invalid location",
607            ));
608        }
609
610        Ok(resolved)
611    }
612
613    /// Performs comprehensive validation of the MaxMind DB file.
614    ///
615    /// This method validates:
616    /// - Metadata section: format versions, required fields, and value constraints
617    /// - Search tree: traverses all networks to verify tree structure integrity
618    /// - Data section separator: validates the 16-byte separator between tree and data
619    /// - Data section: verifies all data records referenced by the search tree
620    ///
621    /// The verifier is stricter than the MaxMind DB specification and may return
622    /// errors on some databases that are still readable by normal operations.
623    /// This method is useful for:
624    /// - Validating database files after download or generation
625    /// - Debugging database corruption issues
626    /// - Ensuring database integrity in critical applications
627    ///
628    /// Note: Verification traverses the entire database and may be slow on large files.
629    /// The method is thread-safe and can be called on an active Reader.
630    ///
631    /// # Example
632    ///
633    /// ```
634    /// use maxminddb::Reader;
635    ///
636    /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
637    /// reader.verify().expect("Database should be valid");
638    /// ```
639    pub fn verify(&self) -> Result<(), MaxMindDbError> {
640        let metadata_start = find_metadata_start(self.buf.as_ref())?;
641        let data_section_end = metadata_marker_start(metadata_start)?;
642        self.verify_metadata(data_section_end)?;
643        self.verify_database(data_section_end)
644    }
645
646    fn verify_metadata(&self, data_section_end: usize) -> Result<(), MaxMindDbError> {
647        let m = &self.metadata;
648
649        validate_metadata_for_reader(m)?;
650        if m.database_type.is_empty() {
651            return Err(MaxMindDbError::invalid_database(
652                "database_type - Expected: non-empty string Actual: \"\"",
653            ));
654        }
655        if m.description.is_empty() {
656            return Err(MaxMindDbError::invalid_database(
657                "description - Expected: non-empty map Actual: {}",
658            ));
659        }
660        validate_search_tree_layout(self.pointer_base, data_section_end)?;
661        Ok(())
662    }
663
664    fn verify_database(&self, data_section_end: usize) -> Result<(), MaxMindDbError> {
665        let offsets = self.verify_search_tree()?;
666        self.verify_data_section_separator()?;
667        self.verify_data_section(offsets, data_section_end)
668    }
669
670    fn verify_search_tree(&self) -> Result<HashSet<usize>, MaxMindDbError> {
671        let mut offsets = HashSet::new();
672        let opts = WithinOptions::default().include_networks_without_data();
673
674        // Maximum number of networks we can expect in a valid database.
675        // A database with N nodes can have at most 2N data entries (each leaf node
676        // can have data). We add some margin for safety.
677        let max_iterations = self.node_count.saturating_mul(3);
678        let mut iteration_count = 0usize;
679
680        for result in self.networks(opts)? {
681            let lookup = result?;
682            if let Some(offset) = lookup.offset() {
683                offsets.insert(offset);
684            }
685
686            iteration_count += 1;
687            if iteration_count > max_iterations {
688                return Err(MaxMindDbError::invalid_database(format!(
689                    "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)"
690                )));
691            }
692        }
693        Ok(offsets)
694    }
695
696    fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> {
697        let separator_start = self.node_count * self.node_byte_size;
698        let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE;
699
700        if separator_end > self.buf.as_ref().len() {
701            return Err(MaxMindDbError::invalid_database_at(
702                "data section separator extends past end of file",
703                separator_start,
704            ));
705        }
706
707        let separator = &self.buf.as_ref()[separator_start..separator_end];
708
709        for &b in separator {
710            if b != 0 {
711                return Err(MaxMindDbError::invalid_database_at(
712                    format!("unexpected byte in data separator: {separator:?}"),
713                    separator_start,
714                ));
715            }
716        }
717        Ok(())
718    }
719
720    fn verify_data_section(
721        &self,
722        offsets: HashSet<usize>,
723        data_section_end: usize,
724    ) -> Result<(), MaxMindDbError> {
725        let data_section = &self.buf.as_ref()[self.pointer_base..data_section_end];
726
727        // Verify each offset from the search tree points to valid, decodable data
728        for &offset in &offsets {
729            if offset >= data_section.len() {
730                return Err(MaxMindDbError::invalid_database_at(
731                    format!(
732                        "search tree pointer is beyond data section (len: {})",
733                        data_section.len()
734                    ),
735                    offset,
736                ));
737            }
738
739            let mut dec = decoder::Decoder::new(data_section, offset);
740
741            // Try to skip/decode the value to verify it's valid
742            if let Err(e) = dec.skip_value_for_verification() {
743                return Err(MaxMindDbError::invalid_database_at(
744                    format!("decoding error: {e}"),
745                    offset,
746                ));
747            }
748        }
749
750        Ok(())
751    }
752}
753
754fn validate_record_size(record_size: u16) -> Result<(), MaxMindDbError> {
755    if matches!(record_size, 24 | 28 | 32) {
756        Ok(())
757    } else {
758        Err(MaxMindDbError::invalid_database(format!(
759            "record_size - Expected: 24, 28, or 32 Actual: {}",
760            record_size
761        )))
762    }
763}
764
765pub(crate) fn validate_metadata_for_reader(metadata: &Metadata) -> Result<(), MaxMindDbError> {
766    if metadata.binary_format_major_version != 2 {
767        return Err(MaxMindDbError::invalid_database(format!(
768            "binary_format_major_version - Expected: 2 Actual: {}",
769            metadata.binary_format_major_version
770        )));
771    }
772    // Minor format versions are intended to be forward-compatible.
773    if metadata.ip_version != 4 && metadata.ip_version != 6 {
774        return Err(MaxMindDbError::invalid_database(format!(
775            "ip_version - Expected: 4 or 6 Actual: {}",
776            metadata.ip_version
777        )));
778    }
779    if metadata.node_count == 0 {
780        return Err(MaxMindDbError::invalid_database(
781            "node_count - Expected: positive integer Actual: 0",
782        ));
783    }
784    metadata.build_time()?;
785    validate_record_size(metadata.record_size)
786}
787
788fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result<usize, MaxMindDbError> {
789    node_count
790        .checked_mul(record_size)
791        .map(|size| size / 4)
792        .ok_or_else(|| {
793            MaxMindDbError::invalid_database(
794                "search tree size calculation overflowed or is impossibly large",
795            )
796        })
797}
798
799fn validate_search_tree_layout(
800    pointer_base: usize,
801    data_section_end: usize,
802) -> Result<(), MaxMindDbError> {
803    if pointer_base > data_section_end {
804        return Err(MaxMindDbError::invalid_database(
805            "the MaxMind DB file's search tree extends beyond the metadata section",
806        ));
807    }
808    Ok(())
809}
810
/// Decodes a single record out of a search-tree node for one record width.
///
/// The implementors in this file are unit marker types, one per supported
/// width (24, 28 and 32 bits), which lets the generic tree walkers be
/// instantiated separately for each width.
trait SearchTreeRecord {
    /// Returns the value stored in record `index` of node `node_number`.
    ///
    /// Node offsets are computed from the start of `buf`. The tree walkers in
    /// this file pass a single address bit as `index`, i.e. 0 or 1.
    ///
    /// # Panics
    ///
    /// The implementations in this file index `buf` directly, so they panic
    /// when a byte they need lies outside `buf`.
    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize;
}
814
815struct RecordSize24;
816
817impl SearchTreeRecord for RecordSize24 {
818    #[inline(always)]
819    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
820        let offset = node_number * 6 + index * 3;
821        (buf[offset] as usize) << 16 | (buf[offset + 1] as usize) << 8 | buf[offset + 2] as usize
822    }
823}
824
825struct RecordSize28;
826
827impl SearchTreeRecord for RecordSize28 {
828    #[inline(always)]
829    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
830        let base_offset = node_number * 7;
831        let middle = if index == 0 {
832            (buf[base_offset + 3] & 0xF0) >> 4
833        } else {
834            buf[base_offset + 3] & 0x0F
835        };
836        let offset = base_offset + index * 4;
837        (middle as usize) << 24
838            | (buf[offset] as usize) << 16
839            | (buf[offset + 1] as usize) << 8
840            | buf[offset + 2] as usize
841    }
842}
843
844struct RecordSize32;
845
846impl SearchTreeRecord for RecordSize32 {
847    #[inline(always)]
848    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
849        let offset = node_number * 8 + index * 4;
850        (buf[offset] as usize) << 24
851            | (buf[offset + 1] as usize) << 16
852            | (buf[offset + 2] as usize) << 8
853            | buf[offset + 3] as usize
854    }
855}
856
857#[inline(always)]
858fn find_address_in_tree_v4<R: SearchTreeRecord>(
859    buf: &[u8],
860    start_node: usize,
861    node_count: usize,
862    ip: u32,
863) -> (usize, usize) {
864    let mut node = start_node;
865    let mut prefix_len = 32;
866
867    for i in 0..32 {
868        if node >= node_count {
869            prefix_len = i;
870            break;
871        }
872        let bit = ((ip >> (31 - i)) & 1) as usize;
873        node = R::read_node(buf, node, bit);
874    }
875
876    normalize_lookup_result(node, node_count, prefix_len)
877}
878
879#[inline(always)]
880fn find_address_in_tree_v6<R: SearchTreeRecord>(
881    buf: &[u8],
882    node_count: usize,
883    ip: u128,
884) -> (usize, usize) {
885    let mut node = 0;
886    let mut prefix_len = 128;
887
888    for i in 0..128 {
889        if node >= node_count {
890            prefix_len = i;
891            break;
892        }
893        let bit = ((ip >> (127 - i)) & 1) as usize;
894        node = R::read_node(buf, node, bit);
895    }
896
897    normalize_lookup_result(node, node_count, prefix_len)
898}
899
// Collapses the raw outcome of a tree walk into `(pointer, prefix_len)`,
// reporting pointer 0 for both "not found" cases:
//   - `node == node_count`: the walk ended on the search tree's empty
//     placeholder terminal.
//   - `node < node_count`: the address bits ran out while still on an
//     internal node, so no record was reached.
// A value strictly above `node_count` is a data-section pointer; it is passed
// through unchanged for the caller to resolve via `resolve_data_pointer`.
#[inline(always)]
fn normalize_lookup_result(node: usize, node_count: usize, prefix_len: usize) -> (usize, usize) {
    let pointer = if node > node_count { node } else { 0 };
    (pointer, prefix_len)
}
914
915fn find_metadata_start(buf: &[u8]) -> Result<usize, MaxMindDbError> {
916    memchr::memmem::rfind(buf, METADATA_START_MARKER)
917        .map(|x| x + METADATA_START_MARKER.len())
918        .ok_or_else(|| {
919            MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file")
920        })
921}
922
923fn metadata_marker_start(metadata_start: usize) -> Result<usize, MaxMindDbError> {
924    metadata_start
925        .checked_sub(METADATA_START_MARKER.len())
926        .ok_or_else(|| MaxMindDbError::invalid_database("invalid metadata marker location"))
927}