Skip to main content

maxminddb/
reader.rs

1//! MaxMind DB reader implementation.
2
3use std::collections::HashSet;
4use std::fs;
5use std::net::IpAddr;
6use std::path::Path;
7
8use ipnetwork::IpNetwork;
9use serde::Deserialize;
10
11#[cfg(feature = "mmap")]
12pub use memmap2::Mmap;
13#[cfg(feature = "mmap")]
14use memmap2::MmapOptions;
15#[cfg(feature = "mmap")]
16use std::fs::File;
17
18use crate::decoder;
19use crate::error::MaxMindDbError;
20use crate::metadata::Metadata;
21use crate::result::{LookupResult, LookupSource, NetworkKind};
22use crate::within::{IpInt, Within, WithinNode, WithinOptions};
23
24/// Size of the data section separator (16 zero bytes).
25const DATA_SECTION_SEPARATOR_SIZE: usize = 16;
26
/// A reader for the MaxMind DB format. The type parameter `S` is the
/// underlying buffer holding the contents of the database file; any type
/// that implements `AsRef<[u8]>` can be used.
///
/// The `Reader` supports both file-based and memory-mapped access to MaxMind
/// DB files, including GeoIP2 and GeoLite2 databases.
///
/// # Features
///
/// - **`mmap`**: Enable memory-mapped file access for better performance
/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string
///   decoding)
/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but
///   ~20% faster)
pub struct Reader<S: AsRef<[u8]>> {
    /// The raw bytes of the database file.
    pub(crate) buf: S,
    /// Database metadata.
    pub metadata: Metadata,
    /// Cached `Metadata::record_size`, validated in `Reader::from_source`
    /// to be 24, 28, or 32.
    record_size: u16,
    /// Cached `Metadata::node_count` for `Reader` search-tree traversal.
    /// Use this instead of `metadata.node_count`, which is publicly mutable.
    node_count: usize,
    /// Cached bytes per node derived from `Metadata::record_size` for `Reader`.
    /// Use this instead of `metadata.record_size` in lookup hot paths.
    node_byte_size: usize,
    /// Node at which IPv4 lookups start, as computed by `find_ipv4_start`
    /// (always 0 for databases that are not IPv6).
    pub(crate) ipv4_start: usize,
    /// Bit depth at which ipv4_start was found (0-96). Used to calculate
    /// correct prefix lengths for IPv4 lookups in IPv6 databases.
    pub(crate) ipv4_start_bit_depth: usize,
    /// Byte offset of the start of the data section: the search tree size
    /// plus the data section separator.
    pub(crate) pointer_base: usize,
}
57
58impl<S: AsRef<[u8]>> std::fmt::Debug for Reader<S> {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        f.debug_struct("Reader")
61            .field("buf_len", &self.buf.as_ref().len())
62            .field("metadata", &self.metadata)
63            .field("ipv4_start", &self.ipv4_start)
64            .field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth)
65            .field("pointer_base", &self.pointer_base)
66            .finish_non_exhaustive()
67    }
68}
69
#[cfg(feature = "mmap")]
impl Reader<Mmap> {
    /// Memory-map a MaxMind DB database file and open it as a `Reader`.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the database file is not modified or
    /// truncated while the `Reader` exists. Modifying or truncating the
    /// file while it is memory-mapped will result in undefined behavior.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, if mapping it fails
    /// (`MaxMindDbError::Mmap`), or if [`Reader::from_source`] rejects the
    /// mapped contents.
    ///
    /// # Example
    ///
    /// ```
    /// # #[cfg(feature = "mmap")]
    /// # {
    /// // SAFETY: The database file will not be modified while the reader exists.
    /// let reader = unsafe {
    ///     maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb")
    /// }.unwrap();
    /// # }
    /// ```
    pub unsafe fn open_mmap<P: AsRef<Path>>(database: P) -> Result<Reader<Mmap>, MaxMindDbError> {
        let file = File::open(database)?;
        match MmapOptions::new().map(&file) {
            Ok(mapping) => Reader::from_source(mapping),
            Err(err) => Err(MaxMindDbError::Mmap(err)),
        }
    }
}
99
100impl Reader<Vec<u8>> {
101    /// Open a MaxMind DB database file by loading it into memory.
102    ///
103    /// # Example
104    ///
105    /// ```
106    /// let reader = maxminddb::Reader::open_readfile(
107    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
108    /// ```
109    pub fn open_readfile<P: AsRef<Path>>(database: P) -> Result<Reader<Vec<u8>>, MaxMindDbError> {
110        let buf: Vec<u8> = fs::read(&database)?; // IO error converted via #[from]
111        Reader::from_source(buf)
112    }
113}
114
115impl<'de, S: AsRef<[u8]>> Reader<S> {
116    /// Open a MaxMind DB database from anything that implements AsRef<[u8]>
117    ///
118    /// # Example
119    ///
120    /// ```
121    /// use std::fs;
122    /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
123    /// let reader = maxminddb::Reader::from_source(buf).unwrap();
124    /// ```
125    pub fn from_source(buf: S) -> Result<Reader<S>, MaxMindDbError> {
126        let metadata_start = find_metadata_start(buf.as_ref())?;
127        let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0);
128        let metadata = Metadata::deserialize(&mut type_decoder)?;
129        validate_record_size(metadata.record_size)?;
130
131        let search_tree_size =
132            search_tree_size_bytes(metadata.node_count as usize, metadata.record_size as usize)?;
133        let record_size = metadata.record_size;
134        let node_count = metadata.node_count as usize;
135        let node_byte_size = record_size as usize / 4;
136        let pointer_base = search_tree_size
137            .checked_add(DATA_SECTION_SEPARATOR_SIZE)
138            .ok_or_else(|| {
139                MaxMindDbError::invalid_database(
140                    "the MaxMind DB file's search tree extends beyond the file",
141                )
142            })?;
143        validate_search_tree_layout(pointer_base, metadata_start)?;
144
145        let mut reader = Reader {
146            buf,
147            record_size,
148            node_count,
149            node_byte_size,
150            pointer_base,
151            metadata,
152            ipv4_start: 0,
153            ipv4_start_bit_depth: 0,
154        };
155        let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start();
156        reader.ipv4_start = ipv4_start;
157        reader.ipv4_start_bit_depth = ipv4_start_bit_depth;
158
159        Ok(reader)
160    }
161
162    /// Lookup an IP address in the database.
163    ///
164    /// Returns a [`LookupResult`] that can be used to:
165    /// - Check if data exists with [`has_data()`](LookupResult::has_data)
166    /// - Get the network containing the IP with [`network()`](LookupResult::network)
167    /// - Decode the full record with [`decode()`](LookupResult::decode)
168    /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path)
169    /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder)
170    ///
171    /// # Examples
172    ///
173    /// Basic city lookup:
174    /// ```
175    /// # use maxminddb::geoip2;
176    /// # use std::net::IpAddr;
177    /// # fn main() -> Result<(), maxminddb::MaxMindDbError> {
178    /// let reader = maxminddb::Reader::open_readfile(
179    ///     "test-data/test-data/GeoIP2-City-Test.mmdb")?;
180    ///
181    /// let ip: IpAddr = "89.160.20.128".parse().unwrap();
182    /// let result = reader.lookup(ip)?;
183    ///
184    /// if let Some(city) = result.decode::<geoip2::City>()? {
185    ///     // Access nested structs directly - no Option unwrapping needed
186    ///     if let Some(name) = city.city.names.english {
187    ///         println!("City: {}", name);
188    ///     }
189    /// } else {
190    ///     println!("No data found for IP {}", ip);
191    /// }
192    /// # Ok(())
193    /// # }
194    /// ```
195    ///
196    /// Selective field access:
197    /// ```
198    /// # use maxminddb::{path, Reader};
199    /// # use std::net::IpAddr;
200    /// # fn main() -> Result<(), maxminddb::MaxMindDbError> {
201    /// let reader = Reader::open_readfile(
202    ///     "test-data/test-data/GeoIP2-City-Test.mmdb")?;
203    /// let ip: IpAddr = "89.160.20.128".parse().unwrap();
204    ///
205    /// let result = reader.lookup(ip)?;
206    /// let country_code: Option<String> = result.decode_path(&path!["country", "iso_code"])?;
207    ///
208    /// println!("Country: {:?}", country_code);
209    /// # Ok(())
210    /// # }
211    /// ```
212    pub fn lookup(&'de self, address: IpAddr) -> Result<LookupResult<'de, S>, MaxMindDbError> {
213        match address {
214            IpAddr::V4(v4) => {
215                let (pointer, prefix_len) = self.find_address_in_tree_v4(v4.into());
216
217                // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect
218                // the actual bit depth in the tree. The ipv4_start_bit_depth tells us
219                // how deep in the IPv6 tree we were when we found the IPv4 subtree.
220                let prefix_len = if self.metadata.ip_version == 6 {
221                    self.ipv4_start_bit_depth + prefix_len
222                } else {
223                    prefix_len
224                };
225
226                self.lookup_result(pointer, prefix_len as u8, address)
227            }
228            IpAddr::V6(v6) => {
229                if self.metadata.ip_version == 4 {
230                    return Err(MaxMindDbError::invalid_input(
231                        "cannot look up IPv6 address in IPv4-only database",
232                    ));
233                }
234
235                let (pointer, prefix_len) = self.find_address_in_tree_v6(v6.into());
236                self.lookup_result(pointer, prefix_len as u8, address)
237            }
238        }
239    }
240
241    /// Iterate over all networks in the database.
242    ///
243    /// This is a convenience method equivalent to calling [`within()`](Self::within)
244    /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases.
245    ///
246    /// # Arguments
247    ///
248    /// * `options` - Controls which networks are yielded. Use [`Default::default()`]
249    ///   for standard behavior.
250    ///
251    /// # Examples
252    ///
253    /// Iterate over all networks with default options:
254    /// ```
255    /// use maxminddb::{geoip2, Reader};
256    ///
257    /// let reader = Reader::open_readfile(
258    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
259    ///
260    /// let mut count = 0;
261    /// for result in reader.networks(Default::default()).unwrap() {
262    ///     let lookup = result.unwrap();
263    ///     count += 1;
264    ///     if count >= 10 { break; }
265    /// }
266    /// ```
267    pub fn networks(&'de self, options: WithinOptions) -> Result<Within<'de, S>, MaxMindDbError> {
268        let cidr = if self.metadata.ip_version == 6 {
269            IpNetwork::V6("::/0".parse().unwrap())
270        } else {
271            IpNetwork::V4("0.0.0.0/0".parse().unwrap())
272        };
273        self.within(cidr, options)
274    }
275
276    /// Iterate over IP networks within a CIDR range.
277    ///
278    /// Returns an iterator that yields [`LookupResult`] for each network in the
279    /// database that falls within the specified CIDR range.
280    ///
281    /// # Arguments
282    ///
283    /// * `cidr` - The CIDR range to iterate over.
284    /// * `options` - Controls which networks are yielded. Use [`Default::default()`]
285    ///   for standard behavior (skip aliases, skip networks without data, include
286    ///   empty values).
287    ///
288    /// # Examples
289    ///
290    /// Iterate over all IPv4 networks:
291    /// ```
292    /// use ipnetwork::IpNetwork;
293    /// use maxminddb::{geoip2, Reader};
294    ///
295    /// let reader = Reader::open_readfile(
296    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
297    ///
298    /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap());
299    /// let mut count = 0;
300    /// for result in reader.within(ipv4_all, Default::default()).unwrap() {
301    ///     let lookup = result.unwrap();
302    ///     let network = lookup.network().unwrap();
303    ///     let city: geoip2::City = lookup.decode().unwrap().unwrap();
304    ///     let city_name = city.city.names.english;
305    ///     println!("Network: {}, City: {:?}", network, city_name);
306    ///     count += 1;
307    ///     if count >= 10 { break; } // Limit output for example
308    /// }
309    /// ```
310    ///
311    /// Search within a specific subnet:
312    /// ```
313    /// use ipnetwork::IpNetwork;
314    /// use maxminddb::{geoip2, Reader};
315    ///
316    /// let reader = Reader::open_readfile(
317    ///     "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
318    ///
319    /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap());
320    /// for result in reader.within(subnet, Default::default()).unwrap() {
321    ///     match result {
322    ///         Ok(lookup) => {
323    ///             let network = lookup.network().unwrap();
324    ///             println!("Found: {}", network);
325    ///         }
326    ///         Err(e) => eprintln!("Error: {}", e),
327    ///     }
328    /// }
329    /// ```
330    ///
331    /// Include networks without data:
332    /// ```
333    /// use ipnetwork::IpNetwork;
334    /// use maxminddb::{Reader, WithinOptions};
335    ///
336    /// let reader = Reader::open_readfile(
337    ///     "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap();
338    ///
339    /// let opts = WithinOptions::default().include_networks_without_data();
340    /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() {
341    ///     let lookup = result.unwrap();
342    ///     if !lookup.has_data() {
343    ///         println!("Network {} has no data", lookup.network().unwrap());
344    ///     }
345    /// }
346    /// ```
347    pub fn within(
348        &'de self,
349        cidr: IpNetwork,
350        options: WithinOptions,
351    ) -> Result<Within<'de, S>, MaxMindDbError> {
352        if self.metadata.ip_version == 4 && matches!(cidr, IpNetwork::V6(_)) {
353            return Err(MaxMindDbError::invalid_input(
354                "cannot iterate IPv6 network in IPv4-only database",
355            ));
356        }
357        let ip_address = cidr.network();
358        let prefix_len = cidr.prefix() as usize;
359        let ip_int = IpInt::new(ip_address);
360        let bit_count = ip_int.bit_count();
361
362        let mut node = self.start_node(bit_count);
363        let node_count = self.node_count;
364
365        let mut stack: Vec<WithinNode> = Vec::with_capacity(bit_count - prefix_len);
366
367        // Traverse down the tree to the level that matches the cidr mark
368        let mut depth = 0_usize;
369        for i in 0..prefix_len {
370            let bit = ip_int.get_bit(i);
371            node = self.read_node(node, bit as usize);
372            depth = i + 1; // We've now traversed i+1 bits (bits 0 through i)
373
374            if node >= node_count {
375                // We've hit a data node or dead end before we exhausted our prefix.
376                // This means the requested CIDR is contained in a single record.
377                break;
378            }
379        }
380
381        // Always push the node - it could be:
382        // - A data node (> node_count): will be yielded as a single record
383        // - The empty node (== node_count): will be skipped unless include_networks_without_data
384        // - An internal node (< node_count): will be traversed to find all contained records
385        stack.push(WithinNode {
386            node,
387            ip_int,
388            prefix_len: depth,
389        });
390
391        let within = Within {
392            reader: self,
393            node_count,
394            stack,
395            options,
396        };
397
398        Ok(within)
399    }
400
401    // Pointer 0 means "not found" because normalize_lookup_result collapses both
402    // the placeholder empty node (`node == node_count`) and an unfinished internal
403    // terminal (`node < node_count`, i.e. bits exhausted while still on a tree
404    // node) into 0, so neither path reaches resolve_data_pointer with a non-data
405    // value.
406    #[inline(always)]
407    fn lookup_result(
408        &'de self,
409        pointer: usize,
410        prefix_len: u8,
411        address: IpAddr,
412    ) -> Result<LookupResult<'de, S>, MaxMindDbError> {
413        let network_kind = match address {
414            IpAddr::V4(_) if self.metadata.ip_version == 6 && self.has_ipv4_subtree() => {
415                NetworkKind::V4InV6Subtree
416            }
417            IpAddr::V4(_) if self.metadata.ip_version == 6 => NetworkKind::V6,
418            IpAddr::V4(_) => NetworkKind::V4,
419            IpAddr::V6(_) => NetworkKind::V6,
420        };
421        if pointer == 0 {
422            Ok(LookupResult::new_not_found(
423                self,
424                prefix_len,
425                address,
426                LookupSource::Lookup,
427                network_kind,
428            ))
429        } else {
430            let data_offset = self.resolve_data_pointer(pointer)?;
431            Ok(LookupResult::new_found(
432                self,
433                data_offset,
434                prefix_len,
435                address,
436                LookupSource::Lookup,
437                network_kind,
438            ))
439        }
440    }
441
442    #[inline(always)]
443    fn find_address_in_tree_v4(&self, ip: u32) -> (usize, usize) {
444        let buf = self.buf.as_ref();
445        let node_count = self.node_count;
446
447        match self.record_size {
448            24 => find_address_in_tree_v4::<RecordSize24>(buf, self.ipv4_start, node_count, ip),
449            28 => find_address_in_tree_v4::<RecordSize28>(buf, self.ipv4_start, node_count, ip),
450            32 => find_address_in_tree_v4::<RecordSize32>(buf, self.ipv4_start, node_count, ip),
451            _ => unreachable!("record_size is validated in Reader::from_source"),
452        }
453    }
454
455    #[inline(always)]
456    fn find_address_in_tree_v6(&self, ip: u128) -> (usize, usize) {
457        let buf = self.buf.as_ref();
458        let node_count = self.node_count;
459
460        match self.record_size {
461            24 => find_address_in_tree_v6::<RecordSize24>(buf, node_count, ip),
462            28 => find_address_in_tree_v6::<RecordSize28>(buf, node_count, ip),
463            32 => find_address_in_tree_v6::<RecordSize32>(buf, node_count, ip),
464            _ => unreachable!("record_size is validated in Reader::from_source"),
465        }
466    }
467
468    #[inline]
469    fn start_node(&self, length: usize) -> usize {
470        if length == 128 {
471            0
472        } else {
473            self.ipv4_start
474        }
475    }
476
477    #[inline]
478    pub(crate) fn has_ipv4_subtree(&self) -> bool {
479        self.metadata.ip_version == 6 && self.ipv4_start < self.node_count
480    }
481
482    /// Find the IPv4 start node and the bit depth at which it was found.
483    /// Returns (node, depth) where depth is how far into the tree we traversed.
484    fn find_ipv4_start(&self) -> (usize, usize) {
485        if self.metadata.ip_version != 6 {
486            return (0, 0);
487        }
488
489        // We are looking up an IPv4 address in an IPv6 tree. Skip over the
490        // first 96 nodes.
491        let mut node: usize = 0;
492        let mut depth: usize = 0;
493        for i in 0_u8..96 {
494            if node >= self.node_count {
495                depth = i as usize;
496                break;
497            }
498            node = self.read_node(node, 0);
499            depth = (i + 1) as usize;
500        }
501        (node, depth)
502    }
503
504    #[inline(always)]
505    pub(crate) fn read_node(&self, node_number: usize, index: usize) -> usize {
506        let buf = self.buf.as_ref();
507        let base_offset = node_number * self.node_byte_size;
508
509        match self.record_size {
510            24 => {
511                let offset = base_offset + index * 3;
512                (buf[offset] as usize) << 16
513                    | (buf[offset + 1] as usize) << 8
514                    | buf[offset + 2] as usize
515            }
516            28 => {
517                let middle = if index != 0 {
518                    buf[base_offset + 3] & 0x0F
519                } else {
520                    (buf[base_offset + 3] & 0xF0) >> 4
521                };
522                let offset = base_offset + index * 4;
523                (middle as usize) << 24
524                    | (buf[offset] as usize) << 16
525                    | (buf[offset + 1] as usize) << 8
526                    | buf[offset + 2] as usize
527            }
528            32 => {
529                let offset = base_offset + index * 4;
530                (buf[offset] as usize) << 24
531                    | (buf[offset + 1] as usize) << 16
532                    | (buf[offset + 2] as usize) << 8
533                    | buf[offset + 3] as usize
534            }
535            _ => unreachable!("record_size is validated in Reader::from_source"),
536        }
537    }
538
539    /// Resolves a pointer from the search tree to an offset in the data section.
540    #[inline]
541    pub(crate) fn resolve_data_pointer(&self, pointer: usize) -> Result<usize, MaxMindDbError> {
542        let resolved = pointer
543            .checked_sub(self.node_count)
544            .and_then(|p| p.checked_sub(DATA_SECTION_SEPARATOR_SIZE))
545            .ok_or_else(|| {
546                MaxMindDbError::invalid_database(
547                    "the MaxMind DB file's data pointer resolves to an invalid location",
548                )
549            })?;
550        let data_section_len = self
551            .buf
552            .as_ref()
553            .len()
554            .checked_sub(self.pointer_base)
555            .ok_or_else(|| {
556                MaxMindDbError::invalid_database(
557                    "the MaxMind DB file's data pointer resolves to an invalid location",
558                )
559            })?;
560
561        // Check bounds using pointer_base which marks the start of the data section
562        if resolved >= data_section_len {
563            return Err(MaxMindDbError::invalid_database(
564                "the MaxMind DB file's data pointer resolves to an invalid location",
565            ));
566        }
567
568        Ok(resolved)
569    }
570
571    /// Performs comprehensive validation of the MaxMind DB file.
572    ///
573    /// This method validates:
574    /// - Metadata section: format versions, required fields, and value constraints
575    /// - Search tree: traverses all networks to verify tree structure integrity
576    /// - Data section separator: validates the 16-byte separator between tree and data
577    /// - Data section: verifies all data records referenced by the search tree
578    ///
579    /// The verifier is stricter than the MaxMind DB specification and may return
580    /// errors on some databases that are still readable by normal operations.
581    /// This method is useful for:
582    /// - Validating database files after download or generation
583    /// - Debugging database corruption issues
584    /// - Ensuring database integrity in critical applications
585    ///
586    /// Note: Verification traverses the entire database and may be slow on large files.
587    /// The method is thread-safe and can be called on an active Reader.
588    ///
589    /// # Example
590    ///
591    /// ```
592    /// use maxminddb::Reader;
593    ///
594    /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap();
595    /// reader.verify().expect("Database should be valid");
596    /// ```
597    pub fn verify(&self) -> Result<(), MaxMindDbError> {
598        let metadata_start = find_metadata_start(self.buf.as_ref())?;
599        self.verify_metadata(metadata_start)?;
600        self.verify_database(metadata_start)
601    }
602
603    fn verify_metadata(&self, metadata_start: usize) -> Result<(), MaxMindDbError> {
604        let m = &self.metadata;
605
606        if m.binary_format_major_version != 2 {
607            return Err(MaxMindDbError::invalid_database(format!(
608                "binary_format_major_version - Expected: 2 Actual: {}",
609                m.binary_format_major_version
610            )));
611        }
612        if m.binary_format_minor_version != 0 {
613            return Err(MaxMindDbError::invalid_database(format!(
614                "binary_format_minor_version - Expected: 0 Actual: {}",
615                m.binary_format_minor_version
616            )));
617        }
618        if m.database_type.is_empty() {
619            return Err(MaxMindDbError::invalid_database(
620                "database_type - Expected: non-empty string Actual: \"\"",
621            ));
622        }
623        if m.description.is_empty() {
624            return Err(MaxMindDbError::invalid_database(
625                "description - Expected: non-empty map Actual: {}",
626            ));
627        }
628        if m.ip_version != 4 && m.ip_version != 6 {
629            return Err(MaxMindDbError::invalid_database(format!(
630                "ip_version - Expected: 4 or 6 Actual: {}",
631                m.ip_version
632            )));
633        }
634        validate_record_size(m.record_size)?;
635        if m.node_count == 0 {
636            return Err(MaxMindDbError::invalid_database(
637                "node_count - Expected: positive integer Actual: 0",
638            ));
639        }
640        validate_search_tree_layout(self.pointer_base, metadata_start)?;
641        Ok(())
642    }
643
644    fn verify_database(&self, metadata_start: usize) -> Result<(), MaxMindDbError> {
645        let offsets = self.verify_search_tree()?;
646        self.verify_data_section_separator()?;
647        self.verify_data_section(offsets, metadata_start)
648    }
649
650    fn verify_search_tree(&self) -> Result<HashSet<usize>, MaxMindDbError> {
651        let mut offsets = HashSet::new();
652        let opts = WithinOptions::default().include_networks_without_data();
653
654        // Maximum number of networks we can expect in a valid database.
655        // A database with N nodes can have at most 2N data entries (each leaf node
656        // can have data). We add some margin for safety.
657        let max_iterations = self.node_count.saturating_mul(3);
658        let mut iteration_count = 0usize;
659
660        for result in self.networks(opts)? {
661            let lookup = result?;
662            if let Some(offset) = lookup.offset() {
663                offsets.insert(offset);
664            }
665
666            iteration_count += 1;
667            if iteration_count > max_iterations {
668                return Err(MaxMindDbError::invalid_database(format!(
669                    "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)"
670                )));
671            }
672        }
673        Ok(offsets)
674    }
675
676    fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> {
677        let separator_start = self.node_count * self.node_byte_size;
678        let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE;
679
680        if separator_end > self.buf.as_ref().len() {
681            return Err(MaxMindDbError::invalid_database_at(
682                "data section separator extends past end of file",
683                separator_start,
684            ));
685        }
686
687        let separator = &self.buf.as_ref()[separator_start..separator_end];
688
689        for &b in separator {
690            if b != 0 {
691                return Err(MaxMindDbError::invalid_database_at(
692                    format!("unexpected byte in data separator: {separator:?}"),
693                    separator_start,
694                ));
695            }
696        }
697        Ok(())
698    }
699
700    fn verify_data_section(
701        &self,
702        offsets: HashSet<usize>,
703        metadata_start: usize,
704    ) -> Result<(), MaxMindDbError> {
705        let data_section = &self.buf.as_ref()[self.pointer_base..metadata_start];
706
707        // Verify each offset from the search tree points to valid, decodable data
708        for &offset in &offsets {
709            if offset >= data_section.len() {
710                return Err(MaxMindDbError::invalid_database_at(
711                    format!(
712                        "search tree pointer is beyond data section (len: {})",
713                        data_section.len()
714                    ),
715                    offset,
716                ));
717            }
718
719            let mut dec = decoder::Decoder::new(data_section, offset);
720
721            // Try to skip/decode the value to verify it's valid
722            if let Err(e) = dec.skip_value_for_verification() {
723                return Err(MaxMindDbError::invalid_database_at(
724                    format!("decoding error: {e}"),
725                    offset,
726                ));
727            }
728        }
729
730        Ok(())
731    }
732}
733
734fn validate_record_size(record_size: u16) -> Result<(), MaxMindDbError> {
735    if matches!(record_size, 24 | 28 | 32) {
736        Ok(())
737    } else {
738        Err(MaxMindDbError::invalid_database(format!(
739            "record_size - Expected: 24, 28, or 32 Actual: {}",
740            record_size
741        )))
742    }
743}
744
745fn search_tree_size_bytes(node_count: usize, record_size: usize) -> Result<usize, MaxMindDbError> {
746    node_count
747        .checked_mul(record_size)
748        .map(|size| size / 4)
749        .ok_or_else(|| {
750            MaxMindDbError::invalid_database(
751                "search tree size calculation overflowed or is impossibly large",
752            )
753        })
754}
755
756fn validate_search_tree_layout(
757    pointer_base: usize,
758    metadata_start: usize,
759) -> Result<(), MaxMindDbError> {
760    if pointer_base > metadata_start {
761        return Err(MaxMindDbError::invalid_database(
762            "the MaxMind DB file's search tree extends beyond the metadata section",
763        ));
764    }
765    Ok(())
766}
767
/// Record-size-specific decoding of one search-tree record.
///
/// `index` selects the record within the node: 0 for the left record,
/// 1 for the right record.
trait SearchTreeRecord {
    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize;
}

/// 24-bit records: 6 bytes per node, two 3-byte big-endian values.
struct RecordSize24;

impl SearchTreeRecord for RecordSize24 {
    #[inline(always)]
    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
        let at = node_number * 6 + index * 3;
        let (hi, mid, lo) = (
            buf[at] as usize,
            buf[at + 1] as usize,
            buf[at + 2] as usize,
        );
        (hi << 16) | (mid << 8) | lo
    }
}

/// 28-bit records: 7 bytes per node; the middle byte contributes its high
/// nibble to the left record and its low nibble to the right record.
struct RecordSize28;

impl SearchTreeRecord for RecordSize28 {
    #[inline(always)]
    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
        let node_start = node_number * 7;
        let shared = buf[node_start + 3];
        let top_bits = if index == 0 {
            shared >> 4
        } else {
            shared & 0x0F
        };
        let at = node_start + index * 4;
        let (hi, mid, lo) = (
            buf[at] as usize,
            buf[at + 1] as usize,
            buf[at + 2] as usize,
        );
        ((top_bits as usize) << 24) | (hi << 16) | (mid << 8) | lo
    }
}

/// 32-bit records: 8 bytes per node, two 4-byte big-endian values.
struct RecordSize32;

impl SearchTreeRecord for RecordSize32 {
    #[inline(always)]
    fn read_node(buf: &[u8], node_number: usize, index: usize) -> usize {
        let at = node_number * 8 + index * 4;
        let (b0, b1, b2, b3) = (
            buf[at] as usize,
            buf[at + 1] as usize,
            buf[at + 2] as usize,
            buf[at + 3] as usize,
        );
        (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
    }
}
813
814#[inline(always)]
815fn find_address_in_tree_v4<R: SearchTreeRecord>(
816    buf: &[u8],
817    start_node: usize,
818    node_count: usize,
819    ip: u32,
820) -> (usize, usize) {
821    let mut node = start_node;
822    let mut prefix_len = 32;
823
824    for i in 0..32 {
825        if node >= node_count {
826            prefix_len = i;
827            break;
828        }
829        let bit = ((ip >> (31 - i)) & 1) as usize;
830        node = R::read_node(buf, node, bit);
831    }
832
833    normalize_lookup_result(node, node_count, prefix_len)
834}
835
836#[inline(always)]
837fn find_address_in_tree_v6<R: SearchTreeRecord>(
838    buf: &[u8],
839    node_count: usize,
840    ip: u128,
841) -> (usize, usize) {
842    let mut node = 0;
843    let mut prefix_len = 128;
844
845    for i in 0..128 {
846        if node >= node_count {
847            prefix_len = i;
848            break;
849        }
850        let bit = ((ip >> (127 - i)) & 1) as usize;
851        node = R::read_node(buf, node, bit);
852    }
853
854    normalize_lookup_result(node, node_count, prefix_len)
855}
856
// Collapse every "not found" outcome into pointer 0:
//   - `node == node_count`: the placeholder empty terminal in the search tree.
//   - `node < node_count`: the address bits ran out while still on an
//     internal node, so no record was reached.
// Only values strictly greater than `node_count` are data-section pointers;
// those are passed through for the caller to resolve via
// `resolve_data_pointer`. The prefix length is returned unchanged.
#[inline(always)]
fn normalize_lookup_result(node: usize, node_count: usize, prefix_len: usize) -> (usize, usize) {
    let pointer = if node > node_count { node } else { 0 };
    (pointer, prefix_len)
}
871
872fn find_metadata_start(buf: &[u8]) -> Result<usize, MaxMindDbError> {
873    const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com";
874
875    memchr::memmem::rfind(buf, METADATA_START_MARKER)
876        .map(|x| x + METADATA_START_MARKER.len())
877        .ok_or_else(|| {
878            MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file")
879        })
880}