diff --git a/dissect/database/chromium/__init__.py b/dissect/database/chromium/__init__.py new file mode 100644 index 0000000..fdaf54a --- /dev/null +++ b/dissect/database/chromium/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from dissect.database.chromium.cache import DiskCache, SimpleDiskCache + +__all__ = [ + "DiskCache", + "SimpleDiskCache", +] diff --git a/dissect/database/chromium/cache/__init__.py b/dissect/database/chromium/cache/__init__.py new file mode 100644 index 0000000..5fbbd3c --- /dev/null +++ b/dissect/database/chromium/cache/__init__.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from dissect.database.chromium.cache.c_cache import c_cache +from dissect.database.chromium.cache.c_simple import c_simple +from dissect.database.chromium.cache.cache import DiskCache +from dissect.database.chromium.cache.simple import SimpleDiskCache + +__all__ = [ + "DiskCache", + "SimpleDiskCache", + "c_cache", + "c_simple", +] diff --git a/dissect/database/chromium/cache/c_cache.py b/dissect/database/chromium/cache/c_cache.py new file mode 100644 index 0000000..6d22157 --- /dev/null +++ b/dissect/database/chromium/cache/c_cache.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +# References: +# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/addr.h +# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/disk_format_base.h +# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/disk_format.h +cache_def = """ + +/* Cache Address format. 
*/ + +enum FileType { + EXTERNAL = 0, + RANKINGS = 1, + BLOCK_256 = 2, + BLOCK_1K = 3, + BLOCK_4K = 4, + BLOCK_FILES = 5, + BLOCK_ENTRIES = 6, + BLOCK_EVICTED = 7 +}; + +// int kMaxBlockSize = 4096 * 4; +// int16_t kMaxBlockFile = 255; +// int kMaxNumBlocks = 4; +// int16_t kFirstAdditionalBlockFile = 4; + +#define kInitializedMask 0x80000000 +#define kFileTypeMask 0x70000000 +#define kFileTypeOffset 28 +#define kReservedBitsMask 0x0c000000 +#define kNumBlocksMask 0x03000000 +#define kNumBlocksOffset 24 +#define kFileSelectorMask 0x00ff0000 +#define kFileSelectorOffset 16 +#define kStartBlockMask 0x0000FFFF +#define kFileNameMask 0x0FFFFFFF + +/* Cache types. */ + +/* Index file format. */ +typedef uint32_t CacheAddr; + +struct LruData { + int32 padding_1[2]; + int32 filled; // Flag to tell when we filled the cache. + int32 sizes[5]; + CacheAddr heads[5]; + CacheAddr tails[5]; + CacheAddr transaction; // In-flight operation target. + int32 operation; // Actual in-flight operation. + int32 operation_list; // In-flight operation list. + int32 padding_2[7]; +}; + +struct IndexHeader { + uint32 magic; // 0xc3ca03c1 + uint32 version; + int32 num_entries; + int32 num_bytes_legacy; + int32 last_file; // f_###### + int32 dirty_flag; + CacheAddr stats; + int32 table_len; + int32 crash_flag; + int32 experiment_flag; + uint64 create_time; + int64 num_bytes; + int32 corruption_flag; + int32 padding[49]; + LruData lru_data; + // CacheAddr table[table_len]; // max is kIndexTablesize (0x10000) +}; + +/* Data Block File Format. */ +#define kBlockHeaderSize 8192 + +struct BlockFileHeader { + uint32 magic; // 0xc3ca04c1 + uint32 version; + int16 this_file; // Index of this file (data_#). + int16 next_file; // Next file when this one is full (data_#). + int32 entry_size; // Size of the blocks of this file. + int32 num_entries; // Number of stored entries. + int32 max_entries; // Current maximum number of entries. 
+ int32 empty[4]; + int32 hints[4]; + int32 updating; + int32 user[5]; + // char allocation_map[4 * 2028]; + // total header should be exactly kBlockHeaderSize bytes long (8192). +}; + +/* Cache Entry Format. */ + +enum EntryState { + ENTRY_NORMAL = 0, + ENTRY_EVICTED, // The entry was recently evicted from the cache. + ENTRY_DOOMED // The entry was doomed. +}; + +enum EntryFlags { + PARENT_ENTRY = 1, // This entry has children (sparse) entries. + CHILD_ENTRY = 1 << 1 // Child entry that stores sparse data. +}; + +struct EntryStore { + uint32 hash; // Full hash of the key. + CacheAddr next; // Next entry with the same hash or bucket. + CacheAddr rankings_node; // Rankings node for this entry. + int32 reuse_count; // How often is this entry used. + int32 refetch_count; // How often is this fetched from the net. + int32 state; // Current state. + uint64 creation_time; + int32 key_len; + CacheAddr long_key; // Optional address of a long key. + + int32 data_size[4]; // We can store up to 4 data streams for + CacheAddr data_addr[4]; // each entry. + + uint32 flags; // Any combination of EntryFlags. + int32 padding[4]; + uint32 self_hash; // The hash of EntryStore up to this point. 
def BlockSizeForFileType(file_type: int) -> int:
    """Return the size in bytes of a single block for the given ``file_type``.

    Args:
        file_type: Numeric ``FileType`` value (1 through 7) extracted from a cache address.

    Returns:
        The block size belonging to that file type.

    Raises:
        ValueError: If ``file_type`` is not one of the known block file types. ``EXTERNAL`` (0) has
            no entry in the table and therefore raises as well.
    """
    # (FileType value, block size in bytes)
    block_sizes = (
        (1, 36),  # RANKINGS
        (2, 256),  # BLOCK_256
        (3, 1024),  # BLOCK_1K
        (4, 4096),  # BLOCK_4K
        (5, 8),  # BLOCK_FILES
        (6, 104),  # BLOCK_ENTRIES
        (7, 48),  # BLOCK_EVICTED
    )

    for known_type, size in block_sizes:
        if file_type == known_type:
            return size

    raise ValueError(f"Unknown file_type {file_type!r}")
+ + CacheAddr: TypeAlias = _c_cache.uint32 + class LruData(__cs__.Structure): + padding_1: __cs__.Array[_c_cache.int32] + filled: _c_cache.int32 + sizes: __cs__.Array[_c_cache.int32] + heads: __cs__.Array[_c_cache.uint32] + tails: __cs__.Array[_c_cache.uint32] + transaction: _c_cache.uint32 + operation: _c_cache.int32 + operation_list: _c_cache.int32 + padding_2: __cs__.Array[_c_cache.int32] + @overload + def __init__( + self, + padding_1: __cs__.Array[_c_cache.int32] | None = ..., + filled: _c_cache.int32 | None = ..., + sizes: __cs__.Array[_c_cache.int32] | None = ..., + heads: __cs__.Array[_c_cache.uint32] | None = ..., + tails: __cs__.Array[_c_cache.uint32] | None = ..., + transaction: _c_cache.uint32 | None = ..., + operation: _c_cache.int32 | None = ..., + operation_list: _c_cache.int32 | None = ..., + padding_2: __cs__.Array[_c_cache.int32] | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class IndexHeader(__cs__.Structure): + magic: _c_cache.uint32 + version: _c_cache.uint32 + num_entries: _c_cache.int32 + num_bytes_legacy: _c_cache.int32 + last_file: _c_cache.int32 + dirty_flag: _c_cache.int32 + stats: _c_cache.uint32 + table_len: _c_cache.int32 + crash_flag: _c_cache.int32 + experiment_flag: _c_cache.int32 + create_time: _c_cache.uint64 + num_bytes: _c_cache.int64 + corruption_flag: _c_cache.int32 + padding: __cs__.Array[_c_cache.int32] + lru_data: _c_cache.LruData + @overload + def __init__( + self, + magic: _c_cache.uint32 | None = ..., + version: _c_cache.uint32 | None = ..., + num_entries: _c_cache.int32 | None = ..., + num_bytes_legacy: _c_cache.int32 | None = ..., + last_file: _c_cache.int32 | None = ..., + dirty_flag: _c_cache.int32 | None = ..., + stats: _c_cache.uint32 | None = ..., + table_len: _c_cache.int32 | None = ..., + crash_flag: _c_cache.int32 | None = ..., + experiment_flag: _c_cache.int32 | None = ..., + create_time: _c_cache.uint64 | None = ..., + num_bytes: 
_c_cache.int64 | None = ..., + corruption_flag: _c_cache.int32 | None = ..., + padding: __cs__.Array[_c_cache.int32] | None = ..., + lru_data: _c_cache.LruData | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class BlockFileHeader(__cs__.Structure): + magic: _c_cache.uint32 + version: _c_cache.uint32 + this_file: _c_cache.int16 + next_file: _c_cache.int16 + entry_size: _c_cache.int32 + num_entries: _c_cache.int32 + max_entries: _c_cache.int32 + empty: __cs__.Array[_c_cache.int32] + hints: __cs__.Array[_c_cache.int32] + updating: _c_cache.int32 + user: __cs__.Array[_c_cache.int32] + @overload + def __init__( + self, + magic: _c_cache.uint32 | None = ..., + version: _c_cache.uint32 | None = ..., + this_file: _c_cache.int16 | None = ..., + next_file: _c_cache.int16 | None = ..., + entry_size: _c_cache.int32 | None = ..., + num_entries: _c_cache.int32 | None = ..., + max_entries: _c_cache.int32 | None = ..., + empty: __cs__.Array[_c_cache.int32] | None = ..., + hints: __cs__.Array[_c_cache.int32] | None = ..., + updating: _c_cache.int32 | None = ..., + user: __cs__.Array[_c_cache.int32] | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class EntryState(__cs__.Enum): + ENTRY_NORMAL = ... + ENTRY_EVICTED = ... + ENTRY_DOOMED = ... + + class EntryFlags(__cs__.Enum): + PARENT_ENTRY = ... + CHILD_ENTRY = ... 
+ + class EntryStore(__cs__.Structure): + hash: _c_cache.uint32 + next: _c_cache.uint32 + rankings_node: _c_cache.uint32 + reuse_count: _c_cache.int32 + refetch_count: _c_cache.int32 + state: _c_cache.int32 + creation_time: _c_cache.uint64 + key_len: _c_cache.int32 + long_key: _c_cache.uint32 + data_size: __cs__.Array[_c_cache.int32] + data_addr: __cs__.Array[_c_cache.uint32] + flags: _c_cache.uint32 + padding: __cs__.Array[_c_cache.int32] + self_hash: _c_cache.uint32 + key: __cs__.CharArray + @overload + def __init__( + self, + hash: _c_cache.uint32 | None = ..., + next: _c_cache.uint32 | None = ..., + rankings_node: _c_cache.uint32 | None = ..., + reuse_count: _c_cache.int32 | None = ..., + refetch_count: _c_cache.int32 | None = ..., + state: _c_cache.int32 | None = ..., + creation_time: _c_cache.uint64 | None = ..., + key_len: _c_cache.int32 | None = ..., + long_key: _c_cache.uint32 | None = ..., + data_size: __cs__.Array[_c_cache.int32] | None = ..., + data_addr: __cs__.Array[_c_cache.uint32] | None = ..., + flags: _c_cache.uint32 | None = ..., + padding: __cs__.Array[_c_cache.int32] | None = ..., + self_hash: _c_cache.uint32 | None = ..., + key: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ +# Technically `c_cache` is an instance of `_c_cache`, but then we can't use it in type hints +c_cache: TypeAlias = _c_cache diff --git a/dissect/database/chromium/cache/c_simple.py b/dissect/database/chromium/cache/c_simple.py new file mode 100644 index 0000000..6b91022 --- /dev/null +++ b/dissect/database/chromium/cache/c_simple.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from dissect.cstruct import cstruct + +# References: +# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/simple/simple_index_file.h +# - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/simple/simple_entry_format.h +simple_def = """ +/* Simple Indexes */ + +#define kSimpleIndexMagicNumber 0x656e74657220796f + +struct FakeIndexHeader { + uint64 magic; // kSimpleIndexMagicNumber + uint32 version; + int32 padding[2]; +}; + +struct IndexTableEntry { + uint64 hash; + int64 last_used; + int64 size; +}; + +struct RealIndexHeader { + uint32 size; + uint32 crc32; + uint64 magic; // kSimpleIndexMagicNumber + uint32 version; + int64 num_entries; + int64 cache_size; + int32 unknown; + IndexTableEntry entries[num_entries]; +}; + +/* Simple File Headers. */ + +#define kSimpleInitialMagicNumber 0xfcfb6d1ba7725c30 +#define kSimpleFinalMagicNumber 0xf4fa6f45970d41d8 + +struct SimpleFileHeader { + uint64 magic; // kSimpleInitialMagicNumber + uint32 version; + uint32 key_length; + uint32 key_hash; // md5 + uint32 unused_padding; + char key[key_length]; + + // followed by SimpleFileStream_* +}; + +#define kSimpleEOFSize 24 + +struct SimpleFileEOF { + uint64 magic; // kSimpleFinalMagicNumber + uint32 flags; // hash type: 0 = ?, 1 = crc32, 2 = sha256, 3 = 1 + 2 + uint32 crc32; + uint32 stream_size; // only used in the EOF record for stream 0. 
+}; + +struct SimpleFileStream_0_1 { + // preceded by SimpleFileHeader + // char data_stream_1[]; + // SimpleFileEOF + // char data_stream_0[]; + // SHA256 if flags = 2 or 3 + // SimpleFileEOF +}; + +struct SimpleFileStream_2 { + // preceded by SimpleFileHeader + // char data_stream_2[]; + // SimpleFileEOF +}; + +#define kSimpleSparseRangeMagicNumber 0xeb97bf016553676b + +struct SimpleFileSparseRangeHeader { + uint64 magic; // kSimpleSparseRangeMagicNumber + int64 offset; + int64 length; + uint32 crc32; + // char data[length]; +}; +""" + +c_simple = cstruct(endian="<").load(simple_def) diff --git a/dissect/database/chromium/cache/c_simple.pyi b/dissect/database/chromium/cache/c_simple.pyi new file mode 100644 index 0000000..6caaf71 --- /dev/null +++ b/dissect/database/chromium/cache/c_simple.pyi @@ -0,0 +1,149 @@ +# Generated by cstruct-stubgen +from typing import BinaryIO, Literal, TypeAlias, overload + +import dissect.cstruct as __cs__ + +class _c_simple(__cs__.cstruct): + kSimpleIndexMagicNumber: Literal[7308907224324143471] = ... + kSimpleInitialMagicNumber: Literal[18229283882253048880] = ... + kSimpleFinalMagicNumber: Literal[17652544034109735384] = ... + kSimpleEOFSize: Literal[24] = ... + kSimpleSparseRangeMagicNumber: Literal[16976247333112211307] = ... + class FakeIndexHeader(__cs__.Structure): + magic: _c_simple.uint64 + version: _c_simple.uint32 + padding: __cs__.Array[_c_simple.int32] + @overload + def __init__( + self, + magic: _c_simple.uint64 | None = ..., + version: _c_simple.uint32 | None = ..., + padding: __cs__.Array[_c_simple.int32] | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class IndexTableEntry(__cs__.Structure): + hash: _c_simple.uint64 + last_used: _c_simple.int64 + size: _c_simple.int64 + @overload + def __init__( + self, + hash: _c_simple.uint64 | None = ..., + last_used: _c_simple.int64 | None = ..., + size: _c_simple.int64 | None = ..., + ): ... 
+ @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class RealIndexHeader(__cs__.Structure): + size: _c_simple.uint32 + crc32: _c_simple.uint32 + magic: _c_simple.uint64 + version: _c_simple.uint32 + num_entries: _c_simple.int64 + cache_size: _c_simple.int64 + unknown: _c_simple.int32 + class IndexTableEntry(__cs__.Structure): + hash: _c_simple.uint64 + last_used: _c_simple.int64 + size: _c_simple.int64 + @overload + def __init__( + self, + hash: _c_simple.uint64 | None = ..., + last_used: _c_simple.int64 | None = ..., + size: _c_simple.int64 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + entries: __cs__.Array[IndexTableEntry] + @overload + def __init__( + self, + size: _c_simple.uint32 | None = ..., + crc32: _c_simple.uint32 | None = ..., + magic: _c_simple.uint64 | None = ..., + version: _c_simple.uint32 | None = ..., + num_entries: _c_simple.int64 | None = ..., + cache_size: _c_simple.int64 | None = ..., + unknown: _c_simple.int32 | None = ..., + entries: __cs__.Array[IndexTableEntry] | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class SimpleFileHeader(__cs__.Structure): + magic: _c_simple.uint64 + version: _c_simple.uint32 + key_length: _c_simple.uint32 + key_hash: _c_simple.uint32 + unused_padding: _c_simple.uint32 + key: __cs__.CharArray + @overload + def __init__( + self, + magic: _c_simple.uint64 | None = ..., + version: _c_simple.uint32 | None = ..., + key_length: _c_simple.uint32 | None = ..., + key_hash: _c_simple.uint32 | None = ..., + unused_padding: _c_simple.uint32 | None = ..., + key: __cs__.CharArray | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ + class SimpleFileEOF(__cs__.Structure): + magic: _c_simple.uint64 + flags: _c_simple.uint32 + crc32: _c_simple.uint32 + stream_size: _c_simple.int32 + @overload + def __init__( + self, + magic: _c_simple.uint64 | None = ..., + flags: _c_simple.uint32 | None = ..., + crc32: _c_simple.uint32 | None = ..., + stream_size: _c_simple.int32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class SimpleFileStream_0_1(__cs__.Structure): + @overload + def __init__(self): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class SimpleFileStream_2(__cs__.Structure): + @overload + def __init__(self): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class SimpleFileSparseRangeHeader(__cs__.Structure): + magic: _c_simple.uint64 + offset: _c_simple.int64 + length: _c_simple.int64 + crc32: _c_simple.uint32 + @overload + def __init__( + self, + magic: _c_simple.uint64 | None = ..., + offset: _c_simple.int64 | None = ..., + length: _c_simple.int64 | None = ..., + crc32: _c_simple.uint32 | None = ..., + ): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... + + class SimpleFileStreamSparse(__cs__.Structure): + @overload + def __init__(self): ... + @overload + def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ... 
+ +# Technically `c_simple` is an instance of `_c_simple`, but then we can't use it in type hints +c_simple: TypeAlias = _c_simple diff --git a/dissect/database/chromium/cache/cache.py b/dissect/database/chromium/cache/cache.py new file mode 100644 index 0000000..a657cec --- /dev/null +++ b/dissect/database/chromium/cache/cache.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +import gzip +import zlib +from typing import TYPE_CHECKING +from urllib.parse import urlsplit + +from dissect.cstruct.utils import u32 +from dissect.util.stream import RangeStream +from dissect.util.ts import webkittimestamp + +from dissect.database.chromium.cache.c_cache import BlockSizeForFileType, c_cache +from dissect.database.chromium.cache.util import parse_cache_key + +try: + from cramjam import brotli + + HAS_CRAMJAM = True + +except ImportError: + HAS_CRAMJAM = False + +if TYPE_CHECKING: + from collections.abc import Iterator + from io import BufferedReader + from pathlib import Path + + +class DiskCache: + """Chromium Disk (Block File) Cache implementation. + + References: + - https://www.chromium.org/developers/design-documents/network-stack/disk-cache/ + - https://github.com/libyal/dtformats/blob/main/documentation/Chrome%20Cache%20file%20format.asciidoc + """ + + def __init__(self, path: Path): + if not path.exists(): + raise ValueError(f"Provided path does not exist: {path!r}") + + if not path.is_dir(): + raise ValueError(f"Provided path is not a directory: {path!r}") + + # Sanity check for expected directory structure. 
+ files = {"index", "data_0", "data_1", "data_2", "data_3"} + self.children = set(path.iterdir()) + if not files.issubset({file.name for file in self.children}): + raise ValueError(f"Provided directory does not contain expected disk cache files: {path!r}") + + self.path = path + self.index = CacheIndexFile(self, path.joinpath("index")) + + if self.index.header.magic != 0xC103CAC3: + raise ValueError(f"Provided directory contains invalid index file: {path!r}") + + if self.index.header.version != 0x30000: + raise ValueError(f"Unsupported Disk Cache index version {self.index.header.version!r} in {path!r}") + + self.create_time = webkittimestamp(self.index.header.create_time) + self.num_entries = self.index.header.num_entries + + self.block_files = [ + CacheBlockFile(self, path.joinpath(name)) for name in ("data_0", "data_1", "data_2", "data_3") + ] + + def __repr__(self) -> str: + return f"" + + def block_file(self, id: int) -> CacheBlockFile | None: + for block_file in self.block_files: + if block_file.id == id: + return block_file + return None + + @property + def entries(self) -> Iterator[CacheEntryStore]: + for address in self.index.addresses: + while address.is_initialized: + entry = CacheEntryStore(self, address) + yield entry + + # An EntryStore can point to a next address for another EntryStore + if entry.next == 0: + break + address = CacheAddress(self.index, entry.next) + + def get_key(self, key: str) -> CacheEntryStore | None: + """Get the :class:`CacheEntryStore` for the given ``key``.""" + for entry in self.entries: + if key == entry.key: + return entry + return None + + def get_url(self, resource_url: str) -> CacheEntryStore | None: + """Get the :class:`CacheEntrystore` for the given resource url.""" + for entry in self.entries: + if resource_url == entry.resource_url: + return entry + return None + + def get_host(self, host: str) -> Iterator[CacheEntryStore]: + """Get all :class:`CacheEntryStore` for the given host.""" + for entry in self.entries: + if 
class CacheIndexFile:
    """Chromium Disk Cache Index file.

    Parses the ``IndexHeader`` at the start of the file and exposes the table of cache addresses
    that directly follows it.

    References:
        - https://chromium.googlesource.com/chromium/src/+/HEAD/net/disk_cache/blockfile/disk_format.h
    """

    def __init__(self, disk_cache: DiskCache, path: Path):
        self.disk_cache = disk_cache
        self.path = path

        self.fh = path.open("rb")
        self.header = c_cache.IndexHeader(self.fh)

        # The address table starts right after the header. Remember the offset so the table can be
        # read independently of whatever the current position of the file handle is.
        self._table_offset = self.fh.tell()
        self._addresses: list[CacheAddress] | None = None

    def __repr__(self) -> str:
        return f""

    @property
    def addresses(self) -> Iterator[CacheAddress]:
        """Yield :class:`CacheAddress` from the index table.

        The table is read in one go on first access and cached afterwards. Reading it eagerly
        ensures that a consumer which stops iterating early can never leave a partially filled
        cache behind for the next caller.
        """
        if self._addresses is None:
            # A negative ``table_len`` yields no addresses instead of reading the entire file.
            table_len = max(self.header.table_len, 0)

            self.fh.seek(self._table_offset)
            table = self.fh.read(4 * table_len)

            # Only complete 4-byte values are parsed, a truncated table simply yields fewer addresses.
            self._addresses = [CacheAddress(self, u32(table[pos : pos + 4])) for pos in range(0, len(table) - 3, 4)]

        yield from self._addresses

    # TODO: get(address)?
class CacheEntryStore:
    """Represents a Cache EntryStore object.

    The key of an entry is stored either inline in the entry itself or, when ``long_key`` is set,
    at a separate cache address. Stream 0 of the entry is exposed as :attr:`meta` and stream 1 as
    :attr:`data`.
    """

    def __init__(self, disk_cache: DiskCache, addr: CacheAddress):
        self.disk_cache = disk_cache
        self.address = addr

        stream = self.address.data
        self.header = c_cache.EntryStore(stream)
        self.state = c_cache.EntryState(self.header.state)
        self.creation_time = webkittimestamp(self.header.creation_time)
        self.next = self.header.next

        if self.header.long_key:
            self.key = self._read(self.header.long_key, self.header.key_len).decode()
        else:
            raw_key = self.header.key
            # An inline key can be longer than the fixed size ``key`` field, in which case it continues
            # in the block(s) directly following the EntryStore structure. The stream is positioned
            # right after the structure at this point, so the remainder can be read as-is.
            remaining = self.header.key_len - len(raw_key)
            if remaining > 0:
                raw_key += stream.read(remaining)
            self.key = raw_key.decode().strip("\x00")

        self.credential_key, self.upload_data_identifier, self.isolation_key, self.resource_url = parse_cache_key(
            self.key
        )

    def __repr__(self) -> str:
        return f""  # noqa: E501

    def _read(self, raw_address: int, size: int) -> bytes:
        """Read ``size`` bytes from the start of the cache address ``raw_address``.

        Raises:
            ValueError: If the address is not initialized or has no readable data.
        """
        addr = CacheAddress(self.disk_cache.index, raw_address)
        stream = addr.data
        try:
            # A negative size is not a valid length, fall back to reading everything that is available.
            return stream.read(size if size >= 0 else -1)
        finally:
            # External (``f_######``) files are opened on every access, so close them again. Streams
            # on a block file are backed by the handle of that block file, which has to stay open.
            if addr.is_separate_file:
                stream.close()

    @property
    def meta(self) -> bytes:
        """Raw bytes of stream 0, limited to the size recorded for that stream in the entry."""
        # TODO: Properly unpickle HTTP response headers
        return self._read(self.header.data_addr[0], self.header.data_size[0])

    @property
    def data(self) -> bytes:
        """Bytes of stream 1, decompressed when a known content encoding is detected.

        Gzip is detected by its magic bytes, brotli and deflate by looking for the corresponding
        ``content-encoding`` header in :attr:`meta`.

        Raises:
            RuntimeError: If the data is brotli encoded and ``cramjam`` is not installed.
            ValueError: If the address of the stream is not initialized or has no readable data.
        """
        # Read the stream once and limit it to its recorded size, so slack space at the end of the
        # last block never ends up in the result or in the decompressor.
        payload = self._read(self.header.data_addr[1], self.header.data_size[1])

        if payload[0:2] == b"\x1f\x8b":
            return gzip.decompress(payload)

        meta = self.meta
        if b"content-encoding:br" in meta:
            if not HAS_CRAMJAM:
                raise RuntimeError("Missing required dependency cramjam to decode brotli data")

            return brotli.decompress(payload).read()

        if b"content-encoding:deflate" in meta:
            return zlib.decompress(payload, -zlib.MAX_WBITS)

        return payload
class SimpleIndexFile:
    """Represents a Chromium Very Simple Disk Cache Backend index file.

    After construction ``header`` holds the parsed ``RealIndexHeader``, ``entries`` the index table
    entries and ``last_used`` the ``last_used`` timestamp of the final entry, or ``None`` when the
    index has no entries.

    Raises:
        ValueError: If the magic value does not match or the number of parsed entries differs from
            ``num_entries`` in the header.
    """

    def __init__(self, disk_cache: SimpleDiskCache, path: Path):
        self.disk_cache = disk_cache
        self.path = path

        self.fh = path.open("rb")
        try:
            self.header = c_simple.RealIndexHeader(self.fh)

            if self.header.magic != c_simple.kSimpleIndexMagicNumber:
                raise ValueError(f"Unexpected magic header for {path!s}: {self.header.magic!r}")

            self.entries = self.header.entries

            if len(self.entries) != self.header.num_entries:
                raise ValueError(f"Mismatch in amount of expected entries for {path!s}")
        except Exception:
            # Do not leave the file handle dangling when the file turns out not to be a valid index.
            self.fh.close()
            raise

        # An index without any entries has no last entry to take the timestamp from.
        if len(self.entries) == 0:
            self.last_used = None
        else:
            self.last_used = webkittimestamp(self.entries[-1].last_used)

    def __repr__(self):
        return f""
+ + # Stream 0 + self.fh.seek(-c_simple.kSimpleEOFSize, os.SEEK_END) + eof0 = c_simple.SimpleFileEOF(self.fh) + + if eof0.magic != c_simple.kSimpleFinalMagicNumber: + raise ValueError(f"Invalid EOF0 magic header {eof0!r}") + + offset = -c_simple.kSimpleEOFSize - eof0.stream_size + if eof0.flags in (2, 3): + offset -= 32 + self.fh.seek(offset, os.SEEK_END) + self._meta = self.fh.read(eof0.stream_size) + + # Stream 1 + self.fh.seek(-(c_simple.kSimpleEOFSize * 2) - eof0.stream_size, os.SEEK_END) + if eof0.flags in (2, 3): + self.fh.seek(-32, os.SEEK_CUR) + + eof1_offset = self.fh.tell() + eof1 = c_simple.SimpleFileEOF(self.fh) + + if eof1.magic != c_simple.kSimpleFinalMagicNumber: + raise ValueError(f"Invalid EOF1 magic header {eof1!r}") + + # Some EOF markers have a stream_size of 0x0 while the data is resident, this is intended behavior + # according to source, but older Chromium versions did populate stream_size. + # We can determine the size of stream 1 by reading until the beginning of the EOF marker for stream 1. 
class SimpleFileType(IntEnum):
    """Kinds of Simple Cache files, as distinguished by :func:`infer_file_type`."""

    STREAM_0_1 = 0
    STREAM_2 = 1
    STREAM_SPARSE = 2


def infer_file_type(file_name: str) -> SimpleFileType:
    """Map the name of a :class:`SimpleCacheFile` to its :class:`SimpleFileType`.

    The type is derived from the suffix of the file name: ``_0``, ``_1`` or ``_s``.

    Raises:
        ValueError: If the file name ends with none of the known suffixes.
    """
    suffix_types = (
        ("_0", SimpleFileType.STREAM_0_1),
        ("_1", SimpleFileType.STREAM_2),
        ("_s", SimpleFileType.STREAM_SPARSE),
    )

    for suffix, file_type in suffix_types:
        if file_name.endswith(suffix):
            return file_type

    raise ValueError(f"Unknown SimpleFileType for filename {file_name!r}")
def parse_cache_key(key: str) -> tuple[int | None, int | None, bool, str]:
    """Split a Cache or Simple Cache key into its standardized components.

    Arguments:
        key: String in the format 'credential_key/upload_data_identifier/[isolation_key]url'.

    Returns: Tuple of ``credential_key``, ``upload_data_identifier``, a boolean that tells whether an
        isolation key (``_dk_`` prefix) was present, and the ``resource_url``.

    Raises:
        TypeError: If ``key`` is not a string.

    References:
        - GenerateCacheKey
        - GetResourceURLFromHttpCacheKey
        - https://chromium.googlesource.com/chromium/src/+/main/net/http/http_cache.cc
    """
    if not isinstance(key, str):
        raise TypeError("Input key is not a string")

    # Same literals as kDoubleKeyPrefix / kDoubleKeySeparator in the referenced Chromium source.
    double_key_prefix = "_dk_"
    double_key_separator = " "

    # 'credential_key/upload_data_identifier/...' (keys written after 2021-09) is tried first;
    # 'upload_data_identifier/...' (before 2021-09) is only tried when that did not match.
    new_format = re.match(r"^(\d+)/(\d+)/(.+)", key)
    old_format = None if new_format else re.match(r"^(\d+)/(.+)", key)

    if new_format:
        credential_key = int(new_format.group(1))
        upload_data_identifier = int(new_format.group(2))
        remainder = new_format.group(3)
    elif old_format:
        credential_key = None
        upload_data_identifier = int(old_format.group(1))
        remainder = old_format.group(2)
    else:
        # No numeric prefix at all: the key is treated as a regular URL.
        credential_key = None
        upload_data_identifier = None
        remainder = key

    # With a double key present, everything after the last separator is the resource URL.
    has_isolation_key = remainder.startswith(double_key_prefix)
    resource_url = remainder.rpartition(double_key_separator)[2] if has_isolation_key else remainder

    return credential_key, upload_data_identifier, has_isolation_key, resource_url
index 0000000..d3cafe4 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/570b07ed93acb263_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70cfef69f196af729971fa79db1baf864b9f177ca37a54f0fe71350a5d11751a +size 36558 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/820f1068453e8aac_0 b/tests/_data/chromium/cache/Linux_Cache_Data/820f1068453e8aac_0 new file mode 100644 index 0000000..ebdacb0 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/820f1068453e8aac_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0049dd7f3dad39bdccf048b0e2d9ecc796f2a63c737104703244e32c24d7c6fd +size 49210 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/b4f8a2c3f5fbecf5_0 b/tests/_data/chromium/cache/Linux_Cache_Data/b4f8a2c3f5fbecf5_0 new file mode 100644 index 0000000..8fa02aa --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/b4f8a2c3f5fbecf5_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55e771e10811987e5c02099ecbf045fb431ac0621d74a703893bf1bec67efb0 +size 845 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/badf5a33b306bb86_0 b/tests/_data/chromium/cache/Linux_Cache_Data/badf5a33b306bb86_0 new file mode 100644 index 0000000..34d2556 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/badf5a33b306bb86_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1068300953434d2757d2953f6683edb98cfe69aa729e7ffe623c808048d896 +size 2353 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/bf13d585df77b48b_0 b/tests/_data/chromium/cache/Linux_Cache_Data/bf13d585df77b48b_0 new file mode 100644 index 0000000..93cfacc --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/bf13d585df77b48b_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15206053f2ce89ecdae645c89cb99f0315505a491d75c2f9e4ddcacd714400e9 +size 120162 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/c1995abad5ab9177_0 
b/tests/_data/chromium/cache/Linux_Cache_Data/c1995abad5ab9177_0 new file mode 100644 index 0000000..2be76ce --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/c1995abad5ab9177_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0116b93527b4d252538d84a12e0b44ad5fb7aaff78ee82fa63e608685c036a0a +size 80521 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/c937c35f739f5138_0 b/tests/_data/chromium/cache/Linux_Cache_Data/c937c35f739f5138_0 new file mode 100644 index 0000000..7c5c285 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/c937c35f739f5138_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef1f16443f32654faa802f88d066d5f1cf96e787f034552c6cf33b3c50912ad6 +size 297230 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/c9b9cf70b386fbe0_0 b/tests/_data/chromium/cache/Linux_Cache_Data/c9b9cf70b386fbe0_0 new file mode 100644 index 0000000..36a7d24 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/c9b9cf70b386fbe0_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54ff0e0893c0bb3e811ece1f8c0021304aa06d8c7c6038bd6c84f184cdfbebb +size 8778 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/d53ede4f7af38aef_0 b/tests/_data/chromium/cache/Linux_Cache_Data/d53ede4f7af38aef_0 new file mode 100644 index 0000000..6153040 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/d53ede4f7af38aef_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b54eeec57a5e5e809a369b1a9130e99de8e5a3a5d65b1daabb593a0ae4071 +size 77069 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/d5c4d42d72fe5bf7_0 b/tests/_data/chromium/cache/Linux_Cache_Data/d5c4d42d72fe5bf7_0 new file mode 100644 index 0000000..c8c83cf --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/d5c4d42d72fe5bf7_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87909254c51335a95cf212b1f1c9fb57cd87d6f83013ea9da82ef2704fa74ccc 
+size 362301 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/de1786f816a0c882_0 b/tests/_data/chromium/cache/Linux_Cache_Data/de1786f816a0c882_0 new file mode 100644 index 0000000..503292f --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/de1786f816a0c882_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5c21debbfad655c2eaebe8f1b24aa5bdbff71dca1a9bd9e35938fe2f6fc3a7 +size 1934 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/de1d31de88adb9e3_0 b/tests/_data/chromium/cache/Linux_Cache_Data/de1d31de88adb9e3_0 new file mode 100644 index 0000000..1ba0bdd --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/de1d31de88adb9e3_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9d1ee8239db7270c8be207c291c0f00a82a030de7c7f73ae3df49edbc1f3dd +size 17751 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/dedf005cc082b363_0 b/tests/_data/chromium/cache/Linux_Cache_Data/dedf005cc082b363_0 new file mode 100644 index 0000000..af33a6d --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/dedf005cc082b363_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ae8fa6e9fac5240b14e9b4f5fee4be983f635a6d9a8c2ffc9a2bc04faaec30 +size 25881 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/ec6b52d868d5ef73_0 b/tests/_data/chromium/cache/Linux_Cache_Data/ec6b52d868d5ef73_0 new file mode 100644 index 0000000..d173937 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/ec6b52d868d5ef73_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47f04970e63ea7e5a02f3f3d0ab6d6468cc78a3e1d32a493cced63efaef0bd1 +size 103019 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/ee17f64527326480_0 b/tests/_data/chromium/cache/Linux_Cache_Data/ee17f64527326480_0 new file mode 100644 index 0000000..44fd3af --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/ee17f64527326480_0 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ed4024263360684b7a8044ce9003462fd22a72a576a2247855c6588667c9ea19 +size 5388 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/ee80cc0f35f40c9b_0 b/tests/_data/chromium/cache/Linux_Cache_Data/ee80cc0f35f40c9b_0 new file mode 100644 index 0000000..92a1d00 --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/ee80cc0f35f40c9b_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079a76219507d2d46d27f1d8647eccda54db297afc8aa36a73d92ced7cb0de8c +size 132083 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/index b/tests/_data/chromium/cache/Linux_Cache_Data/index new file mode 100644 index 0000000..0200e2d --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/index @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbcfe23a2ecb82b7100c50811691dde0a33aa3da8d176be9882a9db485dc0f2d +size 24 diff --git a/tests/_data/chromium/cache/Linux_Cache_Data/index-dir/the-real-index b/tests/_data/chromium/cache/Linux_Cache_Data/index-dir/the-real-index new file mode 100644 index 0000000..b74c5ab --- /dev/null +++ b/tests/_data/chromium/cache/Linux_Cache_Data/index-dir/the-real-index @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab98f11329de9bd02c17214a8e3fe625653fa9030e7b8cc7d4b2bb33792c2703 +size 504 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/data_0 b/tests/_data/chromium/cache/Windows_Cache_Data/data_0 new file mode 100644 index 0000000..6f094a4 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/data_0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478843f8d83e3d966d958d467812c7631f8ef503442c0f613e886ed36abfdf1c +size 45056 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/data_1 b/tests/_data/chromium/cache/Windows_Cache_Data/data_1 new file mode 100644 index 0000000..144eae3 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/data_1 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:08fbcb23b6bac0f810f11b114bb13547a0de599a0cd2831ab023f2089d96c5db +size 270336 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/data_2 b/tests/_data/chromium/cache/Windows_Cache_Data/data_2 new file mode 100644 index 0000000..a2c1510 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/data_2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a638044e709f981af3572b17969c36d3a63b21a93096e2a9cd63186f3af0f8 +size 1056768 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/data_3 b/tests/_data/chromium/cache/Windows_Cache_Data/data_3 new file mode 100644 index 0000000..79da2ec --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/data_3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9932196c6e22ddc450e44e245e53ab8bc220b37d27c4cdfd092ec10ddfa681e +size 4202496 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000001 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000001 new file mode 100644 index 0000000..cfb9c3d --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000001 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa8633dd50c2aa87b4be7dc02b54d44f6b4ca6694dae4cc4af78772911742a3 +size 25451 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000002 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000002 new file mode 100644 index 0000000..844d112 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000002 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18b7853fc31198f241f6f95bc5b92b6dbfa6c32c89f04fcc2be91a667ef0486 +size 361821 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000003 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000003 new file mode 100644 index 0000000..c6a92a0 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000003 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a5da99dee005d03aa622109114fa244c7993549bd799077448574598a0cbb5b7 +size 131769 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000004 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000004 new file mode 100644 index 0000000..bd6fa3d --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000004 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be4fed52b1b87947c8d7c607e689f99b8d758e0e858fbb9880dae79f0b705ab +size 17216 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000005 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000005 new file mode 100644 index 0000000..4ae4946 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000005 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67475300db44d15a11c46553e07cfe9527ec4223f0080a078937468c2c27a421 +size 102493 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000006 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000006 new file mode 100644 index 0000000..49a89be --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000006 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114737c46cc41a374093fb22c90349e578ae84f7791da1f343a11e1402be9b58 +size 119619 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000007 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000007 new file mode 100644 index 0000000..ec16812 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000007 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44514feb51cced393262882b21a2fe728d397dd290fc409852cafbdf43f1fe3e +size 296691 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000008 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000008 new file mode 100644 index 0000000..6e795f7 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000008 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:04e68ddc7fa2faab0fb5c952e4dccfb0f1ce556c7fd61432302a6ab61be16e59 +size 131546 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_000009 b/tests/_data/chromium/cache/Windows_Cache_Data/f_000009 new file mode 100644 index 0000000..88a7f18 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_000009 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8a3335699022cb397f5b2a3be476c55989467c4d10b4a13afbe04ba4bdbe2 +size 54548 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_00000a b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000a new file mode 100644 index 0000000..be768df --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000a @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa61a40c2649ac43d2feb0f722e4b94a5bf0de16590ad97f040fbba06b0a309 +size 48700 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_00000b b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000b new file mode 100644 index 0000000..d663a77 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000b @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5021951d0a8e7fa8da061a86d28319128b06ebe0737aaed2b6516d4e1f36ff8e +size 76560 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_00000c b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000c new file mode 100644 index 0000000..038339b --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000c @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81bafbb0feeb1bf0a39384b99bbc66f85b32babc22623c51787d3dfca9d004a1 +size 36069 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_00000d b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000d new file mode 100644 index 0000000..0cbc37d --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000d @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:36b707da0acad63a8a31350f72f2175b835e50630b9b42296d6c4619ed56c569 +size 80012 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/f_00000e b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000e new file mode 100644 index 0000000..67c0188 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/f_00000e @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0008a2b91e736b24d332f96ead73cd8f014a38174d54453953e19173c7dcaca +size 19864 diff --git a/tests/_data/chromium/cache/Windows_Cache_Data/index b/tests/_data/chromium/cache/Windows_Cache_Data/index new file mode 100644 index 0000000..35a5409 --- /dev/null +++ b/tests/_data/chromium/cache/Windows_Cache_Data/index @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1842e7996f1d16c2d39bd63d969e3d906780c27466f5a317ece937de3c3a58d8 +size 524656 diff --git a/tests/chromium/__init__.py b/tests/chromium/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/chromium/test_cache.py b/tests/chromium/test_cache.py new file mode 100644 index 0000000..79d1fcd --- /dev/null +++ b/tests/chromium/test_cache.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from datetime import datetime, timezone + +from dissect.database.chromium.cache.c_cache import c_cache +from dissect.database.chromium.cache.cache import DiskCache +from tests._util import absolute_path + + +def test_chromium_cache() -> None: + """Test if we can parse Chromium Cache Data from Google Chrome 148 on Windows 11 (24H2).""" + path = absolute_path("_data/chromium/cache/Windows_Cache_Data") + disk_cache = DiskCache(path) + + assert disk_cache.create_time == datetime(2026, 4, 30, 12, 10, 45, 77412, tzinfo=timezone.utc) + assert disk_cache.num_entries == 1 + assert len(disk_cache.block_files) == 4 + + entry_store = next(disk_cache.entries) + assert entry_store.address.address == 0xA0010002 + assert entry_store.state == c_cache.EntryState.ENTRY_NORMAL + assert entry_store.creation_time 
== datetime(2026, 4, 30, 12, 11, 48, 207695, tzinfo=timezone.utc) + assert entry_store.key == "1/0/_dk_http://172.16.82.1 http://172.16.82.1 http://172.16.82.1:8000/" + assert entry_store.next == 0 + + assert entry_store.data.startswith(b"\n\n") + assert b"HTTP/1.0 200 OK\00" in entry_store.meta + + assert disk_cache.get_key("1/0/_dk_http://172.16.82.1 http://172.16.82.1 http://172.16.82.1:8000/") + assert disk_cache.get_url("http://172.16.82.1:8000/") + assert next(disk_cache.get_host("172.16.82.1")) diff --git a/tests/chromium/test_simple.py b/tests/chromium/test_simple.py new file mode 100644 index 0000000..cf1bb82 --- /dev/null +++ b/tests/chromium/test_simple.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from dissect.database.chromium.cache.simple import SimpleDiskCache +from tests._util import absolute_path + + +def test_chromium_simple_cache() -> None: + """Test if we can parse Chromium Cache Data from Google Chrome 147 on Ubuntu 24.04 LTS.""" + path = absolute_path("_data/chromium/cache/Linux_Cache_Data") + simple_disk_cache = SimpleDiskCache(path) + + assert len(simple_disk_cache.cache_files) == 19 + assert len(list(simple_disk_cache.get_host("172.16.82.1"))) == 19 + + assert sorted(cache_file.resource_url for cache_file in simple_disk_cache.cache_files) == sorted( + [ + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/shared-ro/webfonts/RO-SerifWeb-Italic.woff2", + "http://172.16.82.1:8000/binaries/medium/content/gallery/rijksoverheid/content-afbeeldingen/home/2026/energiemaatregelen-anp-556197185.jpg", + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/responsive.css", + "http://172.16.82.1:8000/binaries/content/assets/rijksoverheid/iconen/favicon.ico", + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/shared-ro/webfonts/RO-SansWebText-Regular.woff2", + "http://172.16.82.1:8000/webfiles/1750011834072/behaviour/shared-ro/jquery-ui.js", + 
"http://172.16.82.1:8000/webfiles/1750011834072/presentation/shared-ro/icons/ro-icons-2.4.woff2", + "http://172.16.82.1:8000/binaries/medium/content/gallery/rijksoverheid/content-afbeeldingen/home/2026/douane-koraal-1.jpg", + "http://172.16.82.1:8000/binaries/content/gallery/rijksoverheid/channel-afbeeldingen/logos/beeldmerk-rijksoverheid-desktop.svg", + "http://172.16.82.1:8000/binaries/content/assets/rijksoverheid/behaviour/rop-page-feedback.min-20230526.js", + "http://172.16.82.1:8000/binaries/widescreen/content/gallery/rijksoverheid/content-afbeeldingen/home/evergreens/header-meivakantie.jpg", + "http://172.16.82.1:8000/webfiles/1750011834072/behaviour/core.js", + "http://172.16.82.1:8000/binaries/medium/content/gallery/rijksoverheid/content-afbeeldingen/onderwerpen/fiets/campagne-fietshelm.jpg", + "http://172.16.82.1:8000/webfiles/1750011834072/behaviour/shared-ro/img-helpers.js", + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/themes/logoblauw.css", + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/shared-ro/webfonts/rijks-sans-regular.woff2", + "http://172.16.82.1:8000/", + "http://172.16.82.1:8000/binaries/large/content/gallery/rijksoverheid/content-afbeeldingen/home/evergreens/header-meivakantie.jpg", + "http://172.16.82.1:8000/webfiles/1750011834072/presentation/shared-ro/webfonts/RO-SansWebText-Bold.woff2", + ] + ) + + cache_file = simple_disk_cache.get_url("http://172.16.82.1:8000/") + assert b"HTTP/1.0 200 OK\x00" in cache_file.meta + assert cache_file.data.startswith(b"") + assert cache_file.data.endswith(b"\n\n\n") + assert len(cache_file.data) == 25451 + + assert simple_disk_cache.get_key("1/0/_dk_http://172.16.82.1 http://172.16.82.1 http://172.16.82.1:8000/") diff --git a/tests/chromium/test_util.py b/tests/chromium/test_util.py new file mode 100644 index 0000000..b2ceac4 --- /dev/null +++ b/tests/chromium/test_util.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import pytest + +from 
dissect.database.chromium.cache.util import parse_cache_key + + +@pytest.mark.parametrize( + ("input_key", "expected_output"), + [ + pytest.param( + "http://172.16.82.1", + (None, None, False, "http://172.16.82.1"), + id="regular_url", + ), + pytest.param( + "1/0/_dk_http://172.16.82.1 http://172.16.82.1 http://172.16.82.1:8000/webfiles/1750011834072/behaviour/shared-ro/jquery-ui.js", + (1, 0, True, "http://172.16.82.1:8000/webfiles/1750011834072/behaviour/shared-ro/jquery-ui.js"), + id="double_keyed_key", + ), + pytest.param( + "0/http://172.16.82.1", + (None, 0, False, "http://172.16.82.1"), + id="old_format", + ), + ], +) +def test_cache_key_parsing(input_key: str, expected_output: tuple) -> None: + """Test if we parse Chromium cache keys correctly.""" + assert parse_cache_key(input_key) == expected_output