"""Offline tests for the VLM cache-key invariants used by analyze_multimodal.
These tests verify the hash inputs we feed into ``compute_args_hash`` actually
deliver the contract documented in the LLM/VLM vision plan:
- same prompt + same image content => cache HIT (identical args_hash)
- same prompt + different image content => cache MISS (different args_hash)
- same prompt + same image content under a different file path/source_id =>
cache HIT (provenance is for audit only and must not affect the hash)
- the audit blob written into ``original_prompt`` never embeds the raw base64
payload, only digests and provenance pointers
"""
from __future__ import annotations
import base64
import json
from typing import Any
import pytest
from lightrag.llm._vision_utils import (
image_audit_metadata,
image_cache_metadata,
normalize_image_inputs,
)
from lightrag.utils import (
_serialize_cache_variant,
compute_args_hash,
get_llm_cache_identity,
serialize_llm_cache_identity,
)
pytestmark = pytest.mark.offline
# Small PNG fixture whose IHDR declares a 1x1 image, written out as its
# consecutive byte runs.
PNG_A = b"".join(
    (
        b"\x89PNG\r\n\x1a\n",  # PNG file signature
        b"\x00\x00\x00\rIHDR",  # IHDR data length (13) and chunk type
        b"\x00\x00\x00\x01\x00\x00\x00\x01",  # width = 1, height = 1 (big-endian)
        b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89",  # remaining IHDR fields + CRC
        b"\x00\x00\x00\rIDATx\x9cc\xf8",
        b"\xcf\xc0\x00\x00\x00\x03\x00\x01\x5c\xcc\xd9\x9e\x00\x00\x00\x00",
        b"IEND\xaeB`\x82",
    )
)
# Copy of PNG_A with the byte at index -12 replaced by 0x01: a one-byte tweak
# that is enough to give the two fixtures different content hashes.
PNG_B = b"".join((PNG_A[:-12], b"\x01", PNG_A[-11:]))
def _b64(raw: bytes) -> str:
return base64.b64encode(raw).decode("ascii")
def _hash_for(prompt: str, images: list[dict[str, Any]] | None) -> str:
    """Compute the args_hash for *prompt* combined with optional *images*.

    The positional inputs handed to ``compute_args_hash`` are, in order: the
    prompt, two empty strings, the serialized cache identity for the ``vlm``
    role, a fixed ``{"type": "json_object"}`` variant, and the serialized
    image cache metadata.

    Args:
        prompt: Prompt text to hash.
        images: Raw image input dicts. A falsy value (``None`` or an empty
            list) skips normalization and uses an empty image list instead.

    Returns:
        Whatever ``compute_args_hash`` returns for those inputs.
    """
    if images:
        normalized_images = normalize_image_inputs(images)
    else:
        normalized_images = []

    vlm_identity = get_llm_cache_identity({}, role="vlm")
    hash_inputs = (
        prompt,
        "",
        "",
        serialize_llm_cache_identity(vlm_identity),
        _serialize_cache_variant({"type": "json_object"}),
        _serialize_cache_variant(image_cache_metadata(normalized_images)),
    )
    return compute_args_hash(*hash_inputs)
def test_same_prompt_same_image_yields_same_hash():
    """Hashing the same prompt and image content twice gives equal hashes."""
    encoded = _b64(PNG_A)
    first = _hash_for("describe", [{"base64": encoded}])
    # A fresh dict is built for the second call, matching two separate inputs.
    second = _hash_for("describe", [{"base64": _b64(PNG_A)}])
    assert first == second
def test_same_prompt_different_image_yields_different_hash():
    """Different image bytes under the same prompt must not share a hash."""
    hash_a, hash_b = (
        _hash_for("describe", [{"base64": _b64(png)}]) for png in (PNG_A, PNG_B)
    )
    assert hash_a != hash_b
def test_same_image_different_source_file_still_hits():
    """Provenance fields must not influence the args_hash.

    Both inputs carry the same image bytes but different ``source_id``,
    ``source_file`` and ``doc_id`` values; the two hashes must be equal.
    """

    def image_entry(source_id: str, source_file: str, doc_id: str) -> dict[str, Any]:
        # Same image payload and modality every time; only provenance varies.
        return {
            "base64": _b64(PNG_A),
            "source_id": source_id,
            "source_file": source_file,
            "modality": "image",
            "doc_id": doc_id,
        }

    first = _hash_for(
        "describe",
        [image_entry("img-001", "/path/a/img.png", "doc-1")],
    )
    second = _hash_for(
        "describe",
        [image_entry("img-002", "/different/elsewhere/copy.png", "doc-2")],
    )
    assert first == second
def test_different_prompt_with_same_image_yields_different_hash():
    """Changing only the prompt text must change the args_hash."""
    short_hash, long_hash = (
        _hash_for(text, [{"base64": _b64(PNG_A)}])
        for text in ("describe", "describe in english")
    )
    assert short_hash != long_hash
def test_image_present_vs_absent_yields_different_hash():
    """A text-only call and a call with an image must not share a hash."""
    # Left operand: no images at all; right operand: one image attached.
    assert _hash_for("describe", None) != _hash_for(
        "describe", [{"base64": _b64(PNG_A)}]
    )
def test_audit_block_in_original_prompt_does_not_leak_raw_base64():
    """Mirrors how _analyze_item builds the cache-entry original_prompt.

    The audit metadata for one image is JSON-serialized and appended to the
    prompt on a new line. The combined text must keep the prompt intact,
    contain a parseable audit block carrying the image's sha256 digest, and
    must never contain the raw base64 payload.
    """
    raw_b64 = _b64(PNG_A)
    normalized = normalize_image_inputs(
        [
            {
                "base64": raw_b64,
                "source_id": "img-001",
                "source_file": "/tmp/a.png",
                "modality": "image",
                "doc_id": "doc-1",
            }
        ]
    )
    audit_blob = image_audit_metadata(normalized)
    prompt = "describe"
    original_prompt = (
        prompt
        + f"\n{json.dumps(audit_blob, ensure_ascii=False)}"
    )

    # Substring checks against "" are always true, so assert on the real
    # structure instead: prompt first, then the audit JSON on its own line.
    # NOTE(review): if _analyze_item wraps the audit block in marker tags,
    # add them to the construction above and assert on them here.
    assert original_prompt.startswith(prompt + "\n")
    embedded = json.loads(original_prompt[len(prompt):])
    assert len(embedded) == len(audit_blob)
    assert embedded[0]["sha256"] == audit_blob[0]["sha256"]

    # sha256 digest is present; raw base64 must not be.
    assert audit_blob[0]["sha256"] in original_prompt
    assert raw_b64 not in original_prompt
def test_image_metadata_includes_width_height():
    """Cache and audit metadata both expose the image's width and height.

    Per the Design §5.2 contract, image digest metadata must surface
    width/height alongside mime/sha256/bytes so cache keys and audit blocks
    capture the full pixel footprint.
    """
    normalized = normalize_image_inputs([{"base64": _b64(PNG_A)}])
    cache_blob = image_cache_metadata(normalized)
    audit_blob = image_audit_metadata(normalized)

    assert len(cache_blob) == 1
    # PNG_A is a 1x1 fixture, so both views must report 1 for each dimension
    # (the values are decodable from its IHDR chunk).
    for blob in (cache_blob, audit_blob):
        first_entry = blob[0]
        assert first_entry["width"] == 1
        assert first_entry["height"] == 1
def test_image_dimensions_change_changes_cache_key():
    """A PNG with different declared dimensions must get a different key.

    Builds a 32x16 PNG in memory and compares it with the 1x1 ``PNG_A``
    fixture. The two files differ at the byte level and therefore must hash
    to distinct args_hashes; the width/height fields in the cache metadata
    document the difference without being the sole identity source.
    """
    import struct
    import zlib

    def png_chunk(chunk_type: bytes, payload: bytes) -> bytes:
        """Frame *payload* as a PNG chunk: length, type, data, CRC-32."""
        checksum = zlib.crc32(chunk_type + payload)
        return (
            struct.pack(">I", len(payload))
            + chunk_type
            + payload
            + struct.pack(">I", checksum)
        )

    width, height = 32, 16
    # IHDR data: width, height, then bit depth 8, colour type 6 and three
    # zero bytes (compression, filter, interlace).
    ihdr = png_chunk(
        b"IHDR", struct.pack(">II", width, height) + b"\x08\x06\x00\x00\x00"
    )
    # All-zero scanlines: 4 bytes per pixel plus one filter byte per row.
    scanlines = b"\x00" * (width * height * 4 + height)
    idat = png_chunk(b"IDAT", zlib.compress(scanlines))
    iend = png_chunk(b"IEND", b"")
    big_png = b"\x89PNG\r\n\x1a\n" + ihdr + idat + iend

    normalized_small = normalize_image_inputs([{"base64": _b64(PNG_A)}])
    normalized_big = normalize_image_inputs([{"base64": _b64(big_png)}])
    small_meta = image_cache_metadata(normalized_small)[0]
    big_meta = image_cache_metadata(normalized_big)[0]

    # The metadata must report each image's declared dimensions.
    assert small_meta["width"] == 1
    assert small_meta["height"] == 1
    assert big_meta["width"] == 32
    assert big_meta["height"] == 16

    # The cache key itself must differ between the two images.
    small_hash = _hash_for("describe", [{"base64": _b64(PNG_A)}])
    big_hash = _hash_for("describe", [{"base64": _b64(big_png)}])
    assert small_hash != big_hash