wxcz_admin
/
lightrag-cn-git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
							"""Offline tests for the VLM cache-key invariants used by analyze_multimodal.

These tests verify the hash inputs we feed into ``compute_args_hash`` actually
deliver the contract documented in the LLM/VLM vision plan:

- same prompt + same image content => cache HIT (identical args_hash)
- same prompt + different image content => cache MISS (different args_hash)
- same prompt + same image content under a different file path/source_id =>
  cache HIT (provenance is for audit only and must not affect the hash)
- the audit blob written into ``original_prompt`` never embeds the raw base64
  payload, only digests and provenance pointers
"""

from __future__ import annotations

import base64
import json
from typing import Any

import pytest

from lightrag.llm._vision_utils import (
    image_audit_metadata,
    image_cache_metadata,
    normalize_image_inputs,
)
from lightrag.utils import (
    _serialize_cache_variant,
    compute_args_hash,
    get_llm_cache_identity,
    serialize_llm_cache_identity,
)


# Apply the "offline" pytest marker to every test collected from this module.
pytestmark = pytest.mark.offline


# PNG byte fixture: starts with the 8-byte PNG signature, followed by an IHDR
# chunk whose first two 32-bit fields (width, height) are both 1, then IDAT
# and IEND data.
PNG_A = (
    b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01"
    b"\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8"
    b"\xcf\xc0\x00\x00\x00\x03\x00\x01\x5c\xcc\xd9\x9e\x00\x00\x00\x00"
    b"IEND\xaeB`\x82"
)
# Same length as PNG_A; only the byte at index -12 (a 0x00 in PNG_A, four
# bytes before the b"IEND" tag) is swapped for 0x01. The IHDR bytes are
# untouched, so the two fixtures differ in content but not in declared size.
PNG_B = PNG_A[:-12] + b"\x01" + PNG_A[-11:]  # 1-byte tweak => different hash


def _b64(raw: bytes) -> str:
    return base64.b64encode(raw).decode("ascii")


def _hash_for(prompt: str, images: list[dict[str, Any]] | None) -> str:
    """Compute the args_hash for *prompt* plus an optional list of images.

    Args:
        prompt: Prompt text passed as the first hash input.
        images: Raw image dicts to run through ``normalize_image_inputs``.
            ``None`` or an empty list skips normalization and hashes an
            empty image list instead.

    Returns:
        The value produced by ``compute_args_hash`` for the prompt, two empty
        string slots, the serialized ``role="vlm"`` cache identity, the
        serialized ``{"type": "json_object"}`` variant and the serialized
        image cache metadata.
    """
    if images:
        prepared = normalize_image_inputs(images)
    else:
        prepared = []

    # Identity is derived from an empty config so only the role contributes.
    vlm_identity = get_llm_cache_identity({}, role="vlm")

    identity_part = serialize_llm_cache_identity(vlm_identity)
    format_part = _serialize_cache_variant({"type": "json_object"})
    # Only the cache-facing image metadata is hashed, not the audit metadata.
    images_part = _serialize_cache_variant(image_cache_metadata(prepared))

    return compute_args_hash(prompt, "", "", identity_part, format_part, images_part)


def test_same_prompt_same_image_yields_same_hash():
    """Hashing the same prompt with the same image bytes twice must agree."""
    first, second = (
        _hash_for("describe", [{"base64": _b64(PNG_A)}]) for _ in range(2)
    )
    assert first == second


def test_same_prompt_different_image_yields_different_hash():
    """The same prompt paired with PNG_A versus PNG_B must hash differently."""
    hash_a, hash_b = (
        _hash_for("describe", [{"base64": _b64(png)}]) for png in (PNG_A, PNG_B)
    )
    assert hash_a != hash_b


def test_same_image_different_source_file_still_hits():
    """Provenance fields (source_id, source_file, doc_id) must not alter the hash.

    Both inputs carry the same PNG_A payload and modality; only the
    provenance pointers differ, and the resulting hashes must be equal.
    """

    def hash_with_provenance(source_id: str, source_file: str, doc_id: str) -> str:
        # Fresh dict per call so the two hash computations share no state.
        entry = {
            "base64": _b64(PNG_A),
            "source_id": source_id,
            "source_file": source_file,
            "modality": "image",
            "doc_id": doc_id,
        }
        return _hash_for("describe", [entry])

    original = hash_with_provenance("img-001", "/path/a/img.png", "doc-1")
    relocated = hash_with_provenance(
        "img-002", "/different/elsewhere/copy.png", "doc-2"
    )
    assert original == relocated


def test_different_prompt_with_same_image_yields_different_hash():
    """Changing only the prompt text must change the hash for the same image."""
    prompts = ("describe", "describe in english")
    hashes = [_hash_for(text, [{"base64": _b64(PNG_A)}]) for text in prompts]
    assert hashes[0] != hashes[1]


def test_image_present_vs_absent_yields_different_hash():
    """A text-only request and the same prompt with an image must not collide."""
    image_payload = [{"base64": _b64(PNG_A)}]
    without_image = _hash_for("describe", None)
    with_image = _hash_for("describe", image_payload)
    assert without_image != with_image


def test_audit_block_in_original_prompt_does_not_leak_raw_base64():
    """Check the ``<vlm_images>`` audit block holds digests, not raw base64.

    Mirrors how _analyze_item builds the cache-entry original_prompt: the
    prompt followed by the JSON-encoded audit metadata wrapped in
    ``<vlm_images>`` tags.
    """
    image_entry = {
        "base64": _b64(PNG_A),
        "source_id": "img-001",
        "source_file": "/tmp/a.png",
        "modality": "image",
        "doc_id": "doc-1",
    }
    audit = image_audit_metadata(normalize_image_inputs([image_entry]))

    prompt = "describe"
    audit_json = json.dumps(audit, ensure_ascii=False)
    original_prompt = prompt + f"\n<vlm_images>{audit_json}</vlm_images>"

    # Both the opening and the closing tag must survive into the stored prompt.
    for tag in ("<vlm_images>", "</vlm_images>"):
        assert tag in original_prompt
    # The sha256 digest is expected in the prompt; the encoded image is not.
    assert audit[0]["sha256"] in original_prompt
    assert _b64(PNG_A) not in original_prompt


def test_image_metadata_includes_width_height():
    """Cache and audit metadata must both report width and height.

    Design §5.2 contract: the image digest metadata surfaces width/height
    alongside mime/sha256/bytes. PNG_A is a 1x1 fixture, so both blobs are
    expected to report 1 for each dimension.
    """
    entries = normalize_image_inputs([{"base64": _b64(PNG_A)}])
    cache_meta = image_cache_metadata(entries)
    audit_meta = image_audit_metadata(entries)

    assert len(cache_meta) == 1
    # Dimensions come from the fixture's IHDR chunk, which declares 1x1.
    for meta in (cache_meta, audit_meta):
        assert meta[0]["width"] == 1
        assert meta[0]["height"] == 1


def test_image_dimensions_change_changes_cache_key():
    """A 32x16 PNG and the 1x1 ``PNG_A`` fixture must not share a cache key.

    The test builds a 32x16 PNG in memory, checks that the cache metadata
    reports the declared dimensions of each image, and then asserts that the
    same prompt hashed with each image yields distinct args_hashes. The two
    images differ at the byte level, so the width/height fields document the
    difference without being the sole identity source.
    """
    import struct
    import zlib

    def png_chunk(tag: bytes, payload: bytes) -> bytes:
        # PNG chunk framing: big-endian length, 4-byte tag, payload, then a
        # CRC-32 computed over tag + payload.
        crc = zlib.crc32(tag + payload).to_bytes(4, "big")
        return struct.pack(">I", len(payload)) + tag + payload + crc

    signature = b"\x89PNG\r\n\x1a\n"
    # IHDR: width=32, height=16, then bit depth 8, colour type 6 and zeroed
    # compression/filter/interlace bytes.
    ihdr = png_chunk(b"IHDR", struct.pack(">II", 32, 16) + b"\x08\x06\x00\x00\x00")
    # 16 scanlines, each one filter byte plus 32 pixels * 4 bytes, all zero.
    idat = png_chunk(b"IDAT", zlib.compress(b"\x00" * (32 * 16 * 4 + 16)))
    iend = png_chunk(b"IEND", b"")
    big_png = signature + ihdr + idat + iend

    small_meta = image_cache_metadata(
        normalize_image_inputs([{"base64": _b64(PNG_A)}])
    )
    big_meta = image_cache_metadata(
        normalize_image_inputs([{"base64": _b64(big_png)}])
    )

    assert small_meta[0]["width"] == 1
    assert small_meta[0]["height"] == 1
    assert big_meta[0]["width"] == 32
    assert big_meta[0]["height"] == 16

    # The check the test name promises: the two images must produce
    # different cache keys for the same prompt.
    small_hash = _hash_for("describe", [{"base64": _b64(PNG_A)}])
    big_hash = _hash_for("describe", [{"base64": _b64(big_png)}])
    assert small_hash != big_hash