"""Tests for entity extraction gleaning token limit guard."""

import logging
from unittest.mock import AsyncMock

import pytest

from lightrag.utils import Tokenizer, TokenizerInterface


@pytest.fixture
def _propagate_lightrag_logger(monkeypatch):
    """Turn log propagation back on for the ``lightrag`` logger.

    ``lightrag.utils.logger`` sets ``propagate = False`` to keep test output
    quiet, which also hides its records from ``caplog``.  Propagation is
    re-enabled through ``monkeypatch`` so the change is undone after the
    test, letting ``caplog`` capture WARNING records emitted from inside
    ``lightrag.operate``.
    """
    lightrag_logger = logging.getLogger("lightrag")
    monkeypatch.setattr(lightrag_logger, "propagate", True)


class DummyTokenizer(TokenizerInterface):
    """Test tokenizer where each character is exactly one token.

    A token is simply the character's Unicode code point, so the token
    count of a string equals its length.
    """

    def encode(self, content: str):
        """Return the code point of every character in ``content``."""
        return list(map(ord, content))

    def decode(self, tokens):
        """Turn a sequence of code points back into the string they spell."""
        return "".join(map(chr, tokens))


def _make_global_config(
    entity_extract_max_gleaning: int = 1,
) -> dict:
    """Assemble the smallest ``global_config`` the tests pass to
    ``extract_entities``.

    A single ``AsyncMock`` (returning ``""`` by default) is installed both as
    ``llm_model_func`` and as every entry of ``role_llm_funcs``, so its
    ``await_count`` reflects calls made through any of those keys.

    Args:
        entity_extract_max_gleaning: Value stored under the
            ``entity_extract_max_gleaning`` key.

    Returns:
        A fresh config dict; nothing is shared between calls.
    """
    dummy_tokenizer = Tokenizer("dummy", DummyTokenizer())
    shared_llm = AsyncMock(return_value="")
    # Every role points at the very same mock object.
    role_funcs = dict.fromkeys(("extract", "keyword", "query", "vlm"), shared_llm)
    config = {
        "llm_model_func": shared_llm,
        "role_llm_funcs": role_funcs,
        "entity_extract_max_gleaning": entity_extract_max_gleaning,
        "entity_extract_max_records": 100,
        "entity_extract_max_entities": 40,
        "addon_params": {},
        "tokenizer": dummy_tokenizer,
        "llm_model_max_async": 1,
    }
    return config


# Smallest extraction response that _process_extraction_result can parse:
# a single entity record immediately followed by the <|COMPLETE|> marker.
_EXTRACTION_RESULT = (
    "(entity<|#|>TEST_ENTITY<|#|>CONCEPT<|#|>A test entity)"
    "<|COMPLETE|>"
)


def _make_chunks(content: str = "Test content.") -> dict[str, dict]:
    """Return a one-entry chunk mapping keyed by ``chunk-001``.

    The ``tokens`` field is the character count of ``content``, which equals
    the number of tokens ``DummyTokenizer.encode`` produces for it (one token
    per character).

    Args:
        content: Text stored as the chunk's ``content``.

    Returns:
        ``{"chunk-001": {...}}`` with the chunk attached to ``doc-001`` at
        order index 0.
    """
    chunk_record = dict(
        tokens=len(content),
        content=content,
        full_doc_id="doc-001",
        chunk_order_index=0,
    )
    return {"chunk-001": chunk_record}


@pytest.mark.offline
@pytest.mark.asyncio
async def test_gleaning_skipped_when_tokens_exceed_limit(
    monkeypatch, caplog, _propagate_lightrag_logger
):
    """A projected gleaning input above ``MAX_EXTRACT_INPUT_TOKENS`` must
    skip gleaning and log a WARNING.

    The projected input is system + history(user+assistant) + continue
    prompt.  Skipping it avoids ``context_length_exceeded`` errors from the
    LLM provider on the second round when the initial response was long.
    """
    from lightrag.operate import extract_entities

    # A 10-token budget is below any realistic prompt, so the guard must fire.
    monkeypatch.setenv("MAX_EXTRACT_INPUT_TOKENS", "10")

    config = _make_global_config(entity_extract_max_gleaning=1)
    mock_llm = config["llm_model_func"]
    mock_llm.return_value = _EXTRACTION_RESULT

    with caplog.at_level("WARNING", logger="lightrag"):
        await extract_entities(chunks=_make_chunks(), global_config=config)

    # One await only: the initial extraction ran, the gleaning round did not.
    assert mock_llm.await_count == 1

    # Collect the WARNING messages that announce the skipped gleaning round.
    guard_warnings = []
    for record in caplog.records:
        if record.levelname != "WARNING":
            continue
        text = record.getMessage()
        if text.startswith("Gleaning stopped for chunk chunk-001:"):
            guard_warnings.append(text)

    assert guard_warnings, (
        "expected a WARNING log explaining gleaning was skipped due to "
        "token limit; got: "
        f"{[r.getMessage() for r in caplog.records]}"
    )

    # The message has to report both the measured token count and the limit
    # so operators can pick a sensible MAX_EXTRACT_INPUT_TOKENS.
    first_warning = guard_warnings[0]
    assert "exceeded limit (10)" in first_warning
    assert "Input tokens (" in first_warning


@pytest.mark.offline
@pytest.mark.asyncio
async def test_gleaning_proceeds_when_tokens_within_limit(monkeypatch):
    """With a cap far above the projected input, gleaning runs as usual."""
    from lightrag.operate import extract_entities

    # Generous budget: the guard has no reason to trip.
    monkeypatch.setenv("MAX_EXTRACT_INPUT_TOKENS", "999999")

    config = _make_global_config(entity_extract_max_gleaning=1)
    mock_llm = config["llm_model_func"]
    mock_llm.return_value = _EXTRACTION_RESULT

    await extract_entities(chunks=_make_chunks(), global_config=config)

    # Two awaits: the initial extraction plus a single gleaning pass.
    assert mock_llm.await_count == 2


@pytest.mark.offline
@pytest.mark.asyncio
async def test_no_gleaning_when_max_gleaning_zero(monkeypatch):
    """``entity_extract_max_gleaning=0`` turns gleaning off whatever the
    token budget is — the guard sits downstream of the feature flag."""
    from lightrag.operate import extract_entities

    # Budget is deliberately huge so only the feature flag can stop gleaning.
    monkeypatch.setenv("MAX_EXTRACT_INPUT_TOKENS", "999999")

    config = _make_global_config(entity_extract_max_gleaning=0)
    mock_llm = config["llm_model_func"]
    mock_llm.return_value = _EXTRACTION_RESULT

    await extract_entities(chunks=_make_chunks(), global_config=config)

    # Only the initial extraction call is expected.
    assert mock_llm.await_count == 1


@pytest.mark.offline
@pytest.mark.asyncio
async def test_gleaning_guard_disabled_when_max_tokens_zero(monkeypatch):
    """``MAX_EXTRACT_INPUT_TOKENS=0`` opts out of the guard.

    Gleaning then always runs regardless of input size, which suits callers
    whose provider has no hard input ceiling.
    """
    from lightrag.operate import extract_entities

    # Zero means "no limit" here, not "a limit of zero tokens".
    monkeypatch.setenv("MAX_EXTRACT_INPUT_TOKENS", "0")

    config = _make_global_config(entity_extract_max_gleaning=1)
    mock_llm = config["llm_model_func"]
    mock_llm.return_value = _EXTRACTION_RESULT

    await extract_entities(chunks=_make_chunks(), global_config=config)

    # With the guard off, both the initial round and the gleaning round run.
    assert mock_llm.await_count == 2