# vLLM server (vllm/vllm-openai image) serving the rerank model on an NVIDIA GPU.
vllm-rerank:
    # NOTE(review): `latest` is a floating tag; consider pinning a release tag
    # so that redeployments are reproducible.
    image: vllm/vllm-openai:latest
    runtime: nvidia
    # Server arguments. Compose interpolates every ${...} before the container
    # starts. VLLM_RERANK_API_KEY has no default, so it must be set in the
    # environment or .env file; otherwise it expands to an empty string and
    # --api-key is left without a value.
    # VLLM_RERANK_EXTRA_ARGS is optional and expands to nothing when unset.
    command: >-
      --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3}
      --port ${VLLM_RERANK_PORT:-8000}
      --dtype float16
      --api-key ${VLLM_RERANK_API_KEY}
      ${VLLM_RERANK_EXTRA_ARGS:-}
    environment:
      NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
      NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}
    ports:
      # Same port number on the host and inside the container.
      - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}"
    volumes:
      # Named volume holding the Hugging Face cache directory.
      # NOTE(review): must be declared under the top-level `volumes:` key,
      # which is outside this fragment.
      - vllm_rerank_cache:/root/.cache/huggingface
    # Share the host IPC namespace.
    # NOTE(review): presumably for shared-memory use by the inference
    # runtime; confirm.
    ipc: host
    healthcheck:
      # Healthy once a TCP socket is LISTENING on the service port.
      # The port is converted to the 4-digit upper-case hex form used by
      # /proc/net/tcp{,6}. The pattern is anchored to the local-address column
      # and to state 0A (LISTEN), so sockets in other states, or sockets whose
      # *remote* port happens to equal the service port (e.g. an outbound
      # connection), cannot produce a false "healthy".
      # `$$` is Compose's escape for a literal `$` that the shell must see;
      # `${VLLM_RERANK_PORT:-8000}` is deliberately left for Compose to expand.
      test:
        - CMD-SHELL
        - >-
          PORT_HEX="$$(printf '%04X' ${VLLM_RERANK_PORT:-8000})";
          cat /proc/net/tcp /proc/net/tcp6 2>/dev/null |
          grep -Eq "^ *[0-9]+: [0-9A-F]+:$${PORT_HEX} [0-9A-F]+:[0-9A-F]+ 0A "
      # After the 10s start period, up to 120 consecutive failures at 5s
      # intervals (about 10 minutes) are tolerated before the container is
      # marked unhealthy.
      interval: 5s
      timeout: 3s
      retries: 120
      start_period: 10s
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped