# vLLM server (vllm/vllm-openai image) serving the reranker model on NVIDIA GPUs.
# Model, port, API key and extra CLI flags come from VLLM_RERANK_* variables,
# which Compose substitutes when it loads this file.
vllm-rerank:
  image: vllm/vllm-openai:latest
  runtime: nvidia
  # Folded scalar: the lines below are joined with spaces into one argument
  # string. NOTE(review): VLLM_RERANK_API_KEY has no default, so when it is
  # unset `--api-key` is left without a value — confirm it is always provided.
  command: >-
    --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3}
    --port ${VLLM_RERANK_PORT:-8000}
    --dtype float16
    --api-key ${VLLM_RERANK_API_KEY}
    ${VLLM_RERANK_EXTRA_ARGS:-}
  environment:
    NVIDIA_VISIBLE_DEVICES: "${NVIDIA_VISIBLE_DEVICES:-all}"
    NVIDIA_DRIVER_CAPABILITIES: "${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}"
  ports:
    # Same port number on the host and in the container.
    - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}"
  volumes:
    # Named volume so the Hugging Face cache survives container re-creation.
    - vllm_rerank_cache:/root/.cache/huggingface
  # Share the host IPC namespace.
  # NOTE(review): presumably for PyTorch shared memory — confirm.
  ipc: host
  healthcheck:
    # Passes once a TCP socket on the service port shows up in /proc/net/tcp
    # or /proc/net/tcp6, where ports are listed as 4-digit uppercase hex.
    # `$$` is the Compose escape for a literal `$`: the command substitution
    # and PORT_HEX are evaluated by the container shell, while
    # ${VLLM_RERANK_PORT:-8000} is substituted by Compose.
    test:
      - CMD-SHELL
      - >-
        PORT_HEX="$$(printf '%04X' ${VLLM_RERANK_PORT:-8000})";
        cat /proc/net/tcp /proc/net/tcp6 2>/dev/null
        | grep -q ":$${PORT_HEX} "
    interval: 5s
    timeout: 3s
    # 120 retries at a 5s interval tolerate about 10 minutes of failing checks.
    retries: 120
    start_period: 10s
  deploy:
    resources:
      reservations:
        devices:
          # Reserve every NVIDIA GPU on the host for this service.
          - driver: nvidia
            count: all
            capabilities: [gpu]
  restart: unless-stopped