# vLLM reranker service: runs the vllm/vllm-openai image with GPU access and
# publishes its API port (VLLM_RERANK_PORT, default 8000) on the host.
# NOTE(review): this mapping presumably nests under the top-level `services:`
# key, which is outside this excerpt - re-indent accordingly when merging.
vllm-rerank:
  # NOTE(review): `latest` is a floating tag; consider pinning a version so
  # rebuilds are reproducible.
  image: vllm/vllm-openai:latest
  runtime: nvidia
  # Folded scalar: the lines below are joined with spaces into one argument
  # string (presumably appended to the image's entrypoint - TODO confirm).
  # NOTE(review): if VLLM_RERANK_API_KEY is unset, Compose substitutes an empty
  # string and `--api-key` is left without a value; make sure it is always set.
  command: >
    --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3}
    --port ${VLLM_RERANK_PORT:-8000}
    --dtype float16
    --api-key ${VLLM_RERANK_API_KEY}
    ${VLLM_RERANK_EXTRA_ARGS:-}
  environment:
    NVIDIA_VISIBLE_DEVICES: ${NVIDIA_VISIBLE_DEVICES:-all}
    NVIDIA_DRIVER_CAPABILITIES: ${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}
  ports:
    # Same port number on host and container; quoted so YAML keeps it a string.
    - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}"
  volumes:
    # Named volume for the Hugging Face cache directory.
    # NOTE(review): `vllm_rerank_cache` must be declared under the top-level
    # `volumes:` key, which is not visible in this excerpt.
    - vllm_rerank_cache:/root/.cache/huggingface
  # NOTE(review): host IPC namespace - presumably for shared-memory use by the
  # inference runtime; confirm it is still required.
  ipc: host
  healthcheck:
    # Port-presence probe (not an HTTP request): formats the port as 4-digit
    # uppercase hex and greps the kernel socket tables in /proc/net/tcp{,6}.
    # `${VLLM_RERANK_PORT:-8000}` is expanded by Compose when the file is
    # loaded; every `$$` is Compose's escape for a literal `$`, so the command
    # substitution and PORT_HEX are evaluated by the shell in the container.
    test:
      - CMD-SHELL
      - >-
        PORT_HEX="$$(printf '%04X' ${VLLM_RERANK_PORT:-8000})";
        cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "
    # 120 retries at 5s intervals tolerates roughly 10 minutes of startup
    # before the container is marked unhealthy.
    interval: 5s
    timeout: 3s
    retries: 120
    start_period: 10s
  deploy:
    resources:
      reservations:
        devices:
          # Reserve every NVIDIA GPU on the host for this service.
          - driver: nvidia
            count: all
            capabilities: [gpu]
  restart: unless-stopped