vllm-rerank-gpu.yml 1.0 KB

123456789101112131415161718192021222324252627282930313233
  # Reranker service: runs the vllm/vllm-openai image on NVIDIA GPUs.
  # Every ${VAR:-default} below is resolved by Compose interpolation before
  # the container is created.
  vllm-rerank:
    image: vllm/vllm-openai:latest
    runtime: nvidia
    # Folded scalar: the lines below are joined with single spaces into one
    # argument string (presumably appended to the image's entrypoint - confirm).
    # VLLM_RERANK_API_KEY has no default; NOTE(review): confirm how the server
    # behaves when it is unset and expands to an empty string.
    command: >
      --model ${VLLM_RERANK_MODEL:-BAAI/bge-reranker-v2-m3}
      --port ${VLLM_RERANK_PORT:-8000}
      --dtype float16
      --api-key ${VLLM_RERANK_API_KEY}
      ${VLLM_RERANK_EXTRA_ARGS:-}
    environment:
      # GPU visibility and driver capabilities; both can be overridden from
      # the host environment.
      NVIDIA_VISIBLE_DEVICES: "${NVIDIA_VISIBLE_DEVICES:-all}"
      NVIDIA_DRIVER_CAPABILITIES: "${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}"
    ports:
      # The same port number is used on the host and inside the container.
      - "${VLLM_RERANK_PORT:-8000}:${VLLM_RERANK_PORT:-8000}"
    volumes:
      # Named volume mounted at the Hugging Face cache path; presumably
      # declared under a top-level `volumes:` key elsewhere - confirm.
      - vllm_rerank_cache:/root/.cache/huggingface
    ipc: host
    healthcheck:
      # Formats the port as 4-digit uppercase hex and greps /proc/net/tcp and
      # /proc/net/tcp6 for ":<HEX> ". This only shows that a socket with that
      # port exists; it does not issue an HTTP request.
      # `$$` is the Compose escape for a literal `$`, so PORT_HEX is expanded
      # by the container shell, while ${VLLM_RERANK_PORT:-8000} is filled in
      # by Compose.
      test:
        - CMD-SHELL
        - 'PORT_HEX="$(printf ''%04X'' ${VLLM_RERANK_PORT:-8000})"; cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "'
      # 120 retries at a 5s interval allow roughly ten minutes for the port
      # to appear.
      interval: 5s
      timeout: 3s
      retries: 120
      start_period: 10s
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this service.
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    restart: unless-stopped