# vllm-embed: runs the vllm/vllm-openai image with the model selected by
# VLLM_EMBED_MODEL (default BAAI/bge-m3), listening on VLLM_EMBED_PORT
# (default 8001). The same port number is published on the host.
vllm-embed:
  image: vllm/vllm-openai:latest
  runtime: nvidia
  # Flags passed as the container command; the folded scalar joins the lines
  # below into a single space-separated string.
  # NOTE(review): VLLM_EMBED_API_KEY has no default, so when it is unset this
  # renders as a bare `--api-key` flag with no value. Confirm it is always set.
  command: >
    --model ${VLLM_EMBED_MODEL:-BAAI/bge-m3}
    --port ${VLLM_EMBED_PORT:-8001}
    --dtype float16
    --api-key ${VLLM_EMBED_API_KEY}
    ${VLLM_EMBED_EXTRA_ARGS:-}
  environment:
    NVIDIA_VISIBLE_DEVICES: "${NVIDIA_VISIBLE_DEVICES:-all}"
    NVIDIA_DRIVER_CAPABILITIES: "${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}"
  ports:
    - "${VLLM_EMBED_PORT:-8001}:${VLLM_EMBED_PORT:-8001}"
  volumes:
    # Named volume mounted over the Hugging Face cache directory
    # (presumably so downloaded model files survive container re-creation).
    - vllm_embed_cache:/root/.cache/huggingface
  ipc: host
  healthcheck:
    # Socket-table probe, no HTTP request: format the port as 4-digit
    # uppercase hex and look for ":<HEX> " in /proc/net/tcp and /proc/net/tcp6.
    #
    # Interpolation: ${VLLM_EMBED_PORT:-8001} is substituted by Compose when
    # the file is loaded. Every `$$` is an escaped literal `$` for the
    # container shell; this includes the command substitution `$$(...)`, so
    # the file also loads under parsers that reject an unescaped `$(`.
    test:
      - CMD-SHELL
      - >-
        PORT_HEX="$$(printf '%04X' ${VLLM_EMBED_PORT:-8001})";
        cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | grep -q ":$${PORT_HEX} "
    # 120 retries at a 5s interval tolerates roughly 10 minutes of failing
    # probes before the container is marked unhealthy.
    interval: 5s
    timeout: 3s
    retries: 120
    start_period: 10s
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            count: all
            capabilities: [gpu]
  restart: unless-stopped