# Environment setup and launch commands for serving DeepSeek-R1 with vLLM on a
# Ray cluster. Starts the Ray head node on this host, then launches the vLLM
# server using Ray as the distributed executor backend.

# --- Required ---------------------------------------------------------------
# CA bundle used for TLS verification (certifi's bundle from a user-local
# Python 3.11 install).
export SSL_CERT_FILE=/fs/agustina/arivero/supercomplex/.local/lib/python3.11/site-packages/certifi/cacert.pem
export RAY_NODE_MANAGER_HEARTBEAT_TIMEOUT_MILLISECONDS=20000 # 20 seconds
export RAY_GCS_HEARTBEAT_TIMEOUT_SECONDS=60
export RAY_DISABLE_PLACEMENT_GROUP_BUNDLING=1

# --- Optional ---------------------------------------------------------------
# Added while chasing debugging problems; some of these variables may not even
# exist in the installed Ray/NCCL versions.
export RAY_NODE_IP_ADDRESS=172.22.5.1
export NCCL_DEBUG=INFO
export NCCL_DEBUG_SUBSYS=ALL
export NCCL_P2P_DISABLE=0 # or 1 if P2P is not supported
export RAY_DEDUP_LOGS=0
# NOTE(review): ib1 is presumably the InfiniBand interface on this cluster --
# confirm the interface name on each node.
export NCCL_SOCKET_IFNAME=ib1

# --- Launch -----------------------------------------------------------------
module load python-math/3.11.4

# Start the Ray head node, reusing the IP exported above so the two values
# cannot drift apart.
ray start --head --node-ip-address="${RAY_NODE_IP_ADDRESS}" --num-gpus=4

# Show the current placement groups (diagnostic output only).
ray list placement-groups --detail

# NOTE(review): pipeline-parallel 2 x tensor-parallel 4 asks for 8 GPUs while
# the head node is started with 4; presumably a second 4-GPU worker node must
# already have joined the Ray cluster -- confirm before running.
vllm serve deepseek-ai/DeepSeek-R1 \
  --distributed-executor-backend=ray \
  --trust-remote-code \
  --pipeline-parallel-size 2 \
  --tensor-parallel-size 4