# sft_gsm8k.yaml
# Run configuration for the "sft-gsm8k" project: model/dataset locations,
# device placement, batch sizing, optimizer settings, and eval/save cadence.
# The ${hydra:...} interpolations mean this file is expected to be loaded
# through Hydra.

# --- Tracking and output locations ---
mlflow_project: sft-gsm8k
# Both output directories live under the Hydra run directory.
checkpoint_dir: ${hydra:run.dir}/checkpoint/
csv_dir: ${hydra:run.dir}/csv/  # for saving evaluation results

# --- Inputs ---
# Relative paths; presumably resolved against the launch directory --
# TODO confirm against the loader code.
model_path: huggingface_models/Qwen/Qwen2.5-Math-1.5B
dataset_path: data_sft/openai/gsm8k

# --- Device placement ---
# Training and evaluation are assigned to different CUDA devices.
train_device: cuda:0
eval_device: cuda:1

# --- Batch sizing ---
# NOTE(review): if the trainer steps the optimizer once per
# gradient_accumulation_steps micro-batches, the effective batch size is
# 2 * 4 = 8 -- confirm against the training loop.
micro_batch_size: 2
gradient_accumulation_steps: 4
eval_batch_size: 2

# --- Optimization ---
num_epochs: 1
# Written with an explicit decimal point: a bare `2e-5` is read as a string
# by YAML 1.1 loaders (e.g. plain PyYAML), whereas `2.0e-5` is a float under
# both YAML 1.1 and 1.2.
learning_rate: 2.0e-5
weight_decay: 0.01

# --- Evaluation / checkpoint cadence ---
# NOTE(review): eval_steps of 1 presumably triggers evaluation on every
# step; verify this is intended and not a leftover debugging value.
eval_steps: 1
save_steps: 10