# evaluate_qwen2_5.yaml
#
# Hydra/OmegaConf configuration: generation settings under `eval`, a
# Qwen2-style model definition under `model`, and tokenizer artifact paths
# under `tokenizer`.
#
# NOTE(review): this file was reconstructed from a copy whose indentation had
# been stripped, so the nesting below is inferred from the keys themselves.
# The copy's line-number gutter listed 49 lines but only 44 were visible;
# the difference may be blank lines or may be missing content -- confirm
# against the original file.

# Optional Hydra output-directory override (disabled).
# hydra:
#   run:
#     dir: ./my_output_dir

# Referenced below as ${root_dir}; resolved through Hydra's `hydra:` resolver.
root_dir: ${hydra:runtime.cwd}

eval:
  save_path: checkpoints
  iteration: 5000
  prompt: 'Once upon a time, there is a little boy'
  max_new_tokens: 64
  temperature: 1.0
  top_k: 50
  # NOTE(review): placement inferred. This key may belong at the top level
  # rather than under `eval` -- confirm against the code that reads it.
  model_type: qwen2_5

# Trailing comments on some values below are carried over from the original;
# presumably they record the full-size settings this config was scaled down
# from -- TODO confirm.
model:
  architectures:
    - Qwen2ForCausalLM
  attention_dropout: 0.0
  bos_token_id: 151643
  eos_token_id: 151645
  hidden_act: silu
  hidden_size: 128  # 896
  initializer_range: 0.02
  intermediate_size: 384  # 4864
  max_position_embeddings: 512  # 32768
  max_window_layers: 21
  model_type: qwen2
  num_attention_heads: 8  # 14
  num_hidden_layers: 8  # 24
  num_key_value_heads: 2
  # Written with an explicit mantissa dot so that YAML 1.1 loaders (which
  # read a dotless `1e-06` as a string) also parse it as a float.
  rms_norm_eps: 1.0e-06
  rope_theta: 1000000.0
  sliding_window: 512  # 32768
  tie_word_embeddings: true
  torch_dtype: bfloat16
  transformers_version: '4.43.1'
  use_cache: true
  use_sliding_window: false
  vocab_size: 151936

# Kept at the top level: both are referenced below through absolute
# interpolations (${dataset_name}, ${dataset_split}).
dataset_name: TinyStories  # `TinyStories` or `owt`, change as needed
dataset_split: train  # `train` or `valid`, change as needed

# NOTE(review): ${tokenizer_dir} is an absolute OmegaConf interpolation
# (looked up from the config root). If `tokenizer_dir` really is nested under
# `tokenizer` as reconstructed here, vocab_path/merges_path would need
# ${.tokenizer_dir} or ${tokenizer.tokenizer_dir} to resolve -- confirm the
# intended nesting before changing either.
tokenizer:
  tokenizer_dir: ${root_dir}/tokenizers/${dataset_name}_${dataset_split}/  # save path for tokenizer
  vocab_path: ${tokenizer_dir}/vocab.pkl
  merges_path: ${tokenizer_dir}/merges.pkl
  special_tokens: ["<|endoftext|>"]