---
# Optional Hydra override for the run output directory (disabled).
# Uncomment all three lines together to enable it.
# hydra:
#   run:
#     dir: ./my_output_dir

# Project root, taken from the `hydra:runtime.cwd` interpolation.
# Other paths in this file are built from it via `${root_dir}`.
root_dir: "${hydra:runtime.cwd}"

# Evaluation / text-generation settings.
eval:
  # Checkpoint to evaluate.
  # NOTE(review): presumably `save_path` is the checkpoint directory and
  # `iteration` picks the training step to load; confirm against the consumer.
  save_path: checkpoints
  iteration: 5000

  # Text the generated sample starts from.
  prompt: "Once upon a time, there is a little boy"

  # Sampling controls.
  max_new_tokens: 64
  temperature: 1.0
  top_k: 50

# Model family selector.
# NOTE(review): nesting was reconstructed; this key is assumed to be top-level
# (a sibling of `model:`), not part of `eval:`. Confirm against the code that
# reads it.
model_type: qwen2_5

# Architecture hyper-parameters for a scaled-down model.
# A trailing `# <number>` comment records the larger value that this config
# replaces (presumably the reference checkpoint's setting; confirm).
model:
  architectures:
    - Qwen2ForCausalLM
  attention_dropout: 0.0
  bos_token_id: 151643
  eos_token_id: 151645
  hidden_act: silu
  hidden_size: 128  # 896
  initializer_range: 0.02
  intermediate_size: 384  # 4864
  max_position_embeddings: 512  # 32768
  max_window_layers: 21
  model_type: qwen2
  num_attention_heads: 8  # 14
  num_hidden_layers: 8  # 24
  num_key_value_heads: 2
  # Written with an explicit decimal point so that YAML 1.1 loaders also parse
  # it as a float; a bare `1e-06` is read as a string by such loaders.
  rms_norm_eps: 1.0e-06
  rope_theta: 1000000.0
  # `use_sliding_window` below is false, so this width is presumably unused;
  # verify before relying on it.
  sliding_window: 512  # 32768
  tie_word_embeddings: true
  torch_dtype: bfloat16
  # Quoted so the version is unambiguously a string.
  transformers_version: "4.43.1"
  use_cache: true
  use_sliding_window: false
  vocab_size: 151936

# Dataset selection. Both keys stay at the config root because the tokenizer
# directory path refers to them as `${dataset_name}` and `${dataset_split}`.
dataset_name: TinyStories  # `TinyStories` or `owt`, change as needed
dataset_split: train  # `train` or `valid`, change as needed

# Tokenizer artefact locations and special tokens.
# NOTE(review): nesting was reconstructed; the keys below are assumed to be
# children of `tokenizer:`. Confirm against the code that reads them.
tokenizer:
  # Save path for the tokenizer, one directory per dataset/split pair.
  tokenizer_dir: "${root_dir}/tokenizers/${dataset_name}_${dataset_split}/"
  # OmegaConf interpolations are resolved from the config root, so the sibling
  # key is addressed by its full path; a bare `${tokenizer_dir}` would look for
  # a root-level key that does not exist.
  vocab_path: "${tokenizer.tokenizer_dir}/vocab.pkl"
  merges_path: "${tokenizer.tokenizer_dir}/merges.pkl"
  special_tokens: ["<|endoftext|>"]