# Hydra/OmegaConf configuration: evaluation settings, a scaled-down
# Qwen2-style model definition, and tokenizer artifact paths.
# NOTE(review): the section nesting below was reconstructed from a file
# whose line breaks were lost; confirm it against the code that reads it.

# Optional Hydra run-directory override (disabled). Uncomment to enable.
# hydra:
#   run:
#     dir: ./my_output_dir

# Directory the application was launched from, as reported by Hydra.
root_dir: ${hydra:runtime.cwd}

# Evaluation / text-generation settings.
# NOTE(review): `save_path` and `iteration` presumably select the checkpoint
# to load; verify against the evaluation script.
eval:
  save_path: checkpoints
  iteration: 5000
  # Quoted so the comma-containing sentence is always read as one string.
  prompt: 'Once upon a time, there is a little boy'
  max_new_tokens: 64
  temperature: 1.0
  top_k: 50

model_type: qwen2_5

# Model hyperparameters. Trailing `# <number>` comments were present in the
# original file; presumably they record the full-size reference values that
# were reduced here. TODO confirm.
model:
  architectures:
    - Qwen2ForCausalLM
  attention_dropout: 0.0
  bos_token_id: 151643
  eos_token_id: 151645
  hidden_act: silu
  hidden_size: 128  # 896
  initializer_range: 0.02
  intermediate_size: 384  # 4864
  max_position_embeddings: 512  # 32768
  max_window_layers: 21
  model_type: qwen2
  num_attention_heads: 8  # 14
  num_hidden_layers: 8  # 24
  num_key_value_heads: 2
  # Written with a mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read the
  # dot-less form `1e-06` as a string instead of a float.
  rms_norm_eps: 1.0e-06
  rope_theta: 1000000.0
  sliding_window: 512  # 32768
  tie_word_embeddings: true
  torch_dtype: bfloat16
  # Quoted so the version is always treated as a string.
  transformers_version: '4.43.1'
  use_cache: true
  use_sliding_window: false
  vocab_size: 151936

dataset_name: TinyStories  # `TinyStories` or `owt`, change as needed
dataset_split: train  # `train` or `valid`, change as needed

tokenizer:
  # save path for tokenizer
  tokenizer_dir: ${root_dir}/tokenizers/${dataset_name}_${dataset_split}/
  # Interpolations are resolved from the config root, so the sibling key is
  # referenced by its full path (`tokenizer.tokenizer_dir`).
  vocab_path: ${tokenizer.tokenizer_dir}/vocab.pkl
  merges_path: ${tokenizer.tokenizer_dir}/merges.pkl
  special_tokens: ["<|endoftext|>"]