# evaluate_cs336_lm.yaml
#
# Hydra/OmegaConf configuration for the `cs336_lm` evaluation settings below
# (prompt, sampling parameters, model hyperparameters, tokenizer files).
#
# NOTE(review): the nesting was reconstructed from a flattened copy of this
# file. Keys referenced through absolute interpolations (`${root_dir}`,
# `${dataset_name}`, `${dataset_split}`, `${tokenizer_dir}`) must be top-level;
# the children of `eval`, `model` and `tokenizer` were inferred from grouping.
# Confirm against the consuming script.

# Uncomment to override Hydra's run output directory.
# hydra:
#   run:
#     dir: ./my_output_dir

# Hydra's `runtime.cwd` resolver value; used as the base for the paths below.
root_dir: ${hydra:runtime.cwd}

eval:
  # Relative path; presumably the checkpoint directory - TODO confirm whether
  # the consumer resolves it against `root_dir` or the process working dir.
  save_path: checkpoints/cs336_lm_owt/
  # Presumably the training iteration of the checkpoint to load - TODO confirm.
  iteration: 5000
  prompt: That empowerment comes with added scrutiny
  max_new_tokens: 64
  temperature: 1.0
  top_k: 50

# NOTE(review): placed at top level; it may instead belong under `eval` -
# confirm how the consuming script reads it (`cfg.model_type` vs
# `cfg.eval.model_type`).
model_type: cs336_lm

model:
  vocab_size: 32000
  context_length: 256
  d_model: 512
  num_layers: 4
  num_heads: 16
  d_ff: 1344
  rope_theta: 10000.0

dataset_name: owt  # `TinyStories` or `owt`, change as needed
dataset_split: valid  # `train` or `valid`, change as needed

# Save path for tokenizer. The value ends in `/`, so the `${tokenizer_dir}/...`
# paths below contain a doubled slash after interpolation.
tokenizer_dir: ${root_dir}/tokenizers/${dataset_name}_${dataset_split}/

tokenizer:
  vocab_path: ${tokenizer_dir}/vocab.pkl
  merges_path: ${tokenizer_dir}/merges.pkl
  special_tokens: ["<|endoftext|>"]