download_owt.sh 612 B

12345678910111213141516
  # Download the train and valid splits of the stanford-cs336 "owt-sample"
  # dataset into owt/txt (relative to the current directory) and unpack them
  # to train.txt and valid.txt.
  #
  # Environment:
  #   OWT_BASE_URL  Optional. URL prefix the .gz archives are fetched from.
  #                 Defaults to the hf-mirror.com mirror, which is useful when
  #                 huggingface.co cannot be reached. To download from
  #                 huggingface.co directly, set it to:
  #                 https://huggingface.co/datasets/stanford-cs336/owt-sample/resolve/main

  # Stop at the first failing command (or unset variable). Without this, a
  # failed mkdir/cd would drop the downloads into the caller's directory, and
  # a failed download would still be followed by gunzip/mv on missing files.
  set -eu

  base_url="${OWT_BASE_URL:-https://hf-mirror.com/datasets/stanford-cs336/owt-sample/resolve/main}"

  mkdir -p owt/txt
  cd owt/txt

  for split in train valid; do
    archive="owt_${split}.txt.gz"

    wget "${base_url}/${archive}"

    # Decompress: gunzip replaces the archive with owt_<split>.txt.
    gunzip "${archive}"

    # Give the extracted file its short final name.
    mv "owt_${split}.txt" "${split}.txt"
  done