- mkdir -p owt/txt
- cd owt/txt
- # wget https://huggingface.co/datasets/stanford-cs336/owt-sample/resolve/main/owt_train.txt.gz
- # wget https://huggingface.co/datasets/stanford-cs336/owt-sample/resolve/main/owt_valid.txt.gz
- # 如果你链接huggingface.co失败,可以尝试使用hf-mirror.com,具体操作如下:
- wget https://hf-mirror.com/datasets/stanford-cs336/owt-sample/resolve/main/owt_train.txt.gz
- wget https://hf-mirror.com/datasets/stanford-cs336/owt-sample/resolve/main/owt_valid.txt.gz
- # 解压
- gunzip owt_train.txt.gz
- gunzip owt_valid.txt.gz
- mv owt_train.txt train.txt
- mv owt_valid.txt valid.txt
|