# Run the two project modules in sequence, each through `uv run`:
#   1. scripts.tokenize
#   2. scripts.pretrain
# These must be two separate commands: on a single line the shell would pass
# "uv run python -m scripts.pretrain" as arguments to scripts.tokenize and the
# second module would never be launched.
# `&&` starts the second step only when the first exits with status 0.
# NOTE(review): presumably pretrain consumes the output of tokenize — confirm.
uv run python -m scripts.tokenize &&
  uv run python -m scripts.pretrain