DeepSpeed multi-GPU fine-tuning does not work
I am currently trying to fine-tune a Korean Llama model (13B) on a private dataset using DeepSpeed, Flash Attention 2, and the TRL SFTTrainer. I am fine-tuning on 2 * A100 80G GPUs, but the fine-tuning never succeeds. I cannot find the cause of the problem or any solution via Google. Please let me know what the problem is and how to solve it.
As I mentioned above, I am stuck in this situation. I used accelerate launch with the multi-GPU DeepSpeed configuration provided by the TRL example code. I tried both deepspeed_zero2.yaml and deepspeed_zero3.yaml, but neither of them worked.
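For reference, the ZeRO-3 config I pass to accelerate follows the TRL example's deepspeed_zero3.yaml; this is a sketch of its shape (the values in my local copy may differ slightly):
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 2
rdzv_backend: static
same_network: true
use_cpu: false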
The command I run is as follows:
accelerate launch --config_file=accelerate_configs/deepspeed_zero3.yaml --num_processes 2 finetuning/finetune_SFT.py \
--model_path beomi/llama-2-koen-13b \
--data_path Cartinoe5930/KoRAE_filtered_12k \
--output_dir finetuning/result/llama2/ \
--wandb_project KoRAE_llama2 \
--wandb_run_name KoRAE_llama2 \
    --hf_hub_path HUB_PATH_TO_UPLOAD_MODEL \
    --hf_token MY_HF_ACCESS_TOKEN
The specific code of finetune_SFT.py is as follows:
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, DataCollatorForLanguageModeling
from accelerate import Accelerator
from datasets import load_dataset
from trl import SFTTrainer
from utils.prompter import Prompter
import argparse
def args_parse():
parser = argparse.ArgumentParser()
parser.add_argument("--hf_token", type=str, help="Required to upload models to hub.")
parser.add_argument("--model_path", type=str, default="beomi/llama-2-koen-13b")
parser.add_argument("--data_path", type=str, default="Cartinoe5930/KoRAE_filtered_12k")
parser.add_argument("--num_proc", type=int)
parser.add_argument("--seq_length", type=int, default=4096)
parser.add_argument("--num_epochs", type=int, default=3)
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--micro_batch_size", type=int, default=2)
parser.add_argument("--val_set_size", type=float, default=0)
parser.add_argument("--logging_steps", type=int, default=10)
parser.add_argument("--save_strategy", type=str, default="epoch", help="You can choose the strategy of saving model.")
parser.add_argument("--gradient_checkpointing", type=bool, default=True)
parser.add_argument("--group_by_length", type=bool, default=False)
parser.add_argument("--packing", type=bool, default=False)
parser.add_argument("--learning_rate", type=float, default=3e-4)
parser.add_argument("--lr_scheduler_type", type=str, default="cosine")
parser.add_argument("--warmup_ratio", type=float, default=0.03)
parser.add_argument("--weight_decay", type=float, default=0)
parser.add_argument("--wandb_project", type=str)
parser.add_argument("--wandb_run_name", type=str)
parser.add_argument(
"--output_dir",
type=str,
required=True
)
parser.add_argument(
"--hf_hub_path",
type=str,
required=True,
help="The hub path to upload the model"
)
return parser.parse_args()
def process_dataset(example):
prompter = Prompter("KoRAE_template")
result_data = []
    # `example` is a batch (a dict of lists), so iterate over one column's length
    for i in range(len(example["instruction"])):
full_prompt = prompter.generate_prompt(
example["instruction"][i],
example["prompt"][i],
example["input"][i],
example["output"][i])
result_data.append(full_prompt)
return result_data
def create_datasets(args):
dataset = load_dataset(
args.data_path,
split="train",
num_proc=args.num_proc if args.num_proc else None,
)
if args.val_set_size > 0:
train_val = dataset.train_test_split(test_size=args.val_set_size, seed=42)
train_data = train_val["train"]
val_data = train_val["test"]
else:
train_data = dataset
val_data = None
return train_data, val_data
if __name__ == "__main__":
args = args_parse()
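    # Effective global batch = micro_batch_size (per GPU) * num_proc (GPUs) * grad-accum steps,
    # so accumulation steps = batch_size / (micro_batch_size * num_proc): 32 / (2 * 2) = 8 here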
gradient_accumulation_steps = args.batch_size // args.micro_batch_size // args.num_proc
model = AutoModelForCausalLM.from_pretrained(
args.model_path,
# device_map={"": Accelerator().process_index},
torch_dtype=torch.bfloat16,
use_auth_token=args.hf_token,
use_flash_attention_2=True
)
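    # The KV cache is useless during training and incompatible with gradient
    # checkpointing, so disable it; enable_input_require_grads() makes the input
    # embeddings require grads so the checkpointed blocks still receive gradients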
model.config.use_cache = False
model.enable_input_require_grads()
tokenizer = AutoTokenizer.from_pretrained(
args.model_path,
use_auth_token=args.hf_token,
)
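    # Llama has no pad token, so reuse EOS; right padding is the safe choice for training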
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# Check if parameter passed or if set within environ
    use_wandb = (args.wandb_project is not None and len(args.wandb_project) > 0) or (
        "WANDB_PROJECT" in os.environ and len(os.environ["WANDB_PROJECT"]) > 0
    )
    # Only overwrite environ if wandb param passed
    if args.wandb_project is not None and len(args.wandb_project) > 0:
        os.environ["WANDB_PROJECT"] = args.wandb_project
train_dataset, eval_dataset = create_datasets(args)
training_args = TrainingArguments(
output_dir=args.output_dir,
num_train_epochs=args.num_epochs,
per_device_train_batch_size=args.micro_batch_size,
        per_device_eval_batch_size=args.micro_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
gradient_checkpointing=args.gradient_checkpointing,
learning_rate=args.learning_rate,
logging_steps=args.logging_steps,
save_strategy=args.save_strategy,
        save_steps=args.save_steps,  # ignored unless save_strategy == "steps"
        evaluation_strategy="epoch" if eval_dataset else "no",
group_by_length=args.group_by_length,
lr_scheduler_type=args.lr_scheduler_type,
warmup_ratio=args.warmup_ratio,
bf16=True,
save_total_limit=2,
remove_unused_columns=False,
report_to="wandb" if use_wandb else None,
run_name=args.wandb_run_name if use_wandb else None,
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
trainer = SFTTrainer(
model=model,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
formatting_func=process_dataset,
data_collator=data_collator,
packing=args.packing,
max_seq_length=args.seq_length,
tokenizer=tokenizer,
args=training_args
)
trainer.train()
trainer.save_model(args.output_dir)
model.push_to_hub(
args.hf_hub_path,
use_temp_dir=True,
use_auth_token=args.hf_token,
)
tokenizer.push_to_hub(
        args.hf_hub_path,
use_temp_dir=True,
use_auth_token=args.hf_token,
)
The log of the code above is shown below. There are no specific errors or anything else strange; the run simply stays in this state, and the training progress bar and any further logs never appear, so I cannot tell whether training is actually running.
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING]
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] *****************************************
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
[2023-11-08 05:02:56,499] torch.distributed.run: [WARNING] *****************************************
[2023-11-08 05:03:00,158] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2023-11-08 05:03:00,422] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
You are attempting to use Flash Attention 2.0 with a model initialized on CPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
You are attempting to use Flash Attention 2.0 with a model initialized on CPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|████████████████| 10/10 [00:07<00:00, 1.30it/s]
/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:374: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:671: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
[2023-11-08 05:04:45,333] [INFO] [comm.py:637:init_distributed] cdb=None
[2023-11-08 05:04:45,333] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
Loading checkpoint shards: 100%|████████████████| 10/10 [00:10<00:00, 1.09s/it]
/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:374: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:671: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
warnings.warn(
[2023-11-08 05:04:49,096] [INFO] [comm.py:637:init_distributed] cdb=None
Parameter Offload: Total persistent parameters: 414720 in 81 params
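Since there is no explicit error, I can rerun with more verbose distributed logging if that would help, for example (NCCL_DEBUG and TORCH_DISTRIBUTED_DEBUG are standard NCCL/PyTorch environment variables, not anything specific to my setup):
NCCL_DEBUG=INFO TORCH_DISTRIBUTED_DEBUG=DETAIL accelerate launch --config_file=accelerate_configs/deepspeed_zero3.yaml --num_processes 2 finetuning/finetune_SFT.py ...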
The only suspicious line I can spot is the warning that my kernel version 5.4.0 is below the recommended minimum of 5.5.0 and "can cause the process to hang". Please let me know a brilliant solution to this problem!