From a6c4b141cd5e75a411277a0b43d9967a8abdaae6 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Fri, 16 Jun 2023 17:38:21 +0800
Subject: [PATCH] fix freeze layers

---
 src/utils/config.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/utils/config.py b/src/utils/config.py
index 0778cb7..7d121ad 100644
--- a/src/utils/config.py
+++ b/src/utils/config.py
@@ -187,6 +187,13 @@ class FinetuningArguments:
         default="lora",
         metadata={"help": "Which fine-tuning method to use."}
     )
+    num_hidden_layers: Optional[int] = field(
+        default=32,
+        metadata={"help": "Number of decoder blocks in the model. \
+                  LLaMA choices: [\"32\", \"40\", \"60\", \"80\"], \
+                  BLOOM choices: [\"24\", \"30\", \"70\"], \
+                  Baichuan choices: [\"32\"]"}
+    )
     num_layer_trainable: Optional[int] = field(
         default=3,
         metadata={"help": "Number of trainable layers for Freeze fine-tuning."}
     )
@@ -223,11 +230,11 @@ class FinetuningArguments:
             self.lora_target = [target.strip() for target in self.lora_target.split(",")]
 
         if self.num_layer_trainable > 0: # fine-tuning the last n layers if num_layer_trainable > 0
-            trainable_layer_ids = [27 - k for k in range(self.num_layer_trainable)]
+            trainable_layer_ids = [self.num_hidden_layers - k for k in range(self.num_layer_trainable)]
         else: # fine-tuning the first n layers if num_layer_trainable < 0
             trainable_layer_ids = [k for k in range(-self.num_layer_trainable)]
 
-        self.trainable_layers = ["layers.{:d}.{}".format(idx, self.name_module_trainable) for idx in trainable_layer_ids]
+        self.trainable_layers = ["{:d}.{}".format(idx, self.name_module_trainable) for idx in trainable_layer_ids]
 
         assert self.finetuning_type in ["none", "freeze", "lora", "full"], "Invalid fine-tuning method."
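
Note (not part of the patch): the trainable_layers list built in __post_init__ is a set of parameter-name filters, e.g. "31.mlp" when name_module_trainable is "mlp"; Freeze fine-tuning keeps gradients only for parameters whose names contain one of these patterns. Below is a minimal sketch of how such a filter list is typically consumed; the apply_freeze helper and the PyTorch filtering loop are illustrative assumptions, not code taken from this repository.

    from torch import nn

    def apply_freeze(model: nn.Module, trainable_layers: list) -> None:
        # Freeze every parameter whose name matches none of the filters.
        # Example: "model.layers.31.mlp.gate_proj.weight" contains "31.mlp".
        for name, param in model.named_parameters():
            if not any(pattern in name for pattern in trainable_layers):
                param.requires_grad_(False)

With num_hidden_layers exposed as an argument instead of the hard-coded index 27, Freeze fine-tuning can address models of different depths, for example --finetuning_type freeze --num_hidden_layers 40 --num_layer_trainable 3 for a 40-layer model (flag names follow the dataclass fields; exact command-line usage depends on the training script).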