Introduction

Evolutionary computation can help tune a parameter of a machine learning system. Using CEGAN, a model for text generation tasks, as an example, this article introduces how evolutionary computation can be applied in PyTorch. The reference code is masaikk/ceGan: Composite Evolutionary GAN for Natural Language Generation with Temper Control (github.com). CEGAN is a generative adversarial network whose generator employs evolutionary computation. All code excerpts below are taken from the repository linked above.

Purpose

In this work, Gumbel-Softmax is used in place of the traditional softmax, with the goal of increasing the diversity of the generated text. Gumbel-Softmax carries a hyperparameter \tau that controls its temperature, and this temperature is exactly what the evolutionary computation in this work operates on. The role of evolutionary computation here is to pick the best among all offspring produced in each iteration. A similar approach is used in another text-generation paper, [1803.00657] Evolutionary Generative Adversarial Networks (arxiv.org).
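To make the role of \tau concrete, here is a minimal sketch (not from the repository) showing how the Gumbel-Softmax temperature controls sampling sharpness, using PyTorch's built-in F.gumbel_softmax:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.1]])

# Low temperature: samples concentrate near one-hot (sharper, less diverse).
low = F.gumbel_softmax(logits, tau=0.1)
# High temperature: samples approach uniform (smoother, more diverse).
high = F.gumbel_softmax(logits, tau=5.0)

print(low)   # e.g. tensor([[0.99, 0.01, 0.00]])
print(high)  # e.g. tensor([[0.41, 0.33, 0.26]])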


Method

The code defines several concrete evolution policies, such as lin and exp. Each corresponds to a temperature schedule \tau_i = f(\tau_{target}, i, N, adapt), which computes the current temperature from the target temperature \tau_{target}, the current iteration i, the total number of adversarial iterations N, and the policy adapt. The implementation is shown below:


import numpy as np


def get_fixed_temperature(temper, i, N, adapt):
    """A function to set up different temperature control policies"""
    if adapt == 'no':
        temper_var_np = 1.0  # no increase, origin: temper
    elif adapt == 'lin':
        temper_var_np = 1 + i / (N - 1) * (temper - 1)  # linear increase
    elif adapt == 'exp':
        temper_var_np = temper ** (i / N)  # exponential increase
    elif adapt == 'log':
        temper_var_np = 1 + (temper - 1) / np.log(N) * np.log(i + 1)  # logarithmic increase
    elif adapt == 'sigmoid':
        temper_var_np = (temper - 1) * 1 / (1 + np.exp((N / 2 - i) * 20 / N)) + 1  # sigmoid increase
    elif adapt == 'quad':
        temper_var_np = (temper - 1) / (N - 1) ** 2 * i ** 2 + 1  # quadratic increase
    elif adapt == 'sqrt':
        temper_var_np = (temper - 1) / np.sqrt(N - 1) * np.sqrt(i) + 1  # square-root increase
    else:
        raise Exception("Unknown adapt type!")
    print('******* chosen evolution choice as {} and temp is {}'.format(adapt, temper_var_np))
    print('####### i =  {} and raw_temp is {} while N is {}'.format(i, temper, N))
    log_info = '******* chosen evolution choice as {} and temp is {}\n'.format(adapt,
                                                                               temper_var_np) + '####### i =  {} and raw_temp is {} while N is {}\n'.format(
        i, temper, N)
    return temper_var_np, log_info
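As a quick illustration of how the schedules differ, the function can be called with each policy at the same step; temper=100, i=1000, and N=2000 below are illustrative values, not necessarily the repository's defaults:

# Compare all schedules halfway through training (illustrative values only)
for adapt in ['no', 'lin', 'exp', 'log', 'sigmoid', 'quad', 'sqrt']:
    temp, _ = get_fixed_temperature(temper=100, i=1000, N=2000, adapt=adapt)
    print('{:>8}: {:.2f}'.format(adapt, temp))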

One of the candidate mutation policies is chosen at random (mimicking the randomness of evolution in nature), and the schedule above is invoked several times within a round to produce the round's candidate offspring temperatures. The best-performing of these offspring is then selected as the one used in this round of training (i.e., as the temperature setting). By default, three offspring are produced per round, and the best of the three is kept.

@staticmethod
def get_evo_temp(cur_step):
    """Randomly pick temperature policies and compute the candidate
    temperatures for the current adversarial step.
    (Relies on `cfg`, `random`, `torch` and `get_fixed_temperature`
    imported at module level in the repository.)
    """
    fn_mu_temp_type = cfg.fn_mu_temp.split()
    mu_temp_type = cfg.mu_temp.split()
    all_temp = list()
    temp_info_to_log = ''
    # Candidate 1: temperature at the current step, with a random policy
    temp_n, temp_log_n = get_fixed_temperature(cfg.temperature, cur_step, cfg.ADV_train_epoch,
                                               random.choice(fn_mu_temp_type))
    # all_temp.append(get_fixed_temperature(1.0, 0, 0, 'no'))  # temp=1.0
    all_temp.append(temp_n)  # current step
    temp_info_to_log += temp_log_n
    # Candidate 2: temperature one evo_temp_step ahead of the current step
    temp_n1, temp_log_n1 = get_fixed_temperature(cfg.temperature, cur_step + cfg.evo_temp_step, cfg.ADV_train_epoch,
                                                 random.choice(mu_temp_type))
    all_temp.append(temp_n1)
    temp_info_to_log += temp_log_n1
    # Candidate 3: one evo_temp_step behind (only after enough steps)
    if cur_step > cfg.evo_temp_step:
        temp_n2, temp_log_n2 = get_fixed_temperature(cfg.temperature, cur_step - cfg.evo_temp_step,
                                                     cfg.ADV_train_epoch,
                                                     random.choice(mu_temp_type))
        all_temp.append(temp_n2)
        temp_info_to_log += temp_log_n2
    return torch.Tensor(all_temp), temp_info_to_log  # two or three candidate temps
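The same candidate-generation idea, written as a standalone sketch for clarity (all default values and the policies tuple here are hypothetical stand-ins for what the real method reads from cfg):

import random

def evo_temp_candidates(cur_step, n_epochs=2000, target_temp=100,
                        evo_temp_step=1, policies=('lin', 'exp', 'log')):
    # Evaluate the schedule at the current step and at +/- evo_temp_step,
    # each with a randomly drawn policy, mirroring get_evo_temp above.
    steps = [cur_step, cur_step + evo_temp_step]
    if cur_step > evo_temp_step:
        steps.append(cur_step - evo_temp_step)
    return [get_fixed_temperature(target_temp, s, n_epochs,
                                  random.choice(policies))[0]
            for s in steps]

print(evo_temp_candidates(500))  # two or three candidate temperatures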

get_evo_temp thus returns this round's candidate temperatures (up to three) together with their log messages. Next, real data must be sampled to test the offspring against. Note that this selection step should not be recorded in the computation graph (no gradients are needed for it), so torch.no_grad() is used:

def prepare_eval_real_data(self):
    # Sample eval_b_num random batches of real data as one-hot vectors,
    # outside the autograd graph.
    with torch.no_grad():
        self.eval_real_samples = torch.cat(
            [F.one_hot(self.train_data.random_batch()['target'], cfg.vocab_size).float()
             for _ in range(cfg.eval_b_num)], dim=0)
        if cfg.CUDA:
            self.eval_real_samples = self.eval_real_samples.cuda()
        # Cache the discriminators' outputs on real data for later scoring
        if cfg.eval_type == 'rsgan' or cfg.eval_type == 'Ra':
            self.eval_d_out_real_vec = self.vec_dis(self.eval_real_samples)
            self.eval_d_out_real_sen = self.sen_dis(torch.argmax(self.eval_real_samples, dim=2))
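A quick aside on why torch.no_grad() matters here: everything computed inside the context is excluded from autograd's graph, so evaluating candidates neither builds gradient history nor spends memory on it. A self-contained illustration:

import torch

w = torch.ones(3, requires_grad=True)

y = (w * 2).sum()
print(y.requires_grad)  # True  -- tracked by autograd

with torch.no_grad():
    z = (w * 2).sum()
print(z.requires_grad)  # False -- computed outside the graph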

The complete offspring-selection procedure in the generator is shown below:

def evolve_generator_with_temp(self, cur_adv_step, evo_g_step):
    # evaluate real data
    self.prepare_eval_real_data()
    best_score = np.zeros(cfg.n_parent)
    best_fit = []
    best_child = []
    best_child_opt = []
    best_fake_samples = []
    selected_mutation = []
    count = 0
    self.temp_log.log(logging.INFO, '{} Epoch {} train...'.format(get_format_time(), cur_adv_step))
    # all children share the same real data output from Discriminator
    with torch.no_grad():
        real_samples = F.one_hot(self.train_data.random_batch()['target'], cfg.vocab_size).float()
        if cfg.CUDA:
            real_samples = real_samples.cuda()
        self.d_out_real_vec = self.vec_dis(real_samples)
        self.d_out_real_sen = self.sen_dis(torch.argmax(real_samples, dim=2))
    for i, (parent, parent_opt) in enumerate(zip(self.parents, self.parent_adv_opts)):
        for j, criterionG in enumerate(self.G_criterion):
            all_temp, info_to_log = self.get_evo_temp(cur_adv_step)  # get evo temp
            self.temp_log.log(logging.INFO, info_to_log)
            all_temp_with_score = []
            temp_score = float('-inf')
            temp_fit = None
            temp_child = None
            temp_child_opt = None
            temp_fake_samples = None
            best_child_index = 0
            # Selection based on temperature, use eval_type=nll
            for temp_index in range(len(all_temp)):
                temp = all_temp[temp_index]
                # Variation
                self.load_gen(parent, parent_opt)  # load state dict to self.gen
                self.gen.temperature.data = temp  # update Generator temperature
                self.variation(evo_g_step, criterionG)
                # Evaluation
                self.prepare_eval_fake_data()  # evaluation fake data
                # print('epoch {} to evolution temp'.format(evo_g_step))
                # _, _, t_score = self.evaluation('Ra')  # for temp evolutionary
                print('\n\nEpoch {}, criterionG {} to evolution loss...'.format(cur_adv_step, j))
                # self.temp_log.log(logging.INFO,
                #     '\n{} Epoch {}, criterionG {} to evolution loss...'.format(get_format_time(), cur_adv_step, j))
                loss_Fq, loss_Fd, loss_score = self.evaluation(cfg.eval_type)  # for loss evolutionary
                t_score = loss_score
                all_temp_with_score.append([temp.item(), t_score])
                if t_score > temp_score:
                    temp_score = loss_score
                    temp_fit = [loss_Fq, loss_Fd, loss_score]
                    temp_child = copy.deepcopy(self.gen.state_dict())
                    temp_child_opt = copy.deepcopy(self.gen_adv_opt.state_dict())
                    temp_fake_samples = copy.deepcopy(self.eval_fake_samples)
                    best_child_index = temp_index
                    # print('       This epoch temperature:' + str(temp.item()))
                    # self.temp_log.log(logging.INFO, '       This epoch temperature:' + str(temp.item()))
            self.temp_log.log(logging.INFO,
                              ' This epoch temperature: {} with score: {} with child_index as {}'.format(
                                  all_temp_with_score[best_child_index][0],
                                  all_temp_with_score[best_child_index][1], best_child_index))
            print('             This epoch all temperature with score are {}'.format(str(all_temp_with_score)))
            self.temp_log.log(logging.INFO,
                              '{} This epoch {} all temperature with score are {}'.format(get_format_time(),
                                                                                          cur_adv_step,
                                                                                          str(all_temp_with_score)))
            # all_temp_with_score=[]
            # Selection based on mu_type, use eval_type=cfg.eval_type
            if count < cfg.n_parent:
                best_score[count] = temp_score
                best_fit.append(temp_fit)
                best_child.append(temp_child)
                best_child_opt.append(temp_child_opt)
                best_fake_samples.append(temp_fake_samples)
                selected_mutation.append(criterionG.loss_mode)
            else:  # larger than previous child, replace it
                fit_com = temp_score - best_score
                if max(fit_com) > 0:
                    id_replace = np.where(fit_com == max(fit_com))[0][0]
                    best_score[id_replace] = temp_score
                    best_fit[id_replace] = temp_fit
                    best_child[id_replace] = temp_child
                    best_child_opt[id_replace] = temp_child_opt
                    best_fake_samples[id_replace] = temp_fake_samples
                    selected_mutation[id_replace] = criterionG.loss_mode
            count += 1
    self.parents = copy.deepcopy(best_child)
    self.parent_adv_opts = copy.deepcopy(best_child_opt)
    self.best_fake_samples = torch.cat(best_fake_samples, dim=0)
    return best_score, np.array(best_fit), selected_mutation

Within this function, the evaluation routine is called for every offspring to compute its score:

# Selection based on temperature, use eval_type=nll
for temp_index in range(len(all_temp)):
    temp = all_temp[temp_index]
    # Variation
    self.load_gen(parent, parent_opt)  # load state dict to self.gen
    self.gen.temperature.data = temp  # update Generator temperature
    self.variation(evo_g_step, criterionG)
    # Evaluation
    self.prepare_eval_fake_data()  # evaluation fake data
    # print('epoch {} to evolution temp'.format(evo_g_step))
    # _, _, t_score = self.evaluation('Ra')  # for temp evolutionary
    print('\n\nEpoch {}, criterionG {} to evolution loss...'.format(cur_adv_step, j))
    # self.temp_log.log(logging.INFO,
    #     '\n{} Epoch {}, criterionG {} to evolution loss...'.format(get_format_time(), cur_adv_step, j))
    loss_Fq, loss_Fd, loss_score = self.evaluation(cfg.eval_type)  # for loss evolutionary
    t_score = loss_score
    all_temp_with_score.append([temp.item(), t_score])
    if t_score > temp_score:
        temp_score = loss_score
        temp_fit = [loss_Fq, loss_Fd, loss_score]
        temp_child = copy.deepcopy(self.gen.state_dict())
        temp_child_opt = copy.deepcopy(self.gen_adv_opt.state_dict())
        temp_fake_samples = copy.deepcopy(self.eval_fake_samples)
        best_child_index = temp_index
        # print('       This epoch temperature:' + str(temp.item()))
        # self.temp_log.log(logging.INFO, '       This epoch temperature:' + str(temp.item()))
self.temp_log.log(logging.INFO,
                  ' This epoch temperature: {} with score: {} with child_index as {}'.format(
                      all_temp_with_score[best_child_index][0],
                      all_temp_with_score[best_child_index][1], best_child_index))
print('             This epoch all temperature with score are {}'.format(str(all_temp_with_score)))
self.temp_log.log(logging.INFO,
                  '{} This epoch {} all temperature with score are {}'.format(get_format_time(),
                                                                              cur_adv_step,
                                                                              str(all_temp_with_score)))

The score is produced by loss_Fq, loss_Fd, loss_score = self.evaluation(cfg.eval_type). In this task, generated text is judged on two components, diversity and realism; a score computed from these two metrics decides how good each offspring is.
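The exact computation depends on cfg.eval_type, but the general shape follows the Evolutionary GAN paper cited above, where fitness is a weighted sum F = Fq + \gamma * Fd of a quality term and a diversity term. A minimal sketch of that idea (the gamma value and both proxy terms are illustrative, not the repository's exact evaluation):

import torch

def fitness_sketch(d_out_fake, d_grad_norm, gamma=0.5):
    # Quality proxy: how well generated samples fool the discriminator.
    f_q = d_out_fake.mean().item()
    # Diversity proxy: EGAN uses the negative log of the discriminator's
    # gradient norm; smaller updates to D suggest less mode collapse.
    f_d = -torch.log(d_grad_norm).item()
    # Combined fitness; children are ranked by this score.
    return f_q, f_d, f_q + gamma * f_d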

Finally, the scores are compared and the best offspring are selected to become this round's parents (from which the next round's offspring will be generated):

        if count < cfg.n_parent:
            best_score[count] = temp_score
            best_fit.append(temp_fit)
            best_child.append(temp_child)
            best_child_opt.append(temp_child_opt)
            best_fake_samples.append(temp_fake_samples)
            selected_mutation.append(criterionG.loss_mode)
        else:  # larger than previous child, replace it
            fit_com = temp_score - best_score
            if max(fit_com) > 0:
                id_replace = np.where(fit_com == max(fit_com))[0][0]
                best_score[id_replace] = temp_score
                best_fit[id_replace] = temp_fit
                best_child[id_replace] = temp_child
                best_child_opt[id_replace] = temp_child_opt
                best_fake_samples[id_replace] = temp_fake_samples
                selected_mutation[id_replace] = criterionG.loss_mode
        count += 1
self.parents = copy.deepcopy(best_child)
self.parent_adv_opts = copy.deepcopy(best_child_opt)
self.best_fake_samples = torch.cat(best_fake_samples, dim=0)
return best_score, np.array(best_fit), selected_mutation
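To see the replacement rule in isolation: once best_score holds cfg.n_parent entries, each later child is compared against the whole population and replaces the parent it beats by the largest margin. A toy run with made-up scores:

import numpy as np

best_score = np.array([0.7, 0.3, 0.5])  # current parents' scores (toy values)
temp_score = 0.6                        # a new child's score

fit_com = temp_score - best_score       # margin over each parent
if max(fit_com) > 0:                    # the child beats at least one parent
    id_replace = np.where(fit_com == max(fit_com))[0][0]
    best_score[id_replace] = temp_score

print(best_score)  # [0.7 0.6 0.5] -- the weakest parent was replaced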

Conclusion

In summary, evolutionary computation is applied in this text-generation task to choose a good value for a hyperparameter. Much like reproduction in nature, several offspring are derived from the parents, and an evaluation function (acting as natural selection) picks the best-scoring offspring to serve as the parents of the next round. In the end, only the fittest survive.