Advertisement

《歌声合成系列教程2》歌声合成特征提取

阅读量:

音频特征提取包括时间模型(Timelag)、时长模型(Duration)和语音特征(Acoustic)的提取,参考了 Merlin 的相关内容。文本还提供了下载相关数据集的链接。此外,代码中定义了多个数据源和模型,用于生成相应的特征文件,并提供了保存路径和特征维度的信息。

音频特征提取

  • 歌声样本集下载链接

  • 1.time lag:时间模型特征,参考自merlin

  • 2.duration length:时长模型特征,参考自merlin

  • 3.acoustic:语音特征,参考自merlin
复制代码
    # -*- coding:utf-8 -*-
    # /usr/bin/python
    '''
    -------------------------------------------------
       File Name   :  prepare_features
       Description :  AIM: 特征提取
                  Functions: 1. 
                             2. 
       Functions   :
       Envs        :  python == 
                  pip install  
       Date        :  2020/11/17  下午3:09
       CodeStyle   :  规范,简洁,易懂,可阅读,可维护,可移植!
    -------------------------------------------------
       Change Activity:
          2020/11/17 : tag
    -------------------------------------------------
    __Author__ = "Yan Errol"
    __Email__ = "2681506@gmail.com"
    __Copyright__ = "Copyright 2020, Yan Errol"
    -------------------------------------------------
    '''
    
    
    import numpy as np
    from os.path import join
    from tqdm import tqdm
    from os.path import basename, splitext, exists
    import os
    import sys
    from nnmnkwii.datasets import FileSourceDataset
    from data.data_source import MusicalLinguisticSource, TimeLagFeatureSource,DurationFeatureSource, WORLDAcousticSource
    
    
    def _dump_feature_pairs(in_dataset, out_dataset, in_root, out_root):
        """Save every (input, output) feature pair of a dataset as .npy files.

        For each utterance index, writes ``<in_root>/<name>-feats.npy`` and
        ``<out_root>/<name>-feats.npy``, where ``<name>`` is derived from the
        first collected file of the input dataset.
        """
        for idx in tqdm(range(len(in_dataset))):
            x, y = in_dataset[idx], out_dataset[idx]
            name = splitext(basename(in_dataset.collected_files[idx][0]))[0]
            np.save(join(in_root, name + "-feats.npy"), x, allow_pickle=False)
            np.save(join(out_root, name + "-feats.npy"), y, allow_pickle=False)


    def prepare_features_fun(utt_list, out_dir, config, logger):
        """Extract and save features for the timelag, duration and acoustic models.

        Builds (linguistic input, target output) dataset pairs for the three
        models from the label/wav directories referenced in ``config.data``,
        then dumps every utterance to ``<out_dir>/{in,out}_<model>/<name>-feats.npy``
        (plus ``<name>-wave.npy`` for the acoustic model's waveform).

        Args:
            utt_list: List of utterance ids to process.
            out_dir: Root directory the ``.npy`` feature files are written under.
            config: Config object; only the ``config.data.*`` fields are read.
            logger: Logger used to report created directories and feature dims.
        """
        question_path = config.data.question_path

        # Timelag model
        # in: musical/linguistic context from phone-level score labels
        # out: time lag between score and aligned labels
        in_timelag_source = MusicalLinguisticSource(
            utt_list,
            config.data.timelag_label_phone_score_dir,
            add_frame_features=False,
            subphone_features=None,
            question_path=question_path,
            log_f0_conditioning=config.data.log_f0_conditioning)
        out_timelag_source = TimeLagFeatureSource(
            utt_list,
            config.data.timelag_label_phone_score_dir,
            config.data.timelag_label_phone_align_dir)
        in_timelag = FileSourceDataset(in_timelag_source)
        out_timelag = FileSourceDataset(out_timelag_source)

        # Duration model
        # in: musical/linguistic context
        # out: phone duration
        in_duration_source = MusicalLinguisticSource(
            utt_list,
            config.data.duration_label_dir,
            add_frame_features=False,
            subphone_features=None,
            question_path=question_path,
            log_f0_conditioning=config.data.log_f0_conditioning)
        out_duration_source = DurationFeatureSource(
            utt_list, config.data.duration_label_dir)
        in_duration = FileSourceDataset(in_duration_source)
        out_duration = FileSourceDataset(out_duration_source)

        # Acoustic model
        # in: frame-level musical/linguistic context
        # out: WORLD acoustic features (+ the raw waveform)
        in_acoustic_source = MusicalLinguisticSource(
            utt_list,
            config.data.acoustic_label_dir, question_path,
            add_frame_features=True,
            subphone_features=config.data.acoustic_subphone_features,
            log_f0_conditioning=config.data.log_f0_conditioning)
        out_acoustic_source = WORLDAcousticSource(
            utt_list,
            config.data.wav_dir, config.data.acoustic_label_dir,
            question_path, use_harvest=config.data.acoustic_use_harvest,
            f0_ceil=config.data.acoustic_f0_ceil,
            f0_floor=config.data.acoustic_f0_floor,
            frame_period=config.data.acoustic_frame_period,
            mgc_order=config.data.acoustic_mgc_order,
            num_windows=config.data.acoustic_num_windows,
            relative_f0=config.data.acoustic_relative_f0)
        in_acoustic = FileSourceDataset(in_acoustic_source)
        out_acoustic = FileSourceDataset(out_acoustic_source)

        # Output directory layout: one in_*/out_* pair per model.
        in_timelag_root = join(out_dir, "in_timelag")
        out_timelag_root = join(out_dir, "out_timelag")
        in_duration_root = join(out_dir, "in_duration")
        out_duration_root = join(out_dir, "out_duration")
        in_acoustic_root = join(out_dir, "in_acoustic")
        out_acoustic_root = join(out_dir, "out_acoustic")
        for d in [in_timelag_root, out_timelag_root,
                  in_duration_root, out_duration_root,
                  in_acoustic_root, out_acoustic_root]:
            # exist_ok avoids the check-then-create race of the exists() idiom.
            logger.info("mkdirs: {}".format(d))
            os.makedirs(d, exist_ok=True)

        # Save features for timelag model
        if config.data.timelag_enabled:
            logger.info("Timelag linguistic feature dim: {}".format(in_timelag[0].shape[1]))
            logger.info("Timelag feature dim: {}".format(out_timelag[0].shape[1]))
            _dump_feature_pairs(in_timelag, out_timelag,
                                in_timelag_root, out_timelag_root)

        # Save features for duration model
        if config.data.duration_enabled:
            logger.info("Duration linguistic feature dim: {}".format(in_duration[0].shape[1]))
            logger.info("Duration feature dim: {}".format(out_duration[0].shape[1]))
            _dump_feature_pairs(in_duration, out_duration,
                                in_duration_root, out_duration_root)

        # Save features for acoustic model; its output items are
        # (features, waveform) tuples, so the waveform gets its own file.
        if config.data.acoustic_enabled:
            logger.info("Acoustic linguistic feature dim: {}".format(in_acoustic[0].shape[1]))
            logger.info("Acoustic feature dim: {}".format(out_acoustic[0][0].shape[1]))
            for idx in tqdm(range(len(in_acoustic))):
                x, (y, wave) = in_acoustic[idx], out_acoustic[idx]
                name = splitext(basename(in_acoustic.collected_files[idx][0]))[0]
                xpath = join(in_acoustic_root, name + "-feats.npy")
                ypath = join(out_acoustic_root, name + "-feats.npy")
                wpath = join(out_acoustic_root, name + "-wave.npy")
                np.save(xpath, x, allow_pickle=False)
                np.save(ypath, y, allow_pickle=False)
                np.save(wpath, wave, allow_pickle=False)

全部评论 (0)

还没有任何评论哟~