《歌声合成系列教程2》歌声合成特征提取
发布时间
阅读量:
阅读量
音频特征提取包括时间模型、时长模型和语音特征的提取,参考了 Merlin 的相关内容。文本还提供了下载相关数据集的链接(Timelag、Duration 和 Acoustic 模型)。此外,代码中定义了多个数据源和模型,用于生成相应的特征文件,并提供了保存路径和特征维度的信息。
音频特征提取
- 1. time lag:时间模型特征,参考自 merlin
- 2. duration length:时长模型特征,参考自 merlin
- 3. acoustic:语音特征,参考自 merlin
# -*- coding:utf-8 -*-
# /usr/bin/python
'''
-------------------------------------------------
File Name : prepare_features
Description : AIM: 特征提取
Functions : prepare_features_fun —— 提取 timelag / duration / acoustic 三类特征
Envs : python 3.x
                   pip install numpy tqdm nnmnkwii
Date : 2020/11/17 下午3:09
CodeStyle : 规范,简洁,易懂,可阅读,可维护,可移植!
-------------------------------------------------
Change Activity:
2020/11/17 : tag
-------------------------------------------------
__Author__ = "Yan Errol"
__Email__ = "2681506@gmail.com"
__Copyright__ = "Copyright 2020, Yan Errol"
-------------------------------------------------
'''
import numpy as np
from os.path import join
from tqdm import tqdm
from os.path import basename, splitext, exists
import os
import sys
from nnmnkwii.datasets import FileSourceDataset
from data.data_source import MusicalLinguisticSource, TimeLagFeatureSource,DurationFeatureSource, WORLDAcousticSource
def prepare_features_fun(utt_list, out_dir, config, logger):
    """Extract timelag / duration / acoustic features and save them as .npy.

    For each sub-model enabled in ``config.data`` (``timelag_enabled``,
    ``duration_enabled``, ``acoustic_enabled``) this builds the input
    (musical/linguistic context) and output (target feature) datasets and
    writes one ``<utt>-feats.npy`` pair per utterance under ``out_dir``.
    The acoustic model additionally writes the waveform as ``<utt>-wave.npy``.

    Args:
        utt_list: utterance ids to process.
        out_dir: root directory that receives the ``in_*``/``out_*`` subdirs.
        config: configuration object; all paths and extraction parameters
            are read from ``config.data``.
        logger: logger used for progress and feature-dimension reporting.
    """
    question_path = config.data.question_path
    log_f0_conditioning = config.data.log_f0_conditioning

    # Output directory layout.  All six directories are created up front,
    # whether or not the corresponding sub-model is enabled, to keep the
    # on-disk layout identical to the original behavior.
    in_timelag_root = join(out_dir, "in_timelag")
    out_timelag_root = join(out_dir, "out_timelag")
    in_duration_root = join(out_dir, "in_duration")
    out_duration_root = join(out_dir, "out_duration")
    in_acoustic_root = join(out_dir, "in_acoustic")
    out_acoustic_root = join(out_dir, "out_acoustic")
    for d in (in_timelag_root, out_timelag_root, in_duration_root,
              out_duration_root, in_acoustic_root, out_acoustic_root):
        if not os.path.exists(d):
            logger.info("mkdirs: {}".format(d))
        # exist_ok guards against races between parallel feature jobs.
        os.makedirs(d, exist_ok=True)

    def _save_split(tag, in_dataset, out_dataset, in_root, out_root,
                    with_wave=False):
        # Shared save loop for one sub-model: writes the (input, output)
        # feature pair -- and optionally the raw waveform -- per utterance.
        # The acoustic output source yields (features, waveform) tuples.
        out_sample = out_dataset[0][0] if with_wave else out_dataset[0]
        logger.info("{} linguistic feature dim: {}".format(
            tag, in_dataset[0].shape[1]))
        logger.info("{} feature dim: {}".format(tag, out_sample.shape[1]))
        for idx in tqdm(range(len(in_dataset))):
            x = in_dataset[idx]
            # Utterance name is derived from the collected input label file.
            name = splitext(basename(in_dataset.collected_files[idx][0]))[0]
            np.save(join(in_root, name + "-feats.npy"), x, allow_pickle=False)
            if with_wave:
                y, wave = out_dataset[idx]
                np.save(join(out_root, name + "-feats.npy"), y,
                        allow_pickle=False)
                np.save(join(out_root, name + "-wave.npy"), wave,
                        allow_pickle=False)
            else:
                y = out_dataset[idx]
                np.save(join(out_root, name + "-feats.npy"), y,
                        allow_pickle=False)

    # Timelag model (ref. Merlin)
    # in: musical/linguistic context, out: time lag.
    # Datasets are only built when the sub-model is enabled; the original
    # built all of them unconditionally and then ignored the disabled ones.
    if config.data.timelag_enabled:
        in_timelag = FileSourceDataset(MusicalLinguisticSource(
            utt_list,
            config.data.timelag_label_phone_score_dir,
            add_frame_features=False, subphone_features=None,
            question_path=question_path,
            log_f0_conditioning=log_f0_conditioning))
        out_timelag = FileSourceDataset(TimeLagFeatureSource(
            utt_list,
            config.data.timelag_label_phone_score_dir,
            config.data.timelag_label_phone_align_dir))
        _save_split("Timelag", in_timelag, out_timelag,
                    in_timelag_root, out_timelag_root)

    # Duration model (ref. Merlin)
    # in: musical/linguistic context, out: phoneme duration.
    if config.data.duration_enabled:
        in_duration = FileSourceDataset(MusicalLinguisticSource(
            utt_list,
            config.data.duration_label_dir,
            add_frame_features=False, subphone_features=None,
            question_path=question_path,
            log_f0_conditioning=log_f0_conditioning))
        out_duration = FileSourceDataset(DurationFeatureSource(
            utt_list, config.data.duration_label_dir))
        _save_split("Duration", in_duration, out_duration,
                    in_duration_root, out_duration_root)

    # Acoustic model (ref. Merlin)
    # in: frame-level musical/linguistic context, out: WORLD acoustic
    # features plus the raw waveform.
    if config.data.acoustic_enabled:
        # question_path is passed by keyword here for consistency with the
        # timelag/duration sources above (originally positional).
        in_acoustic = FileSourceDataset(MusicalLinguisticSource(
            utt_list,
            config.data.acoustic_label_dir,
            question_path=question_path,
            add_frame_features=True,
            subphone_features=config.data.acoustic_subphone_features,
            log_f0_conditioning=log_f0_conditioning))
        out_acoustic = FileSourceDataset(WORLDAcousticSource(
            utt_list,
            config.data.wav_dir, config.data.acoustic_label_dir,
            question_path, use_harvest=config.data.acoustic_use_harvest,
            f0_ceil=config.data.acoustic_f0_ceil,
            f0_floor=config.data.acoustic_f0_floor,
            frame_period=config.data.acoustic_frame_period,
            mgc_order=config.data.acoustic_mgc_order,
            num_windows=config.data.acoustic_num_windows,
            relative_f0=config.data.acoustic_relative_f0))
        _save_split("Acoustic", in_acoustic, out_acoustic,
                    in_acoustic_root, out_acoustic_root, with_wave=True)
全部评论 (0)
还没有任何评论哟~
