RNNOISE模型训练
0
RNNOISE模型训练
环境
sudo apt install vim parallel
sudo apt install build-essential
sudo apt install python3 python3-pip
sudo apt install ffmpeg libtool autoconf
vim ~/.pip/pip.conf
---
[global]
index-url=https://pypi.tuna.tsinghua.edu.cn/simple
---
pip3 install tqdm torch numpy
数据
# 提取
ffmpeg.exe -i .\source.wav -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.pcm
ffmpeg.exe -i .\source.ts -vn -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.pcm
# 去掉超过0.4秒的静音
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.1.pcm -af silenceremove=stop_periods=-1:stop_duration=0.4:stop_threshold=-60dB -ar 48000 -ac 1 -f s16le noise.2.pcm
# 列表
ls *.wav > list.0.txt
vim list.0.txt
:%s/noise/file noise/g
# 乱序
cat list.0.txt | sort -R > list.1.txt
cat list.0.txt | sort -R > list.2.txt
cat list.0.txt | sort -R > list.3.txt
# 合并
ffmpeg -f concat -safe 0 -i list.0.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.0.pcm
ffmpeg -f concat -safe 0 -i list.1.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.1.pcm
ffmpeg -f concat -safe 0 -i list.2.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.2.pcm
ffmpeg -f concat -safe 0 -i list.3.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.3.pcm
# 拼接
cat noise.*.pcm > noise.pcm
rm noise.*.pcm
# 振幅
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter:a "volume=0.5" -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.s.pcm
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter:a "volume=2.0" -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.l.pcm
# 频率
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter_complex "aresample=32000" -ar 32000 -ac 1 -f s16le -c:a pcm_s16le noise.32000.pcm
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter_complex "aresample=64000" -ar 64000 -ac 1 -f s16le -c:a pcm_s16le noise.64000.pcm
# 拼接
cat noise.pcm noise.l.pcm noise.s.pcm noise.32000.pcm noise.64000.pcm > noise.train.pcm
rm noise.pcm noise.l.pcm noise.s.pcm noise.32000.pcm noise.64000.pcm
# 切割
split -b 5G noise.train.pcm noise.pcm
# 列表
find ./ -name '*.mp3' > list.txt
vim list.txt
:%s/\.\//file \.\//g
# 合并
ffmpeg -f concat -safe 0 -i list.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le speech.train.pcm
# 切割
split -b 5G speech.train.pcm speech.pcm
# 播放
ffplay -ar 48000 -ac 1 -f s16le noise.pcm
# 混音
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -ar 48000 -ac 1 -f s16le -i speech.pcm -filter_complex amix=inputs=2:duration=shortest -ar 48000 -ac 1 -f s16le -c:a pcm_s16le mix.pcm
训练
训练音频:48000Hz采样、单声道、16位(s16le)的PCM音频数据
# 混合数据:mix.pcm
# 噪音数据:noise.pcm
# 原始数据:speech.pcm
# 克隆仓库
cd /data
git clone https://github.com/xiph/rnnoise.git
cd rnnoise
# 编译代码
./autogen.sh
./configure
make
# 提取特征
./dump_features speech.pcm noise.pcm features.f32 200000
./scripts/dump_features_parallel.sh ./dump_features speech.pcm noise.pcm features.f32 20000 10
# 模型训练
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 features.f32 ./ > train.log 2>&1
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 --batch-size=100 --lr=0.001 features.f32 ./ > train.log 2>&1
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 --batch-size=100 --lr=0.001 --initial-checkpoint=./checkpoints/rnnoise_1.pth features.f32 ./ > train.log 2>&1
# 导出权重
python3 dump_rnnoise_weights.py --quantize ./checkpoints/rnnoise_1.pth rnnoise_c
# 验证效果
cp torch/rnnoise/rnnoise_c/* ./src
make
./examples/rnnoise_demo mix.pcm output.pcm
数据集
- https://www.openslr.org/resources.php
- https://github.com/fighting41love/zhvoice
- https://aistudio.baidu.com/datasetdetail/133922
- https://aistudio.baidu.com/datasetdetail/150751