RNNOISE模型训练

0

RNNOISE模型训练

环境

sudo apt install vim parallel
sudo apt install build-essential
sudo apt install python3 python3-pip
sudo apt install ffmpeg libtool autoconf

mkdir -p ~/.pip
vim ~/.pip/pip.conf

---
[global]
index-url=https://pypi.tuna.tsinghua.edu.cn/simple
---

pip3 install tqdm torch numpy

数据

# 提取
ffmpeg.exe -i .\source.wav    -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.pcm
ffmpeg.exe -i .\source.ts -vn -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.pcm

# 去掉超过0.4秒的静音
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.1.pcm -af silenceremove=stop_periods=-1:stop_duration=0.4:stop_threshold=-60dB -ar 48000 -ac 1 -f s16le noise.2.pcm

# 列表
ls *.wav > list.0.txt
vim list.0.txt
:%s/^/file /

# 乱序
sort -R list.0.txt > list.1.txt
sort -R list.0.txt > list.2.txt
sort -R list.0.txt > list.3.txt

# 合并
ffmpeg -f concat -safe 0 -i list.0.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.0.pcm
ffmpeg -f concat -safe 0 -i list.1.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.1.pcm
ffmpeg -f concat -safe 0 -i list.2.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.2.pcm
ffmpeg -f concat -safe 0 -i list.3.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.3.pcm

# 拼接
cat noise.*.pcm > noise.pcm
rm  noise.*.pcm

# 振幅
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter:a "volume=0.5" -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.s.pcm
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter:a "volume=2.0" -ar 48000 -ac 1 -f s16le -c:a pcm_s16le noise.l.pcm

# 频率
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter_complex "aresample=32000" -ar 32000 -ac 1 -f s16le -c:a pcm_s16le noise.32000.pcm
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -filter_complex "aresample=64000" -ar 64000 -ac 1 -f s16le -c:a pcm_s16le noise.64000.pcm

# 拼接
cat noise.pcm noise.l.pcm noise.s.pcm noise.32000.pcm noise.64000.pcm > noise.train.pcm
rm  noise.pcm noise.l.pcm noise.s.pcm noise.32000.pcm noise.64000.pcm

# 切割(每块5G,输出文件名为 noise.pcmaa、noise.pcmab ……)
split -b 5G noise.train.pcm noise.pcm

# 列表
find ./ -name '*.mp3' > list.txt
vim list.txt
:%s/\.\//file \.\//g

# 合并
ffmpeg -f concat -safe 0 -i list.txt -ar 48000 -ac 1 -f s16le -c:a pcm_s16le speech.train.pcm

# 切割(每块5G,输出文件名为 speech.pcmaa、speech.pcmab ……)
split -b 5G speech.train.pcm speech.pcm

# 播放
ffplay -ar 48000 -ac 1 -f s16le noise.pcm

# 混音
ffmpeg -ar 48000 -ac 1 -f s16le -i noise.pcm -ar 48000 -ac 1 -f s16le -i speech.pcm -filter_complex amix=inputs=2:duration=shortest -ar 48000 -ac 1 -f s16le -c:a pcm_s16le mix.pcm

训练

训练音频要求:48000Hz采样率、单声道、16位(s16le)的PCM音频数据

# 混合数据:mix.pcm
# 噪音数据:noise.pcm
# 原始数据:speech.pcm

# 克隆仓库
cd /data
git clone https://github.com/xiph/rnnoise.git
cd rnnoise

# 编译代码
./autogen.sh
./configure
make

# 提取特征
./dump_features speech.pcm noise.pcm features.f32 200000
./scripts/dump_features_parallel.sh ./dump_features speech.pcm noise.pcm features.f32 20000 10
# 模型训练(训练和导出脚本位于 torch/rnnoise 目录下)
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 features.f32 ./ > train.log 2>&1
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 --batch-size=100 --lr=0.001 features.f32 ./ > train.log 2>&1
python3 train_rnnoise.py --gru-size=64 --cond-size=32 --epochs=256 --batch-size=100 --lr=0.001 --initial-checkpoint=./checkpoints/rnnoise_1.pth features.f32 ./ > train.log 2>&1
# 导出权重
python3 dump_rnnoise_weights.py --quantize ./checkpoints/rnnoise_1.pth rnnoise_c

# 验证效果
cp torch/rnnoise/rnnoise_c/* ./src
make
./examples/rnnoise_demo mix.pcm output.pcm

数据集

  • https://www.openslr.org/resources.php
  • https://github.com/fighting41love/zhvoice
  • https://aistudio.baidu.com/datasetdetail/133922
  • https://aistudio.baidu.com/datasetdetail/150751