Source code for so_vits_svc_fork.preprocessing.preprocess_split

from __future__ import annotations

from logging import getLogger
from pathlib import Path

import librosa
import soundfile as sf
from joblib import Parallel, delayed
from tqdm import tqdm
from tqdm_joblib import tqdm_joblib

LOG = getLogger(__name__)


def _process_one(
    input_path: Path,
    output_dir: Path,
    sr: int,
    *,
    max_length: float = 10.0,
    top_db: int = 30,
    frame_seconds: float = 0.5,
    hop_seconds: float = 0.1,
):
    """Split one audio file on silence and write the pieces as WAV files.

    The file is loaded as mono at ``sr``, non-silent intervals are located
    with ``librosa.effects.split``, and each interval is cut into pieces of
    at most ``max_length`` seconds. Every piece is written to ``output_dir``
    as ``<stem>_<start>_<end>.wav``, where start and end are offsets in
    seconds formatted with three decimals.

    Args:
        input_path: Audio file to read.
        output_dir: Directory receiving the pieces; created if missing.
        sr: Sampling rate passed to ``librosa.load``; the rate it returns is
            used for all sample/second conversions and for writing.
        max_length: Maximum duration of a written piece, in seconds.
        top_db: Forwarded to ``librosa.effects.split`` as ``top_db``.
        frame_seconds: Analysis frame length in seconds (converted to samples).
        hop_seconds: Analysis hop length in seconds (converted to samples).

    Returns:
        None. A file that cannot be loaded is logged as a warning and skipped.

    Raises:
        ValueError: If ``max_length`` covers less than one sample at the
            loaded sampling rate.
    """
    try:
        audio, sr = librosa.load(input_path, sr=sr, mono=True)
    except Exception as e:
        # Best effort: an unreadable input must not abort the rest of a batch.
        LOG.warning("Failed to read %s: %s", input_path, e)
        return

    # Loop-invariant piece size in samples. A zero step would make range()
    # fail with an opaque message and a negative one would silently write
    # nothing, so reject both explicitly.
    max_samples = int(sr * max_length)
    if max_samples < 1:
        raise ValueError(
            f"max_length={max_length} is shorter than one sample at sr={sr}"
        )

    intervals = librosa.effects.split(
        audio,
        top_db=top_db,
        frame_length=int(sr * frame_seconds),
        hop_length=int(sr * hop_seconds),
    )
    output_dir.mkdir(parents=True, exist_ok=True)
    for start, end in tqdm(intervals, desc=f"Writing {input_path}"):
        for sub_start in range(start, end, max_samples):
            # The last piece of an interval may be shorter than max_samples.
            sub_end = min(sub_start + max_samples, end)
            output_path = (
                output_dir
                / f"{input_path.stem}_{sub_start / sr:.3f}_{sub_end / sr:.3f}.wav"
            )
            sf.write(output_path, audio[sub_start:sub_end], sr)


def preprocess_split(
    input_dir: Path | str,
    output_dir: Path | str,
    sr: int,
    *,
    max_length: float = 10.0,
    top_db: int = 30,
    frame_seconds: float = 0.5,
    hop_seconds: float = 0.1,
    n_jobs: int = -1,
):
    """Split every file under ``input_dir`` into short clips in ``output_dir``.

    ``input_dir`` is searched recursively for files whose name contains a
    dot. Each one is handed to ``_process_one`` in a joblib worker, and its
    clips are written to the sub-directory of ``output_dir`` that mirrors the
    file's location relative to ``input_dir``.

    Args:
        input_dir: Root directory that is searched recursively for inputs.
        output_dir: Root directory for the clips; created if missing.
        sr: Sampling rate forwarded to ``_process_one``.
        max_length: Maximum clip duration in seconds.
        top_db: Silence threshold forwarded to ``_process_one``.
        frame_seconds: Analysis frame length in seconds.
        hop_seconds: Analysis hop length in seconds.
        n_jobs: Forwarded to ``joblib.Parallel``.

    Returns:
        None.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # "*.*" also matches directories whose name contains a dot. Those would
    # inflate the progress total and only produce a read failure in a worker,
    # so keep regular files only.
    input_paths = [path for path in input_dir.rglob("*.*") if path.is_file()]

    with tqdm_joblib(desc="Splitting", total=len(input_paths)):
        Parallel(n_jobs=n_jobs)(
            delayed(_process_one)(
                input_path,
                # Mirror the input's relative directory layout in the output.
                output_dir / input_path.relative_to(input_dir).parent,
                sr,
                max_length=max_length,
                top_db=top_db,
                frame_seconds=frame_seconds,
                hop_seconds=hop_seconds,
            )
            for input_path in input_paths
        )