so_vits_svc_fork.inference package

Submodules

so_vits_svc_fork.inference.core module

class so_vits_svc_fork.inference.core.Chunk(*, is_speech: bool, audio: ndarray[Any, dtype[float32]], start: int, end: int)[source]

Bases: object

audio: ndarray[Any, dtype[float32]]
property duration: float32
end: int
is_speech: bool
start: int
class so_vits_svc_fork.inference.core.Crossfader(*, additional_infer_before_len: int, additional_infer_after_len: int, crossfade_len: int, sola_search_len: int = 384)[source]

Bases: object

infer(input_audio: ndarray[Any, dtype[float32]]) → ndarray[Any, dtype[float32]][source]
process(input_audio: ndarray[Any, dtype[float32]], *args, **kwargs: Any) → ndarray[Any, dtype[float32]][source]

Chunks         : ■■■■■■□□□□□□
add last input : □■■■■■■
                     ■□□□□□□
infer          : □■■■■■■
                     ■□□□□□□
crossfade      : ▲■■■■■
                     ▲□□□□□

class so_vits_svc_fork.inference.core.RealtimeVC(*, svc_model: Svc, crossfade_len: int = 3840, additional_infer_before_len: int = 7680, additional_infer_after_len: int = 7680, split: bool = True)[source]

Bases: Crossfader

infer(input_audio: ndarray[Any, dtype[float32]], speaker: int | str, transpose: int, cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio', db_thresh: int = -40, pad_seconds: float = 0.5, chunk_seconds: float = 0.5) → ndarray[Any, dtype[float32]][source]
process(input_audio: ndarray[Any, dtype[float32]], *args: Any, **kwargs: Any) → ndarray[Any, dtype[float32]][source]

Chunks         : ■■■■■■□□□□□□
add last input : □■■■■■■
                     ■□□□□□□
infer          : □■■■■■■
                     ■□□□□□□
crossfade      : ▲■■■■■
                     ▲□□□□□

class so_vits_svc_fork.inference.core.RealtimeVC2(svc_model: Svc)[source]

Bases: object

chunk_store: list[Chunk]
process(input_audio: ndarray[Any, dtype[float32]], speaker: int | str, transpose: int, cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio', db_thresh: int = -40, chunk_seconds: float = 0.5) → ndarray[Any, dtype[float32]][source]
class so_vits_svc_fork.inference.core.Svc(*, net_g_path: Path | str, config_path: Path | str, device: device | str | None = None, cluster_model_path: Path | str | None = None, half: bool = False)[source]

Bases: object

get_unit_f0(audio: ndarray[Any, dtype[float32]], tran: int, cluster_infer_ratio: float, speaker: int | str, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio')[source]
infer(speaker: int | str, transpose: int, audio: ndarray[Any, dtype[float32]], cluster_infer_ratio: float = 0, auto_predict_f0: bool = False, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio') → tuple[Tensor, int][source]
infer_silence(audio: ndarray[Any, dtype[float32]], *, speaker: int | str, transpose: int = 0, auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio', db_thresh: int = -40, pad_seconds: float = 0.5, chunk_seconds: float = 0.5, absolute_thresh: bool = False, max_chunk_seconds: float = 40) → ndarray[Any, dtype[float32]][source]
load_model()[source]
so_vits_svc_fork.inference.core.pad_array(array_, target_length: int)[source]
so_vits_svc_fork.inference.core.sola_crossfade(first: ndarray[Any, dtype[float32]], second: ndarray[Any, dtype[float32]], crossfade_len: int, sola_search_len: int) → ndarray[Any, dtype[float32]][source]
so_vits_svc_fork.inference.core.split_silence(audio: ndarray[Any, dtype[float32]], top_db: int = 40, ref: float | Callable[[ndarray[Any, dtype[float32]]], float] = 1, frame_length: int = 2048, hop_length: int = 512, aggregate: Callable[[ndarray[Any, dtype[float32]]], float] = <function mean>, max_chunk_length: int = 0) → Iterable[Chunk][source]

so_vits_svc_fork.inference.main module

so_vits_svc_fork.inference.main.infer(*, input_path: Path | str | Sequence[Path | str], output_path: Path | str | Sequence[Path | str], model_path: Path | str, config_path: Path | str, recursive: bool = False, speaker: int | str, cluster_model_path: Path | str | None = None, transpose: int = 0, auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio', db_thresh: int = -40, pad_seconds: float = 0.5, chunk_seconds: float = 0.5, absolute_thresh: bool = False, max_chunk_seconds: float = 40, device: str | device = device(type='cpu'))[source]
so_vits_svc_fork.inference.main.realtime(*, model_path: Path | str, config_path: Path | str, speaker: str, cluster_model_path: Path | str | None = None, transpose: int = 0, auto_predict_f0: bool = False, cluster_infer_ratio: float = 0, noise_scale: float = 0.4, f0_method: Literal['crepe', 'crepe-tiny', 'parselmouth', 'dio', 'harvest'] = 'dio', db_thresh: int = -40, pad_seconds: float = 0.5, chunk_seconds: float = 0.5, crossfade_seconds: float = 0.05, additional_infer_before_seconds: float = 0.2, additional_infer_after_seconds: float = 0.1, block_seconds: float = 0.5, version: int = 2, input_device: int | str | None = None, output_device: int | str | None = None, device: str | device = device(type='cpu'), passthrough_original: bool = False)[source]