class TextAudioDataset(Dataset):
    """Dataset of preprocessed ``*.data.pt`` samples listed in a text file.

    The file list is read from ``hps.data.validation_files`` when
    ``is_validation`` is true, otherwise from ``hps.data.training_files``.
    It is decoded as UTF-8 and holds one path per line.  Every listed path
    ``<dir>/<name>`` is mapped to the sibling file ``<dir>/<name>.data.pt``,
    and the resulting list is shuffled once with a ``Random`` instance
    seeded from ``hps.train.seed``.

    Each sample is loaded with ``torch.load(..., weights_only=True)`` onto
    the CPU and is expected to be a dict containing at least a
    ``"mel_spec"`` entry.  Samples whose ``"mel_spec"`` has more than
    ``max_spec_len`` frames along dimension 1 are randomly cropped.
    """

    def __init__(self, hps: HParams, is_validation: bool = False):
        """Build the shuffled list of sample paths.

        Args:
            hps: Hyper-parameters.  This class reads
                ``hps.data.training_files``, ``hps.data.validation_files``,
                ``hps.data.hop_length`` and ``hps.train.seed``.
            is_validation: Select the validation file list instead of the
                training one.
        """
        filelist = Path(
            hps.data.validation_files if is_validation else hps.data.training_files
        )
        # Blank / whitespace-only lines are skipped: they would otherwise turn
        # into a bogus ".data.pt" entry that inflates ``len(self)`` and only
        # fails later, when the sample is loaded.
        self.datapaths = [
            Path(line).parent / (Path(line).name + ".data.pt")
            for line in filelist.read_text("utf-8").splitlines()
            if line.strip()
        ]
        self.hps = hps
        self.random = Random(hps.train.seed)
        self.random.shuffle(self.datapaths)
        # Samples with more frames than this are cropped in ``__getitem__``.
        self.max_spec_len = 800

    def __getitem__(self, index: int) -> dict[str, torch.Tensor]:
        """Load the sample at ``index``, randomly cropping it if it is long.

        Args:
            index: Position in the shuffled path list.

        Returns:
            The loaded dict.  When ``"mel_spec"`` has more than
            ``max_spec_len`` frames, a window of ``max_spec_len - 10`` frames
            is cut: ``"audio"`` is sliced along dimension 1 with the frame
            bounds scaled by ``hps.data.hop_length``, ``"spk"`` is left
            untouched, and every other entry is sliced along its last
            dimension.
        """
        with Path(self.datapaths[index]).open("rb") as f:
            data = torch.load(f, weights_only=True, map_location="cpu")

        # cut long data randomly
        spec_len = data["mel_spec"].shape[1]
        hop_len = self.hps.data.hop_length
        if spec_len > self.max_spec_len:
            start = self.random.randint(0, spec_len - self.max_spec_len)
            # NOTE(review): the window is 10 frames shorter than max_spec_len;
            # the reason for this margin is not visible here -- confirm.
            end = start + self.max_spec_len - 10
            # Only values of existing keys are replaced, so mutating the dict
            # while iterating over its items is safe.
            for key, value in data.items():
                if key == "spk":
                    continue
                if key == "audio":
                    # Audio is indexed in samples rather than frames.
                    data[key] = value[:, start * hop_len : end * hop_len]
                else:
                    data[key] = value[..., start:end]

        # NOTE(review): kept from the original; presumably meant to limit GPU
        # memory pressure during training -- confirm it is still needed here.
        torch.cuda.empty_cache()
        return data

    def __len__(self) -> int:
        """Return the number of sample paths in the dataset."""
        return len(self.datapaths)