"""Class designed to facilitate the sampling of the model'sparameterspace"""__author__=["Karl Naumann-Woleske"]__credits__=["Karl Naumann-Woleske"]__license__="MIT"__version__="0.1.0"__maintainer__=["Karl Naumann-Woleske"]# Default librariesimportcopyimportinspectimportloggingimportmultiprocessingasmpimportosfrompathlibimportPathimportpicklelogger=logging.getLogger(__name__)# Third-party librariesimportnumpyasnpimportpandasaspdimportscipy.statsasstatsfromtqdmimporttqdm# Custom importsimportmacrostat.models.modelasmsmodelimportmacrostat.sample.samplerasmssamplerimportmacrostat.util.batchprocessingasmsbatchprocessing
[docs]classSobolSampler(mssampler.Sampler):def__init__(self,model:msmodel.Model,bounds:dict,sample_power:int=10,seed:int=0,logspace:bool=False,worker_function:callable=msbatchprocessing.timeseries_worker,simulation_args:tuple=(),output_folder:str="sobol_samples",cpu_count:int=1,batchsize:int=None,):"""Generalized class to facilitate the sampling of the model's parameterspace using python's multiprocessing library. Parameters ---------- model: msmodel.Model Model to be sampled bounds: dict[str, tuple] Dictionary containing the bounds for each parameter to be sampled sample_power: int (default 10) A power of 2 to determine the number of samples to be generated, i.e. 2**sample_power samples will be generated seed: int (default 0) Seed for the random number generator logspace: bool (default False) Whether to sample the parameters in logspace worker_function: callable (default batchprocessing.timeseries_worker) Function to be used for the parallel processing simulation_args: tuple (default ()) Arguments to be passed to the model's simulate method irrespective of the parameters output_folder: str (default "samples") Folder to save the output files cpu_count: int (default 1) Number of CPUs to use for the parallel processing batchsize: int (default None) Size of each batch to be processed in parallel """super().__init__(model=model,worker_function=worker_function,output_folder=output_folder,cpu_count=cpu_count,batchsize=batchsize,)# Boundaries for the parametersself.logspace=logspaceself._verify_bounds(bounds)self.bounds=bounds# Sampling parametersself.sample_power=sample_powerself.seed=seedself.simulation_args=simulation_args
[docs]defgenerate_tasks(self):"""Generate tasks for the parallel processor based on the Sobol sequence for the model's parameterspace using the bounds set in the class. Here the scipy.stats.qmc.Sobol class is used to generate the Sobol sequence, specifically the random_base2 method is used to generate the samples, as it is has slightly better space filling properties than with a custom number of samples. Returns ------- list[tuple] List of tuples containing the model and the task to be processed """# Generate the Sobol pointspoints=self._generate_sobol_points()tasks=[]foriinpoints.index:# Copy base parameters to ensure a full set of parametersnewparams=copy.deepcopy(self.base_parameters)forkeyinpoints.columns:newparams[key]=points.loc[i,key]# Generate the task to executenewmodel=self.modelclass(parameters=newparams,**self.model_kwargs)tasks.append((i,newmodel,*self.simulation_args))returntasks
def_verify_bounds(self,bounds:dict)->None:""" Verify that the bounds are correctly set, in particular 0. Check that the parameters are in the model 1. That there is a lower and upper bound for each parameter 2. That the lower bound is smaller than the upper bound 3. That the bounds are in the correct order 4. If the bounds are in logspace, that the bounds are either both positive or both negative 5. If the bounds are in logspace, that either bound is not zero Parameters ---------- bounds: dict[str, tuple] Dictionary containing the bounds for each parameter to be sampled logspace: bool Whether to sample the parameters in logspace Returns ------- None Raises ------ ValueError If the bounds are not correctly set """# Check that the bounds are correctly setforparam,boundinbounds.items():ifparamnotinself.model.parameters:raiseValueError(f"Parameter {param} not in the model's parameters")iflen(bound)!=2:raiseValueError(f"Bounds should be a list-like of length 2. {param}: {bound}")ifself.logspaceand(bound[0]<0)!=(bound[1]<0):raiseValueError(f"Bounds should be either both positive or both negative. {param}: {bound}")ifself.logspaceand(bound[0]==0orbound[1]==0):raiseValueError(f"Bounds cannot be zero when using logspace. {param}: {bound}")ifbound[0]>=bound[1]:raiseValueError(f"Lower bound should be smaller than the upper bound. {param}: {bound}")def_generate_sobol_points(self):"""Generate points in the parameterspace for the parallel processor based on a Sobol sequence. Here the scipy.stats.qmc.Sobol class is used to generate the Sobol sequence, specifically the random_base2 method is used to generate the samples, as it is has slightly better space filling properties than with a custom number of samples. Returns ------- pd.DataFrame DataFrame containing the Sobol points in the parameterspace. Rows are the samples, columns are the parameters """# Generate the bounds to scale bybounds_array=np.array(list(self.bounds.values()))ifself.logspace:# Take the sign and then log the bounds. bounds_sign=np.sign(bounds_array)bounds_array=np.log(np.abs(bounds_array))# Generate the Sobol sequencenp.random.seed(self.seed)sobol_sampler=stats.qmc.Sobol(len(self.bounds))sobol_sample=sobol_sampler.random_base2(self.sample_power)sample=stats.qmc.scale(sobol_sample,bounds_array[:,0],bounds_array[:,1])ifself.logspace:# Take the exponential and multiply by the sign to get the correct boundssample=np.exp(sample)*bounds_sign[:,0]returnpd.DataFrame(sample,columns=self.bounds.keys())