pyerrors.obs

import warnings
import hashlib
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
import scipy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
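
    # Hedged usage sketch: the *_dict class attributes act as per-ensemble
    # overrides of the global defaults. The ensemble name 'ens1' below is an
    # assumption for illustration, not part of this module:
    #   >>> import pyerrors as pe
    #   >>> pe.Obs.S_dict['ens1'] = 1.5       # use S=1.5 whenever 'ens1' is analyzed
    #   >>> pe.Obs.tau_exp_dict['ens1'] = 10  # attach an exponential tail for 'ens1'
    # Values passed directly to gamma_method take precedence over both.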
    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
                    elif np.any(dc == 0):
                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs
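
    # A minimal end-to-end sketch, assuming synthetic data and the ensemble
    # name 'ensemble1' (both illustrative, not part of this module):
    #   >>> import numpy as np
    #   >>> import pyerrors as pe
    #   >>> samples = np.random.normal(1.0, 0.1, 1000)
    #   >>> my_obs = pe.Obs([samples], ['ensemble1'])
    #   >>> my_obs.gamma_method()  # estimate the error, see below
    #   >>> print(my_obs)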
    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger than or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)
            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method
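
    # Hedged sketch: the analysis parameters can also be set per call; these
    # kwargs override the class-level defaults and dictionaries shown above:
    #   >>> my_obs.gamma_method(S=3.0)                  # larger automatic window
    #   >>> my_obs.gamma_method(tau_exp=10, N_sigma=2)  # attach exponential tail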
364 """ 365 gamma = np.zeros(w_max) 366 deltas = _expand_deltas(deltas, idx, shape, gapsize) 367 new_shape = len(deltas) 368 if fft: 369 max_gamma = min(new_shape, w_max) 370 # The padding for the fft has to be even 371 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 372 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 373 else: 374 for n in range(w_max): 375 if new_shape - n >= 0: 376 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 377 378 return gamma 379 380 def details(self, ens_content=True): 381 """Output detailed properties of the Obs. 382 383 Parameters 384 ---------- 385 ens_content : bool 386 print details about the ensembles and replica if true. 387 """ 388 if self.tag is not None: 389 print("Description:", self.tag) 390 if not hasattr(self, 'e_dvalue'): 391 print('Result\t %3.8e' % (self.value)) 392 else: 393 if self.value == 0.0: 394 percentage = np.nan 395 else: 396 percentage = np.abs(self._dvalue / self.value) * 100 397 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 398 if len(self.e_names) > 1: 399 print(' Ensemble errors:') 400 e_content = self.e_content 401 for e_name in self.mc_names: 402 gap = _determine_gap(self, e_content, e_name) 403 404 if len(self.e_names) > 1: 405 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 406 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 407 tau_string += f" in units of {gap} config" 408 if gap > 1: 409 tau_string += "s" 410 if self.tau_exp[e_name] > 0: 411 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 412 else: 413 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 414 print(tau_string) 415 for e_name in self.cov_names: 416 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 417 if ens_content is True: 418 if len(self.e_names) == 1: 419 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 420 else: 421 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 422 my_string_list = [] 423 for key, value in sorted(self.e_content.items()): 424 if key not in self.covobs: 425 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 426 if len(value) == 1: 427 my_string += f': {self.shape[value[0]]} configurations' 428 if isinstance(self.idl[value[0]], range): 429 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 430 else: 431 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 432 else: 433 sublist = [] 434 for v in value: 435 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 436 my_substring += f': {self.shape[v]} configurations' 437 if isinstance(self.idl[v], range): 438 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 439 else: 440 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 441 sublist.append(my_substring) 442 443 my_string += '\n' + '\n'.join(sublist) 444 else: 445 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 446 my_string_list.append(my_string) 447 print('\n'.join(my_string_list)) 448 449 def reweight(self, weight): 450 """Reweight the obs with given rewighting 
    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Works only properly when the gamma method was run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
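
    # Hedged sketch: checking two observables for consistency (obs_a and
    # obs_b are assumptions for illustration):
    #   >>> diff = obs_a - obs_b
    #   >>> diff.gamma_method()
    #   >>> diff.is_zero_within_error(sigma=2)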
533 """ 534 if not hasattr(self, 'e_dvalue'): 535 raise Exception('Run the gamma method first.') 536 for e, e_name in enumerate(self.mc_names): 537 fig = plt.figure() 538 plt.xlabel('W') 539 plt.ylabel('rho') 540 length = int(len(self.e_drho[e_name])) 541 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 542 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 543 if self.tau_exp[e_name] > 0: 544 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 545 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 546 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 547 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 548 else: 549 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 550 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 551 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 552 plt.xlim(-0.5, xmax) 553 plt.draw() 554 if save: 555 fig.savefig(save + "_" + str(e)) 556 557 def plot_rep_dist(self): 558 """Plot replica distribution for each ensemble with more than one replicum.""" 559 if not hasattr(self, 'e_dvalue'): 560 raise Exception('Run the gamma method first.') 561 for e, e_name in enumerate(self.mc_names): 562 if len(self.e_content[e_name]) == 1: 563 print('No replica distribution for a single replicum (', e_name, ')') 564 continue 565 r_length = [] 566 sub_r_mean = 0 567 for r, r_name in enumerate(self.e_content[e_name]): 568 r_length.append(len(self.deltas[r_name])) 569 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 570 e_N = np.sum(r_length) 571 sub_r_mean /= e_N 572 arr = np.zeros(len(self.e_content[e_name])) 573 for r, r_name in enumerate(self.e_content[e_name]): 574 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 575 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 576 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 577 plt.draw() 578 579 def plot_history(self, expand=True): 580 """Plot derived Monte Carlo history for each ensemble 581 582 Parameters 583 ---------- 584 expand : bool 585 show expanded history for irregular Monte Carlo chains (default: True). 586 """ 587 for e, e_name in enumerate(self.mc_names): 588 plt.figure() 589 r_length = [] 590 tmp = [] 591 tmp_expanded = [] 592 for r, r_name in enumerate(self.e_content[e_name]): 593 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 594 if expand: 595 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 596 r_length.append(len(tmp_expanded[-1])) 597 else: 598 r_length.append(len(tmp[-1])) 599 e_N = np.sum(r_length) 600 x = np.arange(e_N) 601 y_test = np.concatenate(tmp, axis=0) 602 if expand: 603 y = np.concatenate(tmp_expanded, axis=0) 604 else: 605 y = y_test 606 plt.errorbar(x, y, fmt='.', markersize=3) 607 plt.xlim(-0.5, e_N - 0.5) 608 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 609 plt.draw() 610 611 def plot_piechart(self, save=None): 612 """Plot piechart which shows the fractional contribution of each 613 ensemble to the error and returns a dictionary containing the fractions. 614 615 Parameters 616 ---------- 617 save : str 618 saves the figure to a file named 'save' if. 
619 """ 620 if not hasattr(self, 'e_dvalue'): 621 raise Exception('Run the gamma method first.') 622 if np.isclose(0.0, self._dvalue, atol=1e-15): 623 raise Exception('Error is 0.0') 624 labels = self.e_names 625 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 626 fig1, ax1 = plt.subplots() 627 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 628 ax1.axis('equal') 629 plt.draw() 630 if save: 631 fig1.savefig(save) 632 633 return dict(zip(labels, sizes)) 634 635 def dump(self, filename, datatype="json.gz", description="", **kwargs): 636 """Dump the Obs to a file 'name' of chosen format. 637 638 Parameters 639 ---------- 640 filename : str 641 name of the file to be saved. 642 datatype : str 643 Format of the exported file. Supported formats include 644 "json.gz" and "pickle" 645 description : str 646 Description for output file, only relevant for json.gz format. 647 path : str 648 specifies a custom path for the file (default '.') 649 """ 650 if 'path' in kwargs: 651 file_name = kwargs.get('path') + '/' + filename 652 else: 653 file_name = filename 654 655 if datatype == "json.gz": 656 from .input.json import dump_to_json 657 dump_to_json([self], file_name, description=description) 658 elif datatype == "pickle": 659 with open(file_name + '.p', 'wb') as fb: 660 pickle.dump(self, fb) 661 else: 662 raise Exception("Unknown datatype " + str(datatype)) 663 664 def export_jackknife(self): 665 """Export jackknife samples from the Obs 666 667 Returns 668 ------- 669 numpy.ndarray 670 Returns a numpy array of length N + 1 where N is the number of samples 671 for the given ensemble and replicum. The zeroth entry of the array contains 672 the mean value of the Obs, entries 1 to N contain the N jackknife samples 673 derived from the Obs. The current implementation only works for observables 674 defined on exactly one ensemble and replicum. The derived jackknife samples 675 should agree with samples from a full jackknife analysis up to O(1/N). 676 """ 677 678 if len(self.names) != 1: 679 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 680 681 name = self.names[0] 682 full_data = self.deltas[name] + self.r_values[name] 683 n = full_data.size 684 mean = self.value 685 tmp_jacks = np.zeros(n + 1) 686 tmp_jacks[0] = mean 687 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 688 return tmp_jacks 689 690 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 691 """Export bootstrap samples from the Obs 692 693 Parameters 694 ---------- 695 samples : int 696 Number of bootstrap samples to generate. 697 random_numbers : np.ndarray 698 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 699 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 700 save_rng : str 701 Save the random numbers to a file if a path is specified. 702 703 Returns 704 ------- 705 numpy.ndarray 706 Returns a numpy array of length N + 1 where N is the number of samples 707 for the given ensemble and replicum. The zeroth entry of the array contains 708 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 709 derived from the Obs. The current implementation only works for observables 710 defined on exactly one ensemble and replicum. The derived bootstrap samples 711 should agree with samples from a full bootstrap analysis up to O(1/N). 
712 """ 713 if len(self.names) != 1: 714 raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 715 716 name = self.names[0] 717 length = self.N 718 719 if random_numbers is None: 720 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 721 rng = np.random.default_rng(seed) 722 random_numbers = rng.integers(0, length, size=(samples, length)) 723 724 if save_rng is not None: 725 np.savetxt(save_rng, random_numbers, fmt='%i') 726 727 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 728 ret = np.zeros(samples + 1) 729 ret[0] = self.value 730 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 731 return ret 732 733 def __float__(self): 734 return float(self.value) 735 736 def __repr__(self): 737 return 'Obs[' + str(self) + ']' 738 739 def __str__(self): 740 return _format_uncertainty(self.value, self._dvalue) 741 742 def __format__(self, format_type): 743 if format_type == "": 744 significance = 2 745 else: 746 significance = int(float(format_type.replace("+", "").replace("-", ""))) 747 my_str = _format_uncertainty(self.value, self._dvalue, 748 significance=significance) 749 for char in ["+", " "]: 750 if format_type.startswith(char): 751 if my_str[0] != "-": 752 my_str = char + my_str 753 return my_str 754 755 def __hash__(self): 756 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 757 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 758 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 759 hash_tuple += tuple([o.encode() for o in self.names]) 760 m = hashlib.md5() 761 [m.update(o) for o in hash_tuple] 762 return int(m.hexdigest(), 16) & 0xFFFFFFFF 763 764 # Overload comparisons 765 def __lt__(self, other): 766 return self.value < other 767 768 def __le__(self, other): 769 return self.value <= other 770 771 def __gt__(self, other): 772 return self.value > other 773 774 def __ge__(self, other): 775 return self.value >= other 776 777 def __eq__(self, other): 778 if other is None: 779 return False 780 return (self - other).is_zero() 781 782 # Overload math operations 783 def __add__(self, y): 784 if isinstance(y, Obs): 785 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 786 else: 787 if isinstance(y, np.ndarray): 788 return np.array([self + o for o in y]) 789 elif isinstance(y, complex): 790 return CObs(self, 0) + y 791 elif y.__class__.__name__ in ['Corr', 'CObs']: 792 return NotImplemented 793 else: 794 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 795 796 def __radd__(self, y): 797 return self + y 798 799 def __mul__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([self * o for o in y]) 805 elif isinstance(y, complex): 806 return CObs(self * y.real, self * y.imag) 807 elif y.__class__.__name__ in ['Corr', 'CObs']: 808 return NotImplemented 809 else: 810 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 811 812 def __rmul__(self, y): 813 return self * y 814 815 def __sub__(self, y): 816 if isinstance(y, Obs): 817 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 818 else: 819 if isinstance(y, np.ndarray): 820 return np.array([self - o for o in y]) 821 elif y.__class__.__name__ 
    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        if other is None:
            return False
        return (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif isinstance(y, complex):
                return CObs(self, 0) + y
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
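
# Hedged sketch: arithmetic on Obs objects returns new Obs with errors
# propagated through derived_observable, and numpy functions dispatch to
# the same-named methods above (obs1, obs2 are assumptions from the
# earlier sketches):
#   >>> res = (obs1 ** 2 / obs2).log()
#   >>> res.gamma_method()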
class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag
    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
            format_type = "2"
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"


def gamma_method(x, **kwargs):
    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.

    See docstring of pe.Obs.gamma_method for details.
    """
    return np.vectorize(lambda o: o.gm(**kwargs))(x)


gm = gamma_method


def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
    if dvalue == 0.0 or (not np.isfinite(dvalue)):
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"
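
# A sketch of the parenthesis notation produced above (values chosen for
# illustration):
#   >>> _format_uncertainty(13.018, 0.45)
#   '13.02(45)'
#   >>> _format_uncertainty(13.018, 0.45, significance=3)
#   '13.018(450)'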
1122 """ 1123 1124 if _check_lists_equal(idl): 1125 return idl[0] 1126 1127 idinter = sorted(set.intersection(*[set(o) for o in idl])) 1128 1129 # Check whether idinter can be expressed as range 1130 try: 1131 idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0]) 1132 idtest = [list(idrange), idinter] 1133 if _check_lists_equal(idtest): 1134 return idrange 1135 except IndexError: 1136 pass 1137 1138 return idinter 1139 1140 1141def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor): 1142 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1143 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1144 common divisor of the step sizes is used as new step size. 1145 1146 Parameters 1147 ---------- 1148 deltas : list 1149 List of fluctuations 1150 idx : list 1151 List or range of configs on which the deltas are defined. 1152 Has to be a subset of new_idx and has to be sorted in ascending order. 1153 shape : list 1154 Number of configs in idx. 1155 new_idx : list 1156 List of configs that defines the new range, has to be sorted in ascending order. 1157 scalefactor : float 1158 An additional scaling factor that can be applied to scale the fluctuations, 1159 e.g., when Obs with differing numbers of replica are merged. 1160 """ 1161 if type(idx) is range and type(new_idx) is range: 1162 if idx == new_idx: 1163 if scalefactor == 1: 1164 return deltas 1165 else: 1166 return deltas * scalefactor 1167 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1168 for i in range(shape): 1169 ret[idx[i] - new_idx[0]] = deltas[i] 1170 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor 1171 1172 1173def derived_observable(func, data, array_mode=False, **kwargs): 1174 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1175 1176 Parameters 1177 ---------- 1178 func : object 1179 arbitrary function of the form func(data, **kwargs). For the 1180 automatic differentiation to work, all numpy functions have to have 1181 the autograd wrapper (use 'import autograd.numpy as anp'). 1182 data : list 1183 list of Obs, e.g. [obs1, obs2, obs3]. 1184 num_grad : bool 1185 if True, numerical derivatives are used instead of autograd 1186 (default False). To control the numerical differentiation the 1187 kwargs of numdifftools.step_generators.MaxStepGenerator 1188 can be used. 1189 man_grad : list 1190 manually supply a list or an array which contains the jacobian 1191 of func. Use cautiously, supplying the wrong derivative will 1192 not be intercepted. 1193 1194 Notes 1195 ----- 1196 For simple mathematical operations it can be practical to use anonymous 1197 functions. For the ratio of two observables one can e.g. use 1198 1199 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1200 """ 1201 1202 data = np.asarray(data) 1203 raveled_data = data.ravel() 1204 1205 # Workaround for matrix operations containing non Obs data 1206 if not all(isinstance(x, Obs) for x in raveled_data): 1207 for i in range(len(raveled_data)): 1208 if isinstance(raveled_data[i], (int, float)): 1209 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1210 1211 allcov = {} 1212 for o in raveled_data: 1213 for name in o.cov_names: 1214 if name in allcov: 1215 if not np.allclose(allcov[name], o.covobs[name].cov): 1216 raise Exception('Inconsistent covariance matrices for %s!' 
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    def _compute_scalefactor_missing_rep(obs):
        """
        Computes the scale factor that is to be multiplied with the deltas
        in the case where Obs with different subsets of replica are merged.
        Returns a dictionary with the scale factor for each Monte Carlo name.

        Parameters
        ----------
        obs : Obs
            The observable corresponding to the deltas that are to be scaled
        """
        scalef_d = {}
        for mc_name in obs.mc_names:
            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
        return scalef_d
    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                scalef_d = _compute_scalefactor_missing_rep(obs)
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
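
# Hedged sketch of the three differentiation modes of derived_observable
# (obs1 is an assumption from the earlier sketches):
#   >>> auto = derived_observable(lambda x: anp.sin(x[0]), [obs1])
#   >>> num = derived_observable(lambda x, **kwargs: np.sin(x[0]), [obs1], num_grad=True)
#   >>> man = derived_observable(lambda x, **kwargs: np.sin(x[0]), [obs1], man_grad=[np.cos(obs1.value)])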
1378 """ 1379 if not len(deltas) == len(idx_old): 1380 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1381 if type(idx_old) is range and type(idx_new) is range: 1382 if idx_old == idx_new: 1383 return deltas 1384 if _check_lists_equal([idx_old, idx_new]): 1385 return deltas 1386 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1387 if len(indices) < len(idx_new): 1388 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') 1389 return np.array(deltas)[indices] 1390 1391 1392def reweight(weight, obs, **kwargs): 1393 """Reweight a list of observables. 1394 1395 Parameters 1396 ---------- 1397 weight : Obs 1398 Reweighting factor. An Observable that has to be defined on a superset of the 1399 configurations in obs[i].idl for all i. 1400 obs : list 1401 list of Obs, e.g. [obs1, obs2, obs3]. 1402 all_configs : bool 1403 if True, the reweighted observables are normalized by the average of 1404 the reweighting factor on all configurations in weight.idl and not 1405 on the configurations in obs[i].idl. Default False. 1406 """ 1407 result = [] 1408 for i in range(len(obs)): 1409 if len(obs[i].cov_names): 1410 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1411 if not set(obs[i].names).issubset(weight.names): 1412 raise Exception('Error: Ensembles do not fit') 1413 for name in obs[i].names: 1414 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1415 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1416 new_samples = [] 1417 w_deltas = {} 1418 for name in sorted(obs[i].names): 1419 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1420 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1421 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1422 1423 if kwargs.get('all_configs'): 1424 new_weight = weight 1425 else: 1426 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1427 1428 result.append(tmp_obs / new_weight) 1429 result[-1].reweighted = True 1430 1431 return result 1432 1433 1434def correlate(obs_a, obs_b): 1435 """Correlate two observables. 1436 1437 Parameters 1438 ---------- 1439 obs_a : Obs 1440 First observable 1441 obs_b : Obs 1442 Second observable 1443 1444 Notes 1445 ----- 1446 Keep in mind to only correlate primary observables which have not been reweighted 1447 yet. The reweighting has to be applied after correlating the observables. 1448 Currently only works if ensembles are identical (this is not strictly necessary). 
1449 """ 1450 1451 if sorted(obs_a.names) != sorted(obs_b.names): 1452 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1453 if len(obs_a.cov_names) or len(obs_b.cov_names): 1454 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1455 for name in obs_a.names: 1456 if obs_a.shape[name] != obs_b.shape[name]: 1457 raise Exception('Shapes of ensemble', name, 'do not fit') 1458 if obs_a.idl[name] != obs_b.idl[name]: 1459 raise Exception('idl of ensemble', name, 'do not fit') 1460 1461 if obs_a.reweighted is True: 1462 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1463 if obs_b.reweighted is True: 1464 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1465 1466 new_samples = [] 1467 new_idl = [] 1468 for name in sorted(obs_a.names): 1469 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1470 new_idl.append(obs_a.idl[name]) 1471 1472 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1473 o.reweighted = obs_a.reweighted or obs_b.reweighted 1474 return o 1475 1476 1477def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1478 r'''Calculates the error covariance matrix of a set of observables. 1479 1480 WARNING: This function should be used with care, especially for observables with support on multiple 1481 ensembles with differing autocorrelations. See the notes below for details. 1482 1483 The gamma method has to be applied first to all observables. 1484 1485 Parameters 1486 ---------- 1487 obs : list or numpy.ndarray 1488 List or one dimensional array of Obs 1489 visualize : bool 1490 If True plots the corresponding normalized correlation matrix (default False). 1491 correlation : bool 1492 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1493 smooth : None or int 1494 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1495 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1496 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1497 small ones. 1498 1499 Notes 1500 ----- 1501 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1502 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1503 in the absence of autocorrelation. 1504 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1505 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1506 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and
    then rescaling the correlation matrix by the full errors, including the previous gamma method estimate
    for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive
    semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the
    integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the
    integrated autocorrelation times of the corresponding diagonal elements
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}\,.$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    Parameters
    ----------
    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T
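
# Usage sketch for covariance (illustrative): the gamma method has to be run
# on every Obs first. With eight toy observables the smoothing parameter
# E=3 satisfies the constraint 2 < E < dim - 1.
#
#     rng = np.random.default_rng(3)
#     obs_list = [Obs([rng.normal(i, 0.1, 200)], ['ens1']) for i in range(8)]
#     for o in obs_list:
#         o.gamma_method()
#     cov = covariance(obs_list)                     # 8x8 error covariance
#     corr = covariance(obs_list, correlation=True)  # normalized correlation
#     cov_s = covariance(obs_list, smooth=3)         # eigenvalue smoothing
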
def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name])
                                 * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
        gamma /= gamma_div

        dvalue += gamma

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()

    return dvalue


def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list containing the range or list of configurations on which
        the samples are defined (one entry for the given ensemble).
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs
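
# Usage sketch (illustrative): export_jackknife and import_jackknife form a
# round trip for an Obs defined on a single ensemble and replicum.
#
#     rng = np.random.default_rng(5)
#     my_obs = Obs([rng.normal(1.0, 0.1, 100)], ['ens1'])
#     jacks = my_obs.export_jackknife()   # mean + 100 jackknife samples
#     rec = import_jackknife(jacks, 'ens1')
#     rec.gamma_method()
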
1658 """ 1659 samples, length = random_numbers.shape 1660 if samples != len(boots) - 1: 1661 raise ValueError("Random numbers do not have the correct shape.") 1662 1663 if samples < length: 1664 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1665 1666 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1667 1668 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1669 ret = Obs([samples], [name]) 1670 ret._value = boots[0] 1671 return ret 1672 1673 1674def merge_obs(list_of_obs): 1675 """Combine all observables in list_of_obs into one new observable 1676 1677 Parameters 1678 ---------- 1679 list_of_obs : list 1680 list of the Obs object to be combined 1681 1682 Notes 1683 ----- 1684 It is not possible to combine obs which are based on the same replicum 1685 """ 1686 replist = [item for obs in list_of_obs for item in obs.names] 1687 if (len(replist) == len(set(replist))) is False: 1688 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1689 if any([len(o.cov_names) for o in list_of_obs]): 1690 raise Exception('Not possible to merge data that contains covobs!') 1691 new_dict = {} 1692 idl_dict = {} 1693 for o in list_of_obs: 1694 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1695 for key in set(o.deltas) | set(o.r_values)}) 1696 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1697 1698 names = sorted(new_dict.keys()) 1699 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1700 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1701 return o 1702 1703 1704def cov_Obs(means, cov, name, grad=None): 1705 """Create an Obs based on mean(s) and a covariance matrix 1706 1707 Parameters 1708 ---------- 1709 mean : list of floats or float 1710 N mean value(s) of the new Obs 1711 cov : list or array 1712 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1713 name : str 1714 identifier for the covariance matrix 1715 grad : list or array 1716 Gradient of the Covobs wrt. the means belonging to cov. 1717 """ 1718 1719 def covobs_to_obs(co): 1720 """Make an Obs out of a Covobs 1721 1722 Parameters 1723 ---------- 1724 co : Covobs 1725 Covobs to be embedded into the Obs 1726 """ 1727 o = Obs([], [], means=[]) 1728 o._value = co.value 1729 o.names.append(co.name) 1730 o._covobs[co.name] = co 1731 o._dvalue = np.sqrt(co.errsq()) 1732 return o 1733 1734 ol = [] 1735 if isinstance(means, (float, int)): 1736 means = [means] 1737 1738 for i in range(len(means)): 1739 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1740 if ol[0].covobs[name].N != len(means): 1741 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1742 if len(ol) == 1: 1743 return ol[0] 1744 return ol 1745 1746 1747def _determine_gap(o, e_content, e_name): 1748 gaps = [] 1749 for r_name in e_content[e_name]: 1750 if isinstance(o.idl[r_name], range): 1751 gaps.append(o.idl[r_name].step) 1752 else: 1753 gaps.append(np.min(np.diff(o.idl[r_name]))) 1754 1755 gap = min(gaps) 1756 if not np.all([gi % gap == 0 for gi in gaps]): 1757 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) 1758 1759 return gap 1760 1761 1762def _check_lists_equal(idl): 1763 ''' 1764 Use groupby to efficiently check whether all elements of idl are identical. 1765 Returns True if all elements are equal, otherwise False. 
def _check_lists_equal(idl):
    '''
    Use groupby to efficiently check whether all elements of idl are identical.
    Returns True if all elements are equal, otherwise False.

    Parameters
    ----------
    idl : list of lists, ranges or np.ndarrays
    '''
    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
    if next(g, True) and not next(g, False):
        return True
    return False
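
# Usage sketch for cov_Obs (illustrative; names and numbers are made up):
# external inputs with a known covariance enter an analysis as Covobs-backed
# Obs and can be combined freely with Monte Carlo observables.
#
#     z = cov_Obs(1.0432, 0.0002 ** 2, 'Z factor')   # single value, 0d cov
#     p = cov_Obs([0.2, 1.5], [[4e-4, 5e-5], [5e-5, 1e-2]], 'fit parameters')
#     res = z * p[0]
#     res.gamma_method()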
177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimate coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1.0). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True). 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger than or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 gapsize = _determine_gap(self, e_content, e_name) 244 245 r_length = [] 246 for r_name in e_content[e_name]: 247 if isinstance(self.idl[r_name], range): 248 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 249 else: 250 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 251 252 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 253 w_max = max(r_length) // 2 254 e_gamma[e_name] = np.zeros(w_max) 255 self.e_rho[e_name] = np.zeros(w_max) 256 self.e_drho[e_name] = np.zeros(w_max) 257 258 for r_name in e_content[e_name]: 259 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 260 261 gamma_div = np.zeros(w_max) 262 for r_name in e_content[e_name]: 263 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 264 gamma_div[gamma_div < 1] = 1.0 265 e_gamma[e_name] /= gamma_div[:w_max] 266 267 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 268 self.e_tauint[e_name] = 0.5 269 self.e_dtauint[e_name] = 0.0 270 self.e_dvalue[e_name] = 0.0 271 self.e_ddvalue[e_name] = 0.0 272 self.e_windowsize[e_name] = 0 273 continue 274 275 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 276 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 277 # Make sure no entry of tauint is smaller than 0.5 278 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 279 # hep-lat/0306017 eq. (42) 280 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 281 self.e_n_dtauint[e_name][0] = 0.0 282 283 def _compute_drho(i): 284 tmp = (self.e_rho[e_name][i + 1:w_max] 285 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 286 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 287 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 288 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 289 290 if self.tau_exp[e_name] > 0: 291 _compute_drho(1) 292 texp = self.tau_exp[e_name] 293 # Critical slowing down analysis 294 if w_max // 2 <= 1: 295 raise Exception("Need at least 8 samples for tau_exp error analysis") 296 for n in range(1, w_max // 2): 297 _compute_drho(n + 1) 298 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 299 # Bias correction hep-lat/0306017 eq. (49) included 300 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute value makes sure that the tail contribution is always positive 301 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 302 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 303 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 305 self.e_windowsize[e_name] = n 306 break 307 else: 308 if self.S[e_name] == 0.0: 309 self.e_tauint[e_name] = 0.5 310 self.e_dtauint[e_name] = 0.0 311 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 313 self.e_windowsize[e_name] = 0 314 else: 315 # Standard automatic windowing procedure 316 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 317 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 318 for n in range(1, w_max): 319 if g_w[n - 1] < 0 or n >= w_max - 1: 320 _compute_drho(n) 321 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 322 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 323 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 324 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 325 self.e_windowsize[e_name] = n 326 break 327 328 self._dvalue += self.e_dvalue[e_name] ** 2 329 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 330 331 for e_name in self.cov_names: 332 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 333 self.e_ddvalue[e_name] = 0 334 self._dvalue += self.e_dvalue[e_name]**2 335 336 self._dvalue = np.sqrt(self._dvalue) 337 if self._dvalue == 0.0: 338 self.ddvalue = 0.0 339 else: 340 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 341 return
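A minimal sketch of a typical gamma_method analysis, assuming pyerrors is installed and imported as pe; the ensemble name and the AR(1) toy chain are illustrative only:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(42)
samples = np.zeros(1000)
for i in range(1, len(samples)):  # toy AR(1) chain as a stand-in for Monte Carlo data
    samples[i] = 0.7 * samples[i - 1] + rng.normal()

obs = pe.Obs([samples], ['toy_ensemble'])
obs.gamma_method(S=3.0)  # automatic windowing with a custom S
print(obs.dvalue, obs.e_tauint['toy_ensemble'])
obs.gamma_method(tau_exp=10)  # critical slowing down analysis instead
print(obs.dvalue)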
381 def details(self, ens_content=True): 382 """Output detailed properties of the Obs. 383 384 Parameters 385 ---------- 386 ens_content : bool 387 print details about the ensembles and replica if true. 388 """ 389 if self.tag is not None: 390 print("Description:", self.tag) 391 if not hasattr(self, 'e_dvalue'): 392 print('Result\t %3.8e' % (self.value)) 393 else: 394 if self.value == 0.0: 395 percentage = np.nan 396 else: 397 percentage = np.abs(self._dvalue / self.value) * 100 398 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 399 if len(self.e_names) > 1: 400 print(' Ensemble errors:') 401 e_content = self.e_content 402 for e_name in self.mc_names: 403 gap = _determine_gap(self, e_content, e_name) 404 405 if len(self.e_names) > 1: 406 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 407 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 408 tau_string += f" in units of {gap} config" 409 if gap > 1: 410 tau_string += "s" 411 if self.tau_exp[e_name] > 0: 412 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 413 else: 414 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 415 print(tau_string) 416 for e_name in self.cov_names: 417 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 418 if ens_content is True: 419 if len(self.e_names) == 1: 420 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 421 else: 422 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 423 my_string_list = [] 424 for key, value in sorted(self.e_content.items()): 425 if key not in self.covobs: 426 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 427 if len(value) == 1: 428 my_string += f': {self.shape[value[0]]} configurations' 429 if isinstance(self.idl[value[0]], range): 430 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 431 else: 432 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 433 else: 434 sublist = [] 435 for v in value: 436 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 437 my_substring += f': {self.shape[v]} configurations' 438 if isinstance(self.idl[v], range): 439 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 440 else: 441 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 442 sublist.append(my_substring) 443 444 my_string += '\n' + '\n'.join(sublist) 445 else: 446 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 447 my_string_list.append(my_string) 448 print('\n'.join(my_string_list))
450 def reweight(self, weight): 451 """Reweight the obs with given reweighting factors. 452 453 Parameters 454 ---------- 455 weight : Obs 456 Reweighting factor. An Observable that has to be defined on a superset of the 457 configurations in obs[i].idl for all i. 458 all_configs : bool 459 if True, the reweighted observables are normalized by the average of 460 the reweighting factor on all configurations in weight.idl and not 461 on the configurations in obs[i].idl. Default False. 462 """ 463 return reweight(weight, [self])[0]
465 def is_zero_within_error(self, sigma=1): 466 """Checks whether the observable is zero within 'sigma' standard errors. 467 468 Parameters 469 ---------- 470 sigma : int 471 Number of standard errors used for the check. 472 473 Only works properly after the gamma method has been run. 474 """ 475 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
477 def is_zero(self, atol=1e-10): 478 """Checks whether the observable is zero within a given tolerance. 479 480 Parameters 481 ---------- 482 atol : float 483 Absolute tolerance (for details see numpy documentation). 484 """ 485 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
487 def plot_tauint(self, save=None): 488 """Plot integrated autocorrelation time for each ensemble. 489 490 Parameters 491 ---------- 492 save : str 493 saves the figure to a file named 'save' if set. 494 """ 495 if not hasattr(self, 'e_dvalue'): 496 raise Exception('Run the gamma method first.') 497 498 for e, e_name in enumerate(self.mc_names): 499 fig = plt.figure() 500 plt.xlabel(r'$W$') 501 plt.ylabel(r'$\tau_\mathrm{int}$') 502 length = int(len(self.e_n_tauint[e_name])) 503 if self.tau_exp[e_name] > 0: 504 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 505 x_help = np.arange(2 * self.tau_exp[e_name]) 506 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 507 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 508 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 509 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 510 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 511 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 512 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 513 else: 514 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 515 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 516 517 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 518 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 519 plt.legend() 520 plt.xlim(-0.5, xmax) 521 ylim = plt.ylim() 522 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 523 plt.draw() 524 if save: 525 fig.savefig(save + "_" + str(e))
527 def plot_rho(self, save=None): 528 """Plot normalized autocorrelation function for each ensemble. 529 530 Parameters 531 ---------- 532 save : str 533 saves the figure to a file named 'save' if set. 534 """ 535 if not hasattr(self, 'e_dvalue'): 536 raise Exception('Run the gamma method first.') 537 for e, e_name in enumerate(self.mc_names): 538 fig = plt.figure() 539 plt.xlabel('W') 540 plt.ylabel('rho') 541 length = int(len(self.e_drho[e_name])) 542 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 543 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 544 if self.tau_exp[e_name] > 0: 545 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 546 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 547 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 548 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 549 else: 550 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 551 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 552 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 553 plt.xlim(-0.5, xmax) 554 plt.draw() 555 if save: 556 fig.savefig(save + "_" + str(e))
558 def plot_rep_dist(self): 559 """Plot replica distribution for each ensemble with more than one replicum.""" 560 if not hasattr(self, 'e_dvalue'): 561 raise Exception('Run the gamma method first.') 562 for e, e_name in enumerate(self.mc_names): 563 if len(self.e_content[e_name]) == 1: 564 print('No replica distribution for a single replicum (', e_name, ')') 565 continue 566 r_length = [] 567 sub_r_mean = 0 568 for r, r_name in enumerate(self.e_content[e_name]): 569 r_length.append(len(self.deltas[r_name])) 570 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 571 e_N = np.sum(r_length) 572 sub_r_mean /= e_N 573 arr = np.zeros(len(self.e_content[e_name])) 574 for r, r_name in enumerate(self.e_content[e_name]): 575 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 576 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 577 plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)') 578 plt.draw()
580 def plot_history(self, expand=True): 581 """Plot derived Monte Carlo history for each ensemble 582 583 Parameters 584 ---------- 585 expand : bool 586 show expanded history for irregular Monte Carlo chains (default: True). 587 """ 588 for e, e_name in enumerate(self.mc_names): 589 plt.figure() 590 r_length = [] 591 tmp = [] 592 tmp_expanded = [] 593 for r, r_name in enumerate(self.e_content[e_name]): 594 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 595 if expand: 596 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 597 r_length.append(len(tmp_expanded[-1])) 598 else: 599 r_length.append(len(tmp[-1])) 600 e_N = np.sum(r_length) 601 x = np.arange(e_N) 602 y_test = np.concatenate(tmp, axis=0) 603 if expand: 604 y = np.concatenate(tmp_expanded, axis=0) 605 else: 606 y = y_test 607 plt.errorbar(x, y, fmt='.', markersize=3) 608 plt.xlim(-0.5, e_N - 0.5) 609 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 610 plt.draw()
612 def plot_piechart(self, save=None): 613 """Plot piechart which shows the fractional contribution of each 614 ensemble to the error and returns a dictionary containing the fractions. 615 616 Parameters 617 ---------- 618 save : str 619 saves the figure to a file named 'save' if set. 620 """ 621 if not hasattr(self, 'e_dvalue'): 622 raise Exception('Run the gamma method first.') 623 if np.isclose(0.0, self._dvalue, atol=1e-15): 624 raise Exception('Error is 0.0') 625 labels = self.e_names 626 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 627 fig1, ax1 = plt.subplots() 628 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 629 ax1.axis('equal') 630 plt.draw() 631 if save: 632 fig1.savefig(save) 633 634 return dict(zip(labels, sizes))
636 def dump(self, filename, datatype="json.gz", description="", **kwargs): 637 """Dump the Obs to a file 'name' of chosen format. 638 639 Parameters 640 ---------- 641 filename : str 642 name of the file to be saved. 643 datatype : str 644 Format of the exported file. Supported formats include 645 "json.gz" and "pickle" 646 description : str 647 Description for output file, only relevant for json.gz format. 648 path : str 649 specifies a custom path for the file (default '.') 650 """ 651 if 'path' in kwargs: 652 file_name = kwargs.get('path') + '/' + filename 653 else: 654 file_name = filename 655 656 if datatype == "json.gz": 657 from .input.json import dump_to_json 658 dump_to_json([self], file_name, description=description) 659 elif datatype == "pickle": 660 with open(file_name + '.p', 'wb') as fb: 661 pickle.dump(self, fb) 662 else: 663 raise Exception("Unknown datatype " + str(datatype))
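A rough usage sketch; the file name and description are made up, and reading the file back via pe.input.json.load_json assumes the json.gz format:

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.default_rng(4).normal(0.0, 1.0, 500)], ['toy_ensemble'])
obs.dump('my_obs', datatype='json.gz', description='toy data')  # writes my_obs.json.gz
loaded = pe.input.json.load_json('my_obs')  # reads the file back in (an Obs, or a list for multiple entries)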
665 def export_jackknife(self): 666 """Export jackknife samples from the Obs 667 668 Returns 669 ------- 670 numpy.ndarray 671 Returns a numpy array of length N + 1 where N is the number of samples 672 for the given ensemble and replicum. The zeroth entry of the array contains 673 the mean value of the Obs, entries 1 to N contain the N jackknife samples 674 derived from the Obs. The current implementation only works for observables 675 defined on exactly one ensemble and replicum. The derived jackknife samples 676 should agree with samples from a full jackknife analysis up to O(1/N). 677 """ 678 679 if len(self.names) != 1: 680 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 681 682 name = self.names[0] 683 full_data = self.deltas[name] + self.r_values[name] 684 n = full_data.size 685 mean = self.value 686 tmp_jacks = np.zeros(n + 1) 687 tmp_jacks[0] = mean 688 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 689 return tmp_jacks
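A short sketch of how the exported pseudo-samples could be used; the naive jackknife error computed below should agree with the uncorrelated (S=0) gamma_method error up to O(1/N). The toy data is illustrative only:

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.default_rng(5).normal(1.0, 0.2, 500)], ['toy_ensemble'])
jacks = obs.export_jackknife()  # [mean, jackknife sample 1, ..., jackknife sample N]
n = len(jacks) - 1
# standard jackknife error estimate from the pseudo-samples
jack_err = np.sqrt((n - 1) * np.mean((jacks[1:] - np.mean(jacks[1:])) ** 2))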
691 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 692 """Export bootstrap samples from the Obs 693 694 Parameters 695 ---------- 696 samples : int 697 Number of bootstrap samples to generate. 698 random_numbers : np.ndarray 699 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 700 If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name. 701 save_rng : str 702 Save the random numbers to a file if a path is specified. 703 704 Returns 705 ------- 706 numpy.ndarray 707 Returns a numpy array of length samples + 1 where samples is the number of bootstrap 708 samples to generate. The zeroth entry of the array contains 709 the mean value of the Obs, entries 1 to samples contain the bootstrap samples 710 derived from the Obs. The current implementation only works for observables 711 defined on exactly one ensemble and replicum. The derived bootstrap samples 712 should agree with samples from a full bootstrap analysis up to O(1/N). 713 """ 714 if len(self.names) != 1: 715 raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.") 716 717 name = self.names[0] 718 length = self.N 719 720 if random_numbers is None: 721 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 722 rng = np.random.default_rng(seed) 723 random_numbers = rng.integers(0, length, size=(samples, length)) 724 725 if save_rng is not None: 726 np.savetxt(save_rng, random_numbers, fmt='%i') 727 728 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 729 ret = np.zeros(samples + 1) 730 ret[0] = self.value 731 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 732 return ret
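A sketch of a bootstrap error estimate from the exported samples; the ensemble name and sample count are illustrative. Without explicit random_numbers the samples are generated deterministically from the ensemble-name hash:

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.default_rng(9).normal(0.5, 0.1, 500)], ['toy_ensemble'])
boots = obs.export_bootstrap(samples=1000)  # [mean, bootstrap sample 1, ...]
boot_err = np.std(boots[1:], ddof=1)  # spread of the bootstrap means as error estimate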
920class CObs: 921 """Class for a complex valued observable.""" 922 __slots__ = ['_real', '_imag', 'tag'] 923 924 def __init__(self, real, imag=0.0): 925 self._real = real 926 self._imag = imag 927 self.tag = None 928 929 @property 930 def real(self): 931 return self._real 932 933 @property 934 def imag(self): 935 return self._imag 936 937 def gamma_method(self, **kwargs): 938 """Executes the gamma_method for the real and the imaginary part.""" 939 if isinstance(self.real, Obs): 940 self.real.gamma_method(**kwargs) 941 if isinstance(self.imag, Obs): 942 self.imag.gamma_method(**kwargs) 943 944 def is_zero(self): 945 """Checks whether both real and imaginary part are zero within machine precision.""" 946 return self.real == 0.0 and self.imag == 0.0 947 948 def conjugate(self): 949 return CObs(self.real, -self.imag) 950 951 def __add__(self, other): 952 if isinstance(other, np.ndarray): 953 return other + self 954 elif hasattr(other, 'real') and hasattr(other, 'imag'): 955 return CObs(self.real + other.real, 956 self.imag + other.imag) 957 else: 958 return CObs(self.real + other, self.imag) 959 960 def __radd__(self, y): 961 return self + y 962 963 def __sub__(self, other): 964 if isinstance(other, np.ndarray): 965 return -1 * (other - self) 966 elif hasattr(other, 'real') and hasattr(other, 'imag'): 967 return CObs(self.real - other.real, self.imag - other.imag) 968 else: 969 return CObs(self.real - other, self.imag) 970 971 def __rsub__(self, other): 972 return -1 * (self - other) 973 974 def __mul__(self, other): 975 if isinstance(other, np.ndarray): 976 return other * self 977 elif hasattr(other, 'real') and hasattr(other, 'imag'): 978 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 979 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 980 [self.real, other.real, self.imag, other.imag], 981 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 982 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 983 [self.real, other.real, self.imag, other.imag], 984 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 985 elif getattr(other, 'imag', 0) != 0: 986 return CObs(self.real * other.real - self.imag * other.imag, 987 self.imag * other.real + self.real * other.imag) 988 else: 989 return CObs(self.real * other.real, self.imag * other.real) 990 else: 991 return CObs(self.real * other, self.imag * other) 992 993 def __rmul__(self, other): 994 return self * other 995 996 def __truediv__(self, other): 997 if isinstance(other, np.ndarray): 998 return 1 / (other / self) 999 elif hasattr(other, 'real') and hasattr(other, 'imag'): 1000 r = other.real ** 2 + other.imag ** 2 1001 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 1002 else: 1003 return CObs(self.real / other, self.imag / other) 1004 1005 def __rtruediv__(self, other): 1006 r = self.real ** 2 + self.imag ** 2 1007 if hasattr(other, 'real') and hasattr(other, 'imag'): 1008 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 1009 else: 1010 return CObs(self.real * other / r, -self.imag * other / r) 1011 1012 def __abs__(self): 1013 return np.sqrt(self.real**2 + self.imag**2) 1014 1015 def __pos__(self): 1016 return self 1017 1018 def __neg__(self): 1019 return -1 * self 1020 1021 def __eq__(self, other): 1022 return self.real == other.real and self.imag == other.imag 
1023 1024 def __str__(self): 1025 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 1026 1027 def __repr__(self): 1028 return 'CObs[' + str(self) + ']' 1029 1030 def __format__(self, format_type): 1031 if format_type == "": 1032 significance = 2 1033 format_type = "2" 1034 else: 1035 significance = int(float(format_type.replace("+", "").replace("-", ""))) 1036 return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
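A minimal sketch of complex arithmetic with CObs built from two toy Obs:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(11)
re = pe.Obs([rng.normal(1.0, 0.1, 500)], ['toy_ensemble'])
im = pe.Obs([rng.normal(0.5, 0.1, 500)], ['toy_ensemble'])
z = pe.CObs(re, im)
w = z * z.conjugate()  # |z|^2 with an imaginary part compatible with zero
w.gamma_method()       # runs the gamma method on real and imaginary part
print(w.real, w.imag)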
1039def gamma_method(x, **kwargs): 1040 """Vectorized version of the gamma_method applicable to lists or arrays of Obs. 1041 1042 See docstring of pe.Obs.gamma_method for details. 1043 """ 1044 return np.vectorize(lambda o: o.gm(**kwargs))(x)
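For instance, a sketch with toy data:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(2)
obs_list = [pe.Obs([rng.normal(i, 0.1, 500)], ['toy_ensemble']) for i in range(3)]
pe.gamma_method(obs_list)  # calls o.gm() on every element
print([o.dvalue for o in obs_list])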
1174def derived_observable(func, data, array_mode=False, **kwargs): 1175 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1176 1177 Parameters 1178 ---------- 1179 func : object 1180 arbitrary function of the form func(data, **kwargs). For the 1181 automatic differentiation to work, all numpy functions have to have 1182 the autograd wrapper (use 'import autograd.numpy as anp'). 1183 data : list 1184 list of Obs, e.g. [obs1, obs2, obs3]. 1185 num_grad : bool 1186 if True, numerical derivatives are used instead of autograd 1187 (default False). To control the numerical differentiation the 1188 kwargs of numdifftools.step_generators.MaxStepGenerator 1189 can be used. 1190 man_grad : list 1191 manually supply a list or an array which contains the jacobian 1192 of func. Use cautiously, supplying the wrong derivative will 1193 not be intercepted. 1194 1195 Notes 1196 ----- 1197 For simple mathematical operations it can be practical to use anonymous 1198 functions. For the ratio of two observables one can e.g. use 1199 1200 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1201 """ 1202 1203 data = np.asarray(data) 1204 raveled_data = data.ravel() 1205 1206 # Workaround for matrix operations containing non Obs data 1207 if not all(isinstance(x, Obs) for x in raveled_data): 1208 for i in range(len(raveled_data)): 1209 if isinstance(raveled_data[i], (int, float)): 1210 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1211 1212 allcov = {} 1213 for o in raveled_data: 1214 for name in o.cov_names: 1215 if name in allcov: 1216 if not np.allclose(allcov[name], o.covobs[name].cov): 1217 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1218 else: 1219 allcov[name] = o.covobs[name].cov 1220 1221 n_obs = len(raveled_data) 1222 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1223 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1224 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1225 1226 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1227 1228 if data.ndim == 1: 1229 values = np.array([o.value for o in data]) 1230 else: 1231 values = np.vectorize(lambda x: x.value)(data) 1232 1233 new_values = func(values, **kwargs) 1234 1235 multi = int(isinstance(new_values, np.ndarray)) 1236 1237 new_r_values = {} 1238 new_idl_d = {} 1239 for name in new_sample_names: 1240 idl = [] 1241 tmp_values = np.zeros(n_obs) 1242 for i, item in enumerate(raveled_data): 1243 tmp_values[i] = item.r_values.get(name, item.value) 1244 tmp_idl = item.idl.get(name) 1245 if tmp_idl is not None: 1246 idl.append(tmp_idl) 1247 if multi > 0: 1248 tmp_values = np.array(tmp_values).reshape(data.shape) 1249 new_r_values[name] = func(tmp_values, **kwargs) 1250 new_idl_d[name] = _merge_idx(idl) 1251 1252 def _compute_scalefactor_missing_rep(obs): 1253 """ 1254 Computes the scale factor that is to be multiplied with the deltas 1255 in the case where Obs with different subsets of replica are merged. 1256 Returns a dictionary with the scale factor for each Monte Carlo name. 
1257 1258 Parameters 1259 ---------- 1260 obs : Obs 1261 The observable corresponding to the deltas that are to be scaled 1262 """ 1263 scalef_d = {} 1264 for mc_name in obs.mc_names: 1265 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] 1266 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] 1267 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): 1268 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) 1269 return scalef_d 1270 1271 if 'man_grad' in kwargs: 1272 deriv = np.asarray(kwargs.get('man_grad')) 1273 if new_values.shape + data.shape != deriv.shape: 1274 raise Exception('Manual derivative does not have correct shape.') 1275 elif kwargs.get('num_grad') is True: 1276 if multi > 0: 1277 raise Exception('Multi mode currently not supported for numerical derivative') 1278 options = { 1279 'base_step': 0.1, 1280 'step_ratio': 2.5} 1281 for key in options.keys(): 1282 kwarg = kwargs.get(key) 1283 if kwarg is not None: 1284 options[key] = kwarg 1285 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1286 if tmp_df.size == 1: 1287 deriv = np.array([tmp_df.real]) 1288 else: 1289 deriv = tmp_df.real 1290 else: 1291 deriv = jacobian(func)(values, **kwargs) 1292 1293 final_result = np.zeros(new_values.shape, dtype=object) 1294 1295 if array_mode is True: 1296 1297 class _Zero_grad(): 1298 def __init__(self, N): 1299 self.grad = np.zeros((N, 1)) 1300 1301 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1302 d_extracted = {} 1303 g_extracted = {} 1304 for name in new_sample_names: 1305 d_extracted[name] = [] 1306 ens_length = len(new_idl_d[name]) 1307 for i_dat, dat in enumerate(data): 1308 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1309 for name in new_cov_names: 1310 g_extracted[name] = [] 1311 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1312 for i_dat, dat in enumerate(data): 1313 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1314 1315 for i_val, new_val in np.ndenumerate(new_values): 1316 new_deltas = {} 1317 new_grad = {} 1318 if array_mode is True: 1319 for name in new_sample_names: 1320 ens_length = d_extracted[name][0].shape[-1] 1321 new_deltas[name] = np.zeros(ens_length) 1322 for i_dat, dat in enumerate(d_extracted[name]): 1323 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1324 for name in new_cov_names: 1325 new_grad[name] = 0 1326 for i_dat, dat in enumerate(g_extracted[name]): 1327 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1328 else: 1329 for j_obs, obs in np.ndenumerate(data): 1330 scalef_d = _compute_scalefactor_missing_rep(obs) 1331 for name in obs.names: 1332 if name in obs.cov_names: 1333 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1334 else: 1335 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) 
1336 1337 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1338 1339 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1340 raise Exception('The same name has been used for deltas and covobs!') 1341 new_samples = [] 1342 new_means = [] 1343 new_idl = [] 1344 new_names_obs = [] 1345 for name in new_names: 1346 if name not in new_covobs: 1347 new_samples.append(new_deltas[name]) 1348 new_idl.append(new_idl_d[name]) 1349 new_means.append(new_r_values[name][i_val]) 1350 new_names_obs.append(name) 1351 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1352 for name in new_covobs: 1353 final_result[i_val].names.append(name) 1354 final_result[i_val]._covobs = new_covobs 1355 final_result[i_val]._value = new_val 1356 final_result[i_val].reweighted = reweighted 1357 1358 if multi == 0: 1359 final_result = final_result.item() 1360 1361 return final_result
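A sketch extending the docstring example with toy data; note the autograd-wrapped numpy (anp) inside the function, which is required for the automatic differentiation:

import numpy as np
import autograd.numpy as anp
import pyerrors as pe

rng = np.random.default_rng(7)
obs1 = pe.Obs([rng.normal(2.0, 0.1, 500)], ['toy_ensemble'])
obs2 = pe.Obs([rng.normal(1.0, 0.1, 500)], ['toy_ensemble'])
log_ratio = pe.derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2])
log_ratio.gamma_method()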
1393def reweight(weight, obs, **kwargs): 1394 """Reweight a list of observables. 1395 1396 Parameters 1397 ---------- 1398 weight : Obs 1399 Reweighting factor. An Observable that has to be defined on a superset of the 1400 configurations in obs[i].idl for all i. 1401 obs : list 1402 list of Obs, e.g. [obs1, obs2, obs3]. 1403 all_configs : bool 1404 if True, the reweighted observables are normalized by the average of 1405 the reweighting factor on all configurations in weight.idl and not 1406 on the configurations in obs[i].idl. Default False. 1407 """ 1408 result = [] 1409 for i in range(len(obs)): 1410 if len(obs[i].cov_names): 1411 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1412 if not set(obs[i].names).issubset(weight.names): 1413 raise Exception('Error: Ensembles do not fit') 1414 for name in obs[i].names: 1415 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1416 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1417 new_samples = [] 1418 w_deltas = {} 1419 for name in sorted(obs[i].names): 1420 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1421 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1422 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1423 1424 if kwargs.get('all_configs'): 1425 new_weight = weight 1426 else: 1427 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1428 1429 result.append(tmp_obs / new_weight) 1430 result[-1].reweighted = True 1431 1432 return result
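A toy sketch; the exponential weights are made-up stand-ins for actual reweighting factors defined on the same configurations:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(1)
obs = pe.Obs([rng.normal(1.0, 0.1, 500)], ['toy_ensemble'])
weight = pe.Obs([np.exp(rng.normal(0.0, 0.05, 500))], ['toy_ensemble'])
r_obs = pe.reweight(weight, [obs])[0]
r_obs.gamma_method()
print(r_obs.reweighted)  # True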
1435def correlate(obs_a, obs_b): 1436 """Correlate two observables. 1437 1438 Parameters 1439 ---------- 1440 obs_a : Obs 1441 First observable 1442 obs_b : Obs 1443 Second observable 1444 1445 Notes 1446 ----- 1447 Keep in mind to only correlate primary observables which have not been reweighted 1448 yet. The reweighting has to be applied after correlating the observables. 1449 Currently only works if ensembles are identical (this is not strictly necessary). 1450 """ 1451 1452 if sorted(obs_a.names) != sorted(obs_b.names): 1453 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1454 if len(obs_a.cov_names) or len(obs_b.cov_names): 1455 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1456 for name in obs_a.names: 1457 if obs_a.shape[name] != obs_b.shape[name]: 1458 raise Exception('Shapes of ensemble', name, 'do not fit') 1459 if obs_a.idl[name] != obs_b.idl[name]: 1460 raise Exception('idl of ensemble', name, 'do not fit') 1461 1462 if obs_a.reweighted is True: 1463 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1464 if obs_b.reweighted is True: 1465 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1466 1467 new_samples = [] 1468 new_idl = [] 1469 for name in sorted(obs_a.names): 1470 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1471 new_idl.append(obs_a.idl[name]) 1472 1473 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1474 o.reweighted = obs_a.reweighted or obs_b.reweighted 1475 return o
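A minimal sketch with two toy primary observables defined on identical configurations:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(8)
obs_a = pe.Obs([rng.normal(1.0, 0.1, 500)], ['toy_ensemble'])
obs_b = pe.Obs([rng.normal(2.0, 0.1, 500)], ['toy_ensemble'])
ab = pe.correlate(obs_a, obs_b)  # product observable, reweight afterwards if needed
ab.gamma_method()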
1478def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1479 r'''Calculates the error covariance matrix of a set of observables. 1480 1481 WARNING: This function should be used with care, especially for observables with support on multiple 1482 ensembles with differing autocorrelations. See the notes below for details. 1483 1484 The gamma method has to be applied first to all observables. 1485 1486 Parameters 1487 ---------- 1488 obs : list or numpy.ndarray 1489 List or one dimensional array of Obs 1490 visualize : bool 1491 If True plots the corresponding normalized correlation matrix (default False). 1492 correlation : bool 1493 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1494 smooth : None or int 1495 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1496 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1497 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1498 small ones. 1499 1500 Notes 1501 ----- 1502 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1503 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1504 in the absence of autocorrelation. 1505 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1506 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1507 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1508 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1509 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1510 ''' 1511 1512 length = len(obs) 1513 1514 max_samples = np.max([o.N for o in obs]) 1515 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1516 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1517 1518 cov = np.zeros((length, length)) 1519 for i in range(length): 1520 for j in range(i, length): 1521 cov[i, j] = _covariance_element(obs[i], obs[j]) 1522 cov = cov + cov.T - np.diag(np.diag(cov)) 1523 1524 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1525 1526 if isinstance(smooth, int): 1527 corr = _smooth_eigenvalues(corr, smooth) 1528 1529 if visualize: 1530 plt.matshow(corr, vmin=-1, vmax=1) 1531 plt.set_cmap('RdBu') 1532 plt.colorbar() 1533 plt.draw() 1534 1535 if correlation is True: 1536 return corr 1537 1538 errors = [o.dvalue for o in obs] 1539 cov = np.diag(errors) @ corr @ np.diag(errors) 1540 1541 eigenvalues = np.linalg.eigh(cov)[0] 1542 if not np.all(eigenvalues >= 0): 1543 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1544 1545 return cov
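A sketch with two correlated toy observables; the shared fluctuation `base` is what generates the off-diagonal covariance:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(3)
base = rng.normal(0.0, 1.0, 1000)
obs_a = pe.Obs([base + rng.normal(0.0, 0.5, 1000)], ['toy_ensemble'])
obs_b = pe.Obs([base + rng.normal(0.0, 0.5, 1000)], ['toy_ensemble'])
for o in (obs_a, obs_b):
    o.gamma_method()  # required before calling covariance
cov = pe.covariance([obs_a, obs_b])
corr = pe.covariance([obs_a, obs_b], correlation=True)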
1625def import_jackknife(jacks, name, idl=None): 1626 """Imports jackknife samples and returns an Obs 1627 1628 Parameters 1629 ---------- 1630 jacks : numpy.ndarray 1631 numpy array containing the mean value as zeroth entry and 1632 the N jackknife samples as first to Nth entry. 1633 name : str 1634 name of the ensemble the samples are defined on. 1635 """ 1636 length = len(jacks) - 1 1637 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1638 samples = jacks[1:] @ prj 1639 mean = np.mean(samples) 1640 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1641 new_obs._value = jacks[0] 1642 return new_obs
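A round-trip sketch with export_jackknife on toy data:

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.default_rng(5).normal(1.0, 0.2, 500)], ['toy_ensemble'])
jacks = obs.export_jackknife()
reconstructed = pe.import_jackknife(jacks, 'toy_ensemble')
reconstructed.gamma_method(S=0)  # uncorrelated error of the reconstructed Obs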
1645def import_bootstrap(boots, name, random_numbers): 1646 """Imports bootstrap samples and returns an Obs 1647 1648 Parameters 1649 ---------- 1650 boots : numpy.ndarray 1651 numpy array containing the mean value as zeroth entry and 1652 the N bootstrap samples as first to Nth entry. 1653 name : str 1654 name of the ensemble the samples are defined on. 1655 random_numbers : np.ndarray 1656 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, 1657 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo 1658 chain to be reconstructed. 1659 """ 1660 samples, length = random_numbers.shape 1661 if samples != len(boots) - 1: 1662 raise ValueError("Random numbers do not have the correct shape.") 1663 1664 if samples < length: 1665 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1666 1667 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1668 1669 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1670 ret = Obs([samples], [name]) 1671 ret._value = boots[0] 1672 return ret
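A round-trip sketch; the reconstruction requires the same random numbers that were used for the export and at least as many bootstrap samples as configurations. All numbers are illustrative:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(9)
obs = pe.Obs([rng.normal(0.5, 0.1, 100)], ['toy_ensemble'])
rn = rng.integers(0, obs.N, size=(500, obs.N))  # explicit random numbers
boots = obs.export_bootstrap(samples=500, random_numbers=rn)
reconstructed = pe.import_bootstrap(boots, 'toy_ensemble', random_numbers=rn)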
1675def merge_obs(list_of_obs): 1676 """Combine all observables in list_of_obs into one new observable 1677 1678 Parameters 1679 ---------- 1680 list_of_obs : list 1681 list of the Obs objects to be combined 1682 1683 Notes 1684 ----- 1685 It is not possible to combine obs which are based on the same replicum 1686 """ 1687 replist = [item for obs in list_of_obs for item in obs.names] 1688 if (len(replist) == len(set(replist))) is False: 1689 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1690 if any([len(o.cov_names) for o in list_of_obs]): 1691 raise Exception('Not possible to merge data that contains covobs!') 1692 new_dict = {} 1693 idl_dict = {} 1694 for o in list_of_obs: 1695 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1696 for key in set(o.deltas) | set(o.r_values)}) 1697 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1698 1699 names = sorted(new_dict.keys()) 1700 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1701 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1702 return o
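A sketch combining two replica of the same toy ensemble; the `ensemble|replicum` naming convention is assumed:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(13)
rep1 = pe.Obs([rng.normal(1.0, 0.1, 400)], ['toy_ensemble|r01'])
rep2 = pe.Obs([rng.normal(1.0, 0.1, 600)], ['toy_ensemble|r02'])
combined = pe.merge_obs([rep1, rep2])
combined.gamma_method()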
1705def cov_Obs(means, cov, name, grad=None): 1706 """Create an Obs based on mean(s) and a covariance matrix 1707 1708 Parameters 1709 ---------- 1710 means : list of floats or float 1711 N mean value(s) of the new Obs 1712 cov : list or array 1713 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1714 name : str 1715 identifier for the covariance matrix 1716 grad : list or array 1717 Gradient of the Covobs wrt. the means belonging to cov. 1718 """ 1719 1720 def covobs_to_obs(co): 1721 """Make an Obs out of a Covobs 1722 1723 Parameters 1724 ---------- 1725 co : Covobs 1726 Covobs to be embedded into the Obs 1727 """ 1728 o = Obs([], [], means=[]) 1729 o._value = co.value 1730 o.names.append(co.name) 1731 o._covobs[co.name] = co 1732 o._dvalue = np.sqrt(co.errsq()) 1733 return o 1734 1735 ol = [] 1736 if isinstance(means, (float, int)): 1737 means = [means] 1738 1739 for i in range(len(means)): 1740 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1741 if ol[0].covobs[name].N != len(means): 1742 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1743 if len(ol) == 1: 1744 return ol[0] 1745 return ol
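Two usage sketches; names and numbers are illustrative:

import pyerrors as pe

# single external input with variance 0.02**2 (0d covariance)
zp = pe.cov_Obs(0.78, 0.02 ** 2, 'Z_P_illustrative')
# two correlated inputs sharing one 2x2 covariance matrix, returned as a list
low, high = pe.cov_Obs([1.0, 2.0], [[0.01, 0.004], [0.004, 0.02]], 'toy_inputs')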