Source code for scine_autocas

"""Module to provide the Autocas class, which controlls all active space
related methods."""
# -*- coding: utf-8 -*-
__copyright__ = """This code is licensed under the 3-clause BSD license.
Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group.
See LICENSE.txt for details. """

from typing import Any, Dict, List, Optional, Tuple, Union, cast

import numpy as np

from scine_autocas.autocas_utils.active_space import ActiveSpace
from scine_autocas.autocas_utils.diagnostics import Diagnostics
from scine_autocas.autocas_utils.large_active_spaces import LargeSpaces
from scine_autocas.autocas_utils.molecule import Molecule

from ._version import __version__  # noqa: F401


[docs]class SingleReferenceException(Exception):
    """
    Raised when autoCAS determines that the system is single reference
    instead of multireference. This allows the client to catch the exception
    in case they want to perform a single reference calculation afterward.
    """
    pass


[docs]class Autocas:
    """
    Main class to handle all autoCAS functionality.

    Attributes
    ----------
    plateau_values : int, default = 10
        determines the minimum length of a plateau
    threshold_step : float, default 0.01
        determines the threshold steps in the plateau search and the number of threshold, respectively
    cas : ActiveSpace
        stores information on the active space
    molecule : Molecule
        stores information about the current molecule
    diagnostics : Diagnostics
        stores information from the DMRG calculation
    large_spaces : LargeSpaces
        controls settings for the large active space protocol
    _excited_states_orbital_indices : List[int]
        stores excited states CAS indices
    _excited_states_mod : int
        handles excited states in combination with large cas
    """

    __slots__ = (
        "plateau_values",
        "threshold_step",
        "cas",
        "molecule",
        "diagnostics",
        "large_spaces",
        "_excited_states_orbital_indices",
        "_excited_states_mod",
    )

[docs]    def __init__(self, molecule: Molecule, settings_dict: Optional[Dict[str, Any]] = None):
        """Initialize an autocas object.

        The autocas class provides all functionalities to set up active spaces from molecular information,
        or to apply an active space search based on entopy information from a dmrg calculation.

        Parameters
        ----------
        molecule : Molecule
            stores molecular information
        settings_dict : Dict[str, Any], optional
            holds settings from an autocas yaml input file
        """
        self.plateau_values: int = 10
        """determines the minimum length of a plateau"""
        self.threshold_step: float = 0.01
        """determines the threshold steps in the plateau search and the number of threshold, respectively"""
        self.cas: ActiveSpace = ActiveSpace()
        """stores information on the active space"""
        self.molecule: Molecule = molecule
        """stores information about the current molecule"""
        self.diagnostics: Diagnostics = Diagnostics()
        """stores information from the DMRG calculation"""
        self.large_spaces: LargeSpaces = LargeSpaces()
        """controls settings for the large active space protocol"""
        self._excited_states_orbital_indices: List[int] = []
        """stores excited states CAS indices"""
        self._excited_states_mod: int = -1
        """handles excited states in combination with large CAS"""

        if settings_dict is not None:
            for key in settings_dict:
                if key == "diagnostics":
                    self.diagnostics = Diagnostics(settings_dict[key])
                elif key == "large_spaces":
                    self.large_spaces = LargeSpaces(settings_dict[key])
                elif key == "cas":
                    self.cas = ActiveSpace(settings_dict[key])
                elif hasattr(self, key):
                    setattr(self, key, settings_dict[key])

[docs]    def make_initial_active_space(self) -> Tuple[List[int], List[int]]:
        """Build valence active space.

        Returns
        -------
        cas_occupations : List[int]
            a list with int occupations, e.g. 2: doubly occupied, 1: singly occupied, 0: virtual.
        cas_indices : List[int]
            contains the indices of the orbitals in the active space
        """
        self.cas.make_valence_cas(self.molecule)
        return self.cas.occupation, self.cas.orbital_indices

[docs]    def _sort_orbitals_by_s1(
        self, thresholds_list: np.ndarray, orbitals_index: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Sort orbital indices with respect to their threshold value.

        Parameters
        ----------
        thresholds_list : List[float]
            contains the thresholds of s1 values e.g. s1_i / max(s1)
        orbitals_index : List[int]
            a list with orbitals indices

        Returns
        -------
        thresholds_list : List[float]
            the ordered tresholds_list
        orbitals_index : List[int]
            the ordered orbitals_index with respect to the thresholds
        """
        # sort arrays decreasing
        sortkey = np.argsort(-thresholds_list)
        thresholds_list = thresholds_list[sortkey]
        orbitals_index = orbitals_index[sortkey]
        return thresholds_list, orbitals_index

[docs]    def _get_plateau(self, thresholds_list: np.ndarray) -> List[int]:
        """Create a plateau vector from s1 values.

        A plateau is complete after not changing the number of orbitals over 10%, e.g. self.plateau_values.

        Parameters
        ----------
        thresholds_list : np.ndarray
            contains fractions of s1 values, e.g. s1_i / max(s1)

        Returns
        -------
        plateau_vector : List[int]
            contains the indices from the plateau
        """
        percent = np.arange(0, 1.0, self.threshold_step)
        cas_orbs_over_threshold = np.zeros(int(1 / self.threshold_step + 1))
        for i, _ in enumerate(percent):
            cas_orbs_over_threshold[i] = sum(thresholds_list > percent[i])
        number_cas_orbs = cas_orbs_over_threshold[0]
        plateau_vector = []
        thresh_count = 1
        for i, threshold in enumerate(cas_orbs_over_threshold):
            if threshold == number_cas_orbs:
                thresh_count = thresh_count + 1
            else:
                thresh_count = 1
                number_cas_orbs = threshold
            if thresh_count >= self.plateau_values:
                plateau_vector.append(int(threshold))
        return plateau_vector

[docs]    def _find_plateaus(self) -> Tuple[List[int], np.ndarray]:
        """Search for a plateau of s1 values.

        Returns
        -------
        plateau_vector : List[int]
            the indices from the plateau
        orbitals_index : np.ndarray
                indices of the orbitals in the active space
        """
        max_s1 = max(self.diagnostics.s1_entropy)
        print(f"Maximal single orbital entropy: {max_s1}")
        if self._excited_states_orbital_indices:
            orbitals_index = np.array(self._excited_states_orbital_indices)
        else:
            orbitals_index = np.array(np.arange(1, self.cas.n_orbitals + 1))

        thresholds_list = np.zeros((self.cas.n_orbitals))
        # print(self.cas.n_orbitals)

        for i in range(self.cas.n_orbitals):
            # print(i)
            thresholds_list[i] = self.diagnostics.s1_entropy[i] / max_s1
        thresholds_list, orbitals_index = self._sort_orbitals_by_s1(
            thresholds_list, orbitals_index
        )
        plateau_vector = self._get_plateau(thresholds_list)
        return plateau_vector, orbitals_index

[docs]    def _make_active_space(self) -> bool:
        """Make active space by searching plateaus in s1 and reordering
        orbitals.

        Returns
        -------
        bool
            True if a plateau was found, e.g. a smaller active space; False if no plateau was found
        """
        plateau_vector, orbitals_index = self._find_plateaus()
        # found plateau

        if len(plateau_vector) != 0:
            self.cas.get_from_plateau(
                plateau_vector, list(orbitals_index.tolist())
            )
            print(f"Found plateau, including {self.cas.n_orbitals} orbitals.")
            return True
        return False

[docs]    def get_active_space(
        self, occupation: List[int], s1_entropy: np.ndarray, s2_entropy: Optional[np.ndarray] = None,
            mutual_information: Optional[np.ndarray] = None, force_cas: Optional[bool] = False,
            xyz_file: Optional[str] = None, molecule: Optional[Molecule] = None
    ) -> Tuple[List[int], List[int]]:
        """Get active space.

        autoCAS uses single orbital entropies to decrease the active space size by finding plateaus and
        including only orbitals with large single orbital entropies

        Parameters
        ----------
        occupation : List[int]
            orbital occupation for each orbital in active space
        s1_entropy : np.ndarray
            single orbital entropy
        s2_entropy : np.ndarray, optional
            two orbital entropy, not required right now
        mutual_information : np.ndarray, optional
            mutual information, not required right now
        force_cas :: bool
            forces to find an active space, even if no orbital has a high single orbital entropy

        Returns
        -------
        cas_occupations : List[int]
            the occupation of each orbital in the active space,
            e.g. 2: doubly occupied, 1: singly occupied, 0: virtual orbital
        cas_indices : List[int]
            indices of orbitals for the CAS calculation

        Raises
        ------
        AttributeError
            if no xyz file was provided
        FileNotFoundError
            if provided xyz file cannot be found
        Exception
            if the entropies indicate, that no cas is required, and not forced
        NotImplementedError
            if a larger cas than valence is theoretically required
        """

        print("")
        print("*******************************************************************************************")
        print("*                                                                                         *")
        print("*                                 Active space search                                     *")
        print("*                                                                                         *")
        print("*******************************************************************************************")

        # check if molecule is set
        if self.molecule.core_orbitals != -1 and molecule is None:
            pass
        else:
            try:
                if molecule is not None:
                    self.molecule = molecule
                elif xyz_file is not None:
                    self.molecule = Molecule(xyz_file)
                else:
                    raise AttributeError("No xyz-file provided.")
            except FileNotFoundError as exc:
                raise FileNotFoundError("No xyz-file provided.") from exc

        # check if excited states and large cas is used
        if self._excited_states_mod < -19:
            self._excited_states_mod = self.cas.n_orbitals

        # check if excited states are used
        if self._excited_states_orbital_indices is None:
            self.cas.update(self.molecule, occupation)
        else:
            self.cas.update(self.molecule, occupation, self._excited_states_orbital_indices)

        # for excited states and large cas
        if self._excited_states_mod > 1:
            self.cas.n_orbitals = self._excited_states_mod

        # set diagnostics
        self.diagnostics.s1_entropy = s1_entropy
        if s2_entropy is not None:
            self.diagnostics.s2_entropy = s2_entropy
        else:
            self.diagnostics.s2_entropy = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
        if mutual_information is not None:
            self.diagnostics.mutual_information = mutual_information
        else:
            self.diagnostics.mutual_information = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))

        self.cas.n_orbitals = len(s1_entropy)
        output_string = f"Start active space search for provided set of {self.cas.n_orbitals} orbitals"
        output_string += f" and {int(sum(occupation))} electrons."
        print(output_string)

        # check for single reference
        if self.diagnostics.is_single_reference() and not force_cas:
            print("Single orbital entropies (s1) are too low. AutoCAS suggests no active space.")
            print("Note: if you want an active space calculation anyway, you can enforce this")
            print("behavior, by adding 'force_cas=True' as agrument.")
            raise SingleReferenceException("No active space method required here")

        # make active space
        if self._make_active_space():
            print("Found smaller active space.")
            return self._pretty_return()

        print("No plateau found: autoCAS tries to exclude orbitals with low single orbital entropy.")

        self.cas.orbital_indices = []
        self.diagnostics.s1_entropy = np.array(self.cas.exclude_orbitals(self.diagnostics))

        if len(self.diagnostics.s1_entropy) < len(s1_entropy):
            occupation = self.cas.occupation
            self._excited_states_orbital_indices = self.cas.orbital_indices
            print("Successfully excluded orbitals with low single orbital entropy.")
            return self.get_active_space(
                occupation,
                self.diagnostics.s1_entropy,
                s2_entropy=self.diagnostics.s2_entropy,
                mutual_information=self.diagnostics.mutual_information,
                force_cas=force_cas,
                xyz_file=xyz_file,
            )
        self._excited_states_orbital_indices = []
        if self.cas.excluded and force_cas:
            print("Still no plateau found after excluding orbitals with low entropy.")
            return self._pretty_return()
        if force_cas:
            print("Enforcing CAS, even though there was no plateau.")
            return self._pretty_return()
        print("Not able to reduce active space size.")
        raise NotImplementedError("Repeat analysis with large active space.")
        # TODO: repeat analysis with larger cas

[docs]    def _pretty_return(self):
        """Ensure that cas occupation is always list of int."""
        self.cas.occupation = [int(i) for i in self.cas.occupation]
        print(f"Selected active space CAS(e, o): ({sum(self.cas.occupation)}, {len(self.cas.occupation)})")
        print(f"Orbital indices:     {self.cas.orbital_indices}")
        print(f"Orbital occupations: {self.cas.occupation}")
        return self.cas.occupation, self.cas.orbital_indices

[docs]    def get_large_active_spaces(self):
        """Create a list of active spaces for the large active space protocol
        in autoCAS.

        Returns
        -------
        occupation : List[List[int]]
            List of occupations for large CAS protocol
        orbital_indices : List[List[int]]
            List of orbitals indices for large CAS protocol
        """
        if self.large_spaces.max_orbitals > len(self.cas.orbital_indices):
            print(
                f"""Large CAS protocol is not required here, but it will be done anyways with
                \nmax orbitals = number of orbitals/2 = {len(self.cas.orbital_indices)/2}"""
            )
            self.large_spaces.max_orbitals = len(self.cas.orbital_indices) / 2
        occupied_orbitals, virtual_orbitals = self.large_spaces.separate_space(
            self.cas, self.molecule
        )
        self.large_spaces.generate_spaces(
            occupied_orbitals, virtual_orbitals, self.molecule
        )
        return self.large_spaces.occupation, self.large_spaces.orbital_indices

[docs]    def collect_entropies(
        self,
        indices_list: List[List[int]],
        occupations_list: List[List[int]],
        s1_list: List[np.ndarray],
        s2_list: Optional[List[np.ndarray]] = None,
        mut_inf_list: Optional[List[np.ndarray]] = None,
    ) -> Union[
        Tuple[np.ndarray, np.ndarray],
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
    ]:
        """Fill entropy matrices from lists of entropies produced by the large
        active space protocol.

        Parameters
        ----------
        indices_list : List[List[int]]
            contains several orbital indices from each active space calculation of the large CAS protocol
        occupations_list : List[List[int]]
            contains several orbital occupations for each active space calculation of the large CAS protocol
        s1_list : List[np.ndarray]
            contains several s1 entropies from the large CAS protocol
        s2_list : List[np.ndarray]
            contains several s2 entropies from the large CAS protocol
        mut_inf_list : List[np.ndarray]
            contains several mutual information from the large CAS protocol

        Returns
        -------
        occupation : List[int]
            the occupation for the large active space
        s1_entropy : np.ndarray
            s1 entropy build and scaled from several entropies
        s2_entropy : np.ndarray, optional
            s2 entropy build and scaled from several entropies
        mut_inf : np.ndarray, optional
            mutual information build and scaled from several mutual information
        """
        if self._excited_states_mod > 1:
            self.cas.n_orbitals = self._excited_states_mod
        occupation = np.zeros(self.cas.n_orbitals)
        s1_entropy = np.zeros(self.cas.n_orbitals)
        scale_1 = np.zeros(self.cas.n_orbitals)

        for i, _ in enumerate(s1_list):
            for count, index in enumerate(indices_list[i]):
                occupation[index - self.molecule.core_orbitals] = occupations_list[i][count]
                if self.large_spaces.average_entanglement:
                    s1_entropy[index - self.molecule.core_orbitals] += s1_list[i][count]
                    scale_1[index - self.molecule.core_orbitals] += 1
                else:
                    if s1_entropy[index - self.molecule.core_orbitals] < s1_list[i][count]:
                        s1_entropy[index - self.molecule.core_orbitals] = s1_list[i][count]
        if self.large_spaces.average_entanglement:
            # normalization
            for i, _ in enumerate(s1_entropy):
                s1_entropy[i] = s1_entropy[i] / scale_1[i]

        occupation = occupation.astype(int, copy=False)
        self.diagnostics.s1_entropy = s1_entropy
        self.diagnostics.s2_entropy = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
        self.diagnostics.mutual_information = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
        self.cas.occupation = list(occupation.tolist())

        if s2_list and mut_inf_list:
            s2_entropy = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
            mut_inf = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
            scale_2 = np.zeros((self.cas.n_orbitals, self.cas.n_orbitals))
            for i in range(len(s1_list)):
                for count1, index_1 in enumerate(indices_list[i]):
                    for count2, index_2 in enumerate(indices_list[i]):
                        if self.large_spaces.average_entanglement:
                            s2_entropy[
                                index_1 - self.molecule.core_orbitals,
                                index_2 - self.molecule.core_orbitals,
                            ] += s2_list[i][count1, count2]
                            mut_inf[
                                index_1 - self.molecule.core_orbitals,
                                index_2 - self.molecule.core_orbitals,
                            ] += mut_inf_list[i][count1, count2]
                            scale_2[
                                index_1 - self.molecule.core_orbitals,
                                index_2 - self.molecule.core_orbitals,
                            ] += 1
                        else:
                            if s2_entropy[
                                index_1 - self.molecule.core_orbitals,
                                index_2 - self.molecule.core_orbitals,
                            ] < s2_list[i][count1, count2]:
                                s2_entropy[
                                    index_1 - self.molecule.core_orbitals,
                                    index_2 - self.molecule.core_orbitals,
                                ] = s2_list[i][count1, count2]
                            if mut_inf[
                                index_1 - self.molecule.core_orbitals,
                                index_2 - self.molecule.core_orbitals,
                            ] < mut_inf_list[i][count1, count2]:
                                mut_inf[
                                    index_1 - self.molecule.core_orbitals,
                                    index_2 - self.molecule.core_orbitals,
                                ] = mut_inf_list[i][count1, count2]
            if self.large_spaces.average_entanglement:
                # normalization
                for i in range(len(s1_entropy)):
                    for j in range(len(s1_entropy)):
                        if scale_2[i, j] != 0:
                            s2_entropy[i, j] = s2_entropy[i, j] / scale_2[i, j]
                            mut_inf[i, j] = mut_inf[i, j] / scale_2[i, j]
            self.diagnostics.s2_entropy = s2_entropy
            self.diagnostics.mutual_information = mut_inf
            return occupation, s1_entropy, s2_entropy, mut_inf

        return occupation, s1_entropy

[docs]    def get_cas_from_large_cas(
        self,
        indices_list: List[List[int]],
        occupations_list: List[List[int]],
        s1_list: List[np.ndarray],
        s2_list: Optional[List[np.ndarray]] = None,
        mut_inf_list: Optional[List[np.ndarray]] = None,
        force_cas: Optional[bool] = False,
    ) -> Tuple[List[int], List[int]]:
        """Shortcut function to directly get an active space from the large CAS
        protocol.

        Parameters
        ----------
        indices_list : List[List[int]]
            contains several orbital indices from each active space calculation of the large CAS protocol
        occupations_list : List[List[int]]
            contains several orbital occupations for each active space calculation of the large CAS protocol
        s1_list : List[np.ndarray]
            contains several s1 entropies from the large CAS protocol
        s2_list : List[np.ndarray], optional
            contains several s2 entropies from the large CAS protocol
        mut_inf_list : List[np.ndarray], optional
            contains several mutual information from the large CAS protocol
        force_cas : bool, optional, default = False
            forces to find an active space, even if no orbital has a high single orbital entropy

        Returns
        -------
        cas_occupations : List[int]
            the occupation of each orbital in the active space,
            e.g. 2: doubly occupied, 1: singly occupied, 0: virtual orbital
        cas_indices : List[int]
            indices of orbitals for the CAS calculation

        Raises
        ------
        ValueError
            if something strange happened
        """
        if s2_list and mut_inf_list:
            entropies = self.collect_entropies(
                indices_list, occupations_list, s1_list, s2_list, mut_inf_list
            )
            if len(entropies) == 4:
                entropies = cast(Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray], entropies)
                occupation = list(entropies[0].tolist())
                s1_entropy = entropies[1]
                s2_entropy = entropies[2]
                mut_inf = entropies[3]
                return self.get_active_space(occupation, s1_entropy, s2_entropy, mut_inf, force_cas=force_cas)

        entropies = self.collect_entropies(
            indices_list, occupations_list, s1_list, s2_list, mut_inf_list
        )
        if len(entropies) == 2:
            occupation = list(entropies[0].tolist())
            s1_entropy = entropies[1]
            return self.get_active_space(occupation, s1_entropy, force_cas=force_cas)

        raise ValueError("Something went wrong in get_cas_from_large_cas.")

[docs]    def get_cas_from_excited_states(
        self, occupation: List[int], s1_list: List[np.ndarray], s2_list: Optional[List[np.ndarray]] = None,
        mutual_information_list: Optional[List[np.ndarray]] = None, force_cas: Optional[bool] = False,
    ) -> Tuple[List[int], List[int]]:
        """Shortcut function to directly get an active space from excited
        states.

        Parameters
        ----------
        occupation : List[int]
            contains the orbital occupation for the active space.
        s1_list : List[np.ndarray]
            contains the one orbital entropies for each provided occupation
        s2_list : List[np.ndarray], optional
            contains the two orbital entropies for each provided occupation
        mutual_information_list : List[np.ndarray], optional
            contains the mutual information for each provided occupation

        Returns
        -------
        final_occupation : List[int]
            contains the occupations of the found active space
        final_orbital_indices : List[int]
            contains the orbital indices of the found active space
        """
        final_occupation_list = []
        final_orbital_list = []
        for i, s1_entropy in enumerate(s1_list):
            if mutual_information_list:
                if not s2_list:
                    s2_list = mutual_information_list
                final_occupation, final_orbital_indices = self.get_active_space(
                    occupation,
                    s1_entropy,
                    s2_entropy=s2_list[i],
                    mutual_information=mutual_information_list[i],
                    force_cas=force_cas,
                )
            else:
                final_occupation, final_orbital_indices = self.get_active_space(
                    occupation,
                    s1_entropy,
                    force_cas=force_cas,
                )

            final_occupation_list.append(final_occupation)
            final_orbital_list.append(final_orbital_indices)
        final_occupation = []
        final_orbital_indices = []
        for i, final_orbitals in enumerate(final_orbital_list):
            for j, final_orbital in enumerate(final_orbitals):
                if final_orbital not in final_orbital_indices:
                    final_orbital_indices.append(final_orbital)
                    final_occupation.append(final_occupation_list[i][j])
        return final_occupation, final_orbital_indices

[docs]    def get_cas_from_large_cas_excited_states(
        self, indices: List[List[int]], occupations: List[List[int]], s1_entropy: List[List[np.ndarray]],
        s2_entropy: Optional[List[List[np.ndarray]]] = None, mut_inf: Optional[List[List[np.ndarray]]] = None,
        force_cas: bool = False
    ):
        """Get active space from a large CAS excited state calculation.

        Parameters
        ----------
        indices :
            contains a list of indices for each state
        """

        self._excited_states_mod = -20

        # re-sorting entropies by states
        partial_s1_per_state: List[List[np.ndarray]] = []
        for i in range(len(s1_entropy[0])):
            partial_s1_per_state.append([])

        for i, part_s1 in enumerate(s1_entropy):
            for j, part_s1_1 in enumerate(part_s1):
                partial_s1_per_state[j].append(part_s1_1)

        if s2_entropy is not None and mut_inf is not None:

            partial_s2_per_state: List[List[np.ndarray]] = []
            partial_mut_inf_per_state: List[List[np.ndarray]] = []

            for i in range(len(s1_entropy[0])):
                partial_s2_per_state.append([])
                partial_mut_inf_per_state.append([])

            for i, part_s1 in enumerate(s1_entropy):
                for j, part_s1_1 in enumerate(part_s1):
                    partial_s2_per_state[j].append(s2_entropy[i][j])
                    partial_mut_inf_per_state[j].append(mut_inf[i][j])

        # get best active space per state
        final_occupation_list = []
        final_orbital_list = []

        final_orbital_indices = []
        final_occupation = []
        for i, partial_s1_state in enumerate(partial_s1_per_state):
            print(f"Evaluating state: {i}")
            if mut_inf is not None and s2_entropy is not None:
                occupation_per_state, orbital_indices_per_state = self.get_cas_from_large_cas(
                    indices, occupations, partial_s1_state, partial_s2_per_state[i],
                    partial_mut_inf_per_state[i], force_cas=force_cas,
                )
            else:
                occupation_per_state, orbital_indices_per_state = self.get_cas_from_large_cas(
                    indices, occupations, partial_s1_state, force_cas=force_cas,
                )

            print_n_electrons = int(sum(occupation_per_state))
            print_n_orbitals = len(occupation_per_state)
            print(
                f"Optimal CAS for current state CAS(e, o): CAS({print_n_electrons}, {print_n_orbitals})"
            )
            print(f"Optimal Indices:     {orbital_indices_per_state}")
            print(f"Optimal Occupations: {occupation_per_state}")

            final_occupation_list.append(occupation_per_state)
            final_orbital_list.append(orbital_indices_per_state)

            # merge active spaces and occupations
            for j, orbital_index in enumerate(orbital_indices_per_state):
                if orbital_index not in final_orbital_indices:
                    final_orbital_indices.append(orbital_index)
                    final_occupation.append(int(occupation_per_state[j]))

            # for i, final_orbitals in enumerate(final_orbital_list):
            #     for j, final_orbital in enumerate(final_orbitals):
            #         if final_orbital not in final_orbital_indices:
            #             final_orbital_indices.append(final_orbital)
            #             final_occupation.append(final_occupation_list[i][j])

        # sort indices and occupations
        final_orbital_indices_tmp = np.array(final_orbital_indices)
        final_occupation_tmp = np.array(final_occupation)
        sort_key = final_orbital_indices_tmp.argsort()
        final_orbital_indices_tmp = final_orbital_indices_tmp[sort_key]
        final_occupation_tmp = final_occupation_tmp[sort_key]
        final_occupation = list(final_occupation_tmp.tolist())
        final_orbital_indices = list(final_orbital_indices_tmp.tolist())

        # output
        n_states = len(final_occupation_list)
        print_n_electrons = int(sum(occupation_per_state))
        print_n_orbitals = len(occupation_per_state)
        print(f"Used entropies from {n_states} for evaluation of the active space.")
        print(f"Best active space involving all states: CAS({print_n_electrons}, {print_n_orbitals})")
        print(f"Optimal Indices:     {final_orbital_indices}")
        print(f"Optimal Occupations: {final_occupation}")

        return final_occupation, final_orbital_indices