![support@xmrig.com](/assets/img/avatar_default.png)
42 changed files with 4057 additions and 382 deletions
@ -0,0 +1,188 @@ |
|||
/*
|
|||
* Copyright © 2020 Inria. All rights reserved. |
|||
* See COPYING in top-level directory. |
|||
*/ |
|||
|
|||
/** \file
|
|||
* \brief Kinds of CPU cores. |
|||
*/ |
|||
|
|||
#ifndef HWLOC_CPUKINDS_H |
|||
#define HWLOC_CPUKINDS_H |
|||
|
|||
#include "hwloc.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#elif 0 |
|||
} |
|||
#endif |
|||
|
|||
/** \defgroup hwlocality_cpukinds Kinds of CPU cores
|
|||
* |
|||
* Platforms with heterogeneous CPUs may have some cores with |
|||
* different features or frequencies. |
|||
* This API exposes identical PUs in sets called CPU kinds. |
|||
* Each PU of the topology may only be in a single kind. |
|||
* |
|||
* The number of kinds may be obtained with hwloc_cpukinds_get_nr(). |
|||
* If the platform is homogeneous, there may be a single kind |
|||
* with all PUs. |
|||
* If the platform or operating system does not expose any |
|||
* information about CPU cores, there may be no kind at all. |
|||
* |
|||
* The index of the kind that describes a given CPU set |
|||
* (if any, and not partially) |
|||
* may be obtained with hwloc_cpukinds_get_by_cpuset(). |
|||
* |
|||
* From the index of a kind, it is possible to retrieve information |
|||
* with hwloc_cpukinds_get_info(): |
|||
* an abstracted efficiency value, |
|||
* and an array of info attributes |
|||
* (for instance the "CoreType" and "FrequencyMaxMHz", |
|||
* see \ref topoattrs_cpukinds). |
|||
* |
|||
* A higher efficiency value means greater intrinsic performance |
|||
* (and possibly less performance/power efficiency). |
|||
* Kinds with lower efficiency are ranked first: |
|||
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will |
|||
* return information about the less efficient CPU kind. |
|||
* |
|||
* When available, efficiency values are gathered from the operating |
|||
* system (when \p cpukind_efficiency is set in the |
|||
* struct hwloc_topology_discovery_support array, only on Windows 10 for now). |
|||
* Otherwise hwloc tries to compute efficiencies |
|||
* by comparing CPU kinds using frequencies (on ARM), |
|||
* or core types and frequencies (on other architectures). |
|||
* The environment variable HWLOC_CPUKINDS_RANKING may be used |
|||
* to change these heuristics, see \ref envvar. |
|||
* |
|||
* If hwloc fails to rank any kind, for instance because the operating |
|||
* system does not expose efficiencies and core frequencies, |
|||
* all kinds will have an unknown efficiency (\c -1), |
|||
* and they are not indexed/ordered in any specific way. |
|||
* |
|||
* @{ |
|||
*/ |
|||
|
|||
/** \brief Get the number of different kinds of CPU cores in the topology.
|
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \return The number of CPU kinds (positive integer) on success. |
|||
* \return \c 0 if no information about kinds was found. |
|||
* \return \c -1 with \p errno set to \c EINVAL if \p flags is invalid. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_cpukinds_get_nr(hwloc_topology_t topology, |
|||
unsigned long flags); |
|||
|
|||
/** \brief Get the index of the CPU kind that contains CPUs listed in \p cpuset.
|
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \return The index of the CPU kind (positive integer or 0) on success. |
|||
* \return \c -1 with \p errno set to \c EXDEV if \p cpuset is |
|||
* only partially included in some kind. |
|||
* \return \c -1 with \p errno set to \c ENOENT if \p cpuset is |
|||
* not included in any kind, even partially. |
|||
* \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology, |
|||
hwloc_const_bitmap_t cpuset, |
|||
unsigned long flags); |
|||
|
|||
/** \brief Get the CPU set and infos about a CPU kind in the topology.
|
|||
* |
|||
* \p kind_index identifies one kind of CPU between 0 and the number |
|||
* of kinds returned by hwloc_cpukinds_get_nr() minus 1. |
|||
* |
|||
* If not \c NULL, the bitmap \p cpuset will be filled with |
|||
* the set of PUs of this kind. |
|||
* |
|||
* The integer pointed by \p efficiency, if not \c NULL, will be filled |
|||
* with the ranking of this kind of CPU in term of efficiency (see above). |
|||
* It ranges from \c 0 to the number of kinds |
|||
* (as reported by hwloc_cpukinds_get_nr()) minus 1. |
|||
* |
|||
* Kinds with lower efficiency are reported first. |
|||
* |
|||
* If there is a single kind in the topology, its efficiency is \c 0. |
|||
* If the efficiency of some kinds of cores is unknown, |
|||
* the efficiency of all kinds is set to \c -1, |
|||
* and kinds are reported in no specific order. |
|||
* |
|||
* The array of info attributes (for instance the "CoreType", |
|||
* "FrequencyMaxMHz" or "FrequencyBaseMHz", see \ref topoattrs_cpukinds) |
|||
* and its length are returned in \p infos and \p nr_infos. |
|||
* The array belongs to the topology, it should not be freed or modified. |
|||
* |
|||
* If \p nr_infos or \p infos is \c NULL, no info is returned. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \return \c 0 on success. |
|||
* \return \c -1 with \p errno set to \c ENOENT if \p kind_index does not match any CPU kind. |
|||
* \return \c -1 with \p errno set to \c EINVAL if parameters are invalid. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_cpukinds_get_info(hwloc_topology_t topology, |
|||
unsigned kind_index, |
|||
hwloc_bitmap_t cpuset, |
|||
int *efficiency, |
|||
unsigned *nr_infos, struct hwloc_info_s **infos, |
|||
unsigned long flags); |
|||
|
|||
/** \brief Register a kind of CPU in the topology.
|
|||
* |
|||
* Mark the PUs listed in \p cpuset as being of the same kind |
|||
* with respect to the given attributes. |
|||
* |
|||
* \p forced_efficiency should be \c -1 if unknown. |
|||
* Otherwise it is an abstracted efficiency value to enforce |
|||
* the ranking of all kinds if all of them have valid (and |
|||
* different) efficiencies. |
|||
* |
|||
* The array \p infos of size \p nr_infos may be used to provide |
|||
* info names and values describing this kind of PUs. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* Parameters \p cpuset and \p infos will be duplicated internally, |
|||
* the caller is responsible for freeing them. |
|||
* |
|||
* If \p cpuset overlaps with some existing kinds, those might get |
|||
* modified or split. For instance if existing kind A contains |
|||
* PUs 0 and 1, and one registers another kind for PU 1 and 2, |
|||
* there will be 3 resulting kinds: |
|||
* existing kind A is restricted to only PU 0; |
|||
* new kind B contains only PU 1 and combines information from A |
|||
* and from the newly-registered kind; |
|||
* new kind C contains only PU 2 and only gets information from |
|||
* the newly-registered kind. |
|||
* |
|||
* \note The efficiency \p forced_efficiency provided to this function |
|||
* may be different from the one reported later by hwloc_cpukinds_get_info() |
|||
* because hwloc will scale efficiency values down to |
|||
* between 0 and the number of kinds minus 1. |
|||
* |
|||
* \return \c 0 on success. |
|||
* \return \c -1 with \p errno set to \c EINVAL if some parameters are invalid, |
|||
* for instance if \p cpuset is \c NULL or empty. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_cpukinds_register(hwloc_topology_t topology, |
|||
hwloc_bitmap_t cpuset, |
|||
int forced_efficiency, |
|||
unsigned nr_infos, struct hwloc_info_s *infos, |
|||
unsigned long flags); |
|||
|
|||
/** @} */ |
|||
|
|||
#ifdef __cplusplus |
|||
} /* extern "C" */ |
|||
#endif |
|||
|
|||
|
|||
#endif /* HWLOC_CPUKINDS_H */ |
@ -0,0 +1,455 @@ |
|||
/*
|
|||
* Copyright © 2019-2020 Inria. All rights reserved. |
|||
* See COPYING in top-level directory. |
|||
*/ |
|||
|
|||
/** \file
|
|||
* \brief Memory node attributes. |
|||
*/ |
|||
|
|||
#ifndef HWLOC_MEMATTR_H |
|||
#define HWLOC_MEMATTR_H |
|||
|
|||
#include "hwloc.h" |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#elif 0 |
|||
} |
|||
#endif |
|||
|
|||
/** \defgroup hwlocality_memattrs Comparing memory node attributes for finding where to allocate on
|
|||
* |
|||
* Platforms with heterogeneous memory require ways to decide whether |
|||
* a buffer should be allocated on "fast" memory (such as HBM), |
|||
* "normal" memory (DDR) or even "slow" but large-capacity memory |
|||
* (non-volatile memory). |
|||
* These memory nodes are called "Targets" while the CPU accessing them |
|||
* is called the "Initiator". Access performance depends on their |
|||
* locality (NUMA platforms) as well as the intrinsic performance |
|||
* of the targets (heterogeneous platforms). |
|||
* |
|||
* The following attributes describe the performance of memory accesses |
|||
* from an Initiator to a memory Target, for instance their latency |
|||
* or bandwidth. |
|||
* Initiators performing these memory accesses are usually some PUs or Cores |
|||
* (described as a CPU set). |
|||
* Hence a Core may choose where to allocate a memory buffer by comparing |
|||
* the attributes of different target memory nodes nearby. |
|||
* |
|||
* There are also some attributes that are system-wide. |
|||
* Their value does not depend on a specific initiator performing |
|||
* an access. |
|||
* The memory node Capacity is an example of such attribute without |
|||
* initiator. |
|||
* |
|||
* One way to use this API is to start with a cpuset describing the Cores where |
|||
* a program is bound. The best target NUMA node for allocating memory in this |
|||
* program on these Cores may be obtained by passing this cpuset as an initiator |
|||
* to hwloc_memattr_get_best_target() with the relevant memory attribute. |
|||
* For instance, if the code is latency limited, use the Latency attribute. |
|||
* |
|||
* A more flexible approach consists in getting the list of local NUMA nodes |
|||
* by passing this cpuset to hwloc_get_local_numanode_objs(). |
|||
* Attribute values for these nodes, if any, may then be obtained with |
|||
* hwloc_memattr_get_value() and manually compared with the desired criteria. |
|||
* |
|||
* \note The API also supports specific objects as initiator, |
|||
* but it is currently not used internally by hwloc. |
|||
* Users may for instance use it to provide custom performance |
|||
* values for host memory accesses performed by GPUs. |
|||
* |
|||
* \note The interface actually also accepts targets that are not NUMA nodes. |
|||
* @{ |
|||
*/ |
|||
|
|||
/** \brief Memory node attributes. */ |
|||
enum hwloc_memattr_id_e { |
|||
/** \brief "Capacity".
|
|||
* The capacity is returned in bytes |
|||
* (local_memory attribute in objects). |
|||
* |
|||
* Best capacity nodes are nodes with <b>higher capacity</b>. |
|||
* |
|||
* No initiator is involved when looking at this attribute. |
|||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. |
|||
*/ |
|||
HWLOC_MEMATTR_ID_CAPACITY = 0, |
|||
|
|||
/** \brief "Locality".
|
|||
* The locality is returned as the number of PUs in that locality |
|||
* (e.g. the weight of its cpuset). |
|||
* |
|||
* Best locality nodes are nodes with <b>smaller locality</b> |
|||
* (nodes that are local to very few PUs). |
|||
* Poor locality nodes are nodes with larger locality |
|||
* (nodes that are local to the entire machine). |
|||
* |
|||
* No initiator is involved when looking at this attribute. |
|||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. |
|||
*/ |
|||
HWLOC_MEMATTR_ID_LOCALITY = 1, |
|||
|
|||
/** \brief "Bandwidth".
|
|||
* The bandwidth is returned in MiB/s, as seen from the given initiator location. |
|||
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>. |
|||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST |
|||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. |
|||
*/ |
|||
HWLOC_MEMATTR_ID_BANDWIDTH = 2, |
|||
|
|||
/** \brief "Latency".
|
|||
* The latency is returned as nanoseconds, as seen from the given initiator location. |
|||
* Best latency nodes are nodes with <b>smaller latency</b>. |
|||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST |
|||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. |
|||
*/ |
|||
HWLOC_MEMATTR_ID_LATENCY = 3 |
|||
|
|||
/* TODO read vs write, persistence? */ |
|||
}; |
|||
|
|||
/** \brief A memory attribute identifier.
|
|||
* May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register(). |
|||
*/ |
|||
typedef unsigned hwloc_memattr_id_t; |
|||
|
|||
/** \brief Return the identifier of the memory attribute with the given name.
|
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_by_name(hwloc_topology_t topology, |
|||
const char *name, |
|||
hwloc_memattr_id_t *id); |
|||
|
|||
|
|||
/** \brief Type of location. */ |
|||
enum hwloc_location_type_e { |
|||
/** \brief Location is given as a cpuset, in the location cpuset union field. \hideinitializer */ |
|||
HWLOC_LOCATION_TYPE_CPUSET = 1, |
|||
/** \brief Location is given as an object, in the location object union field. \hideinitializer */ |
|||
HWLOC_LOCATION_TYPE_OBJECT = 0 |
|||
}; |
|||
|
|||
/** \brief Where to measure attributes from. */ |
|||
struct hwloc_location { |
|||
/** \brief Type of location. */ |
|||
enum hwloc_location_type_e type; |
|||
/** \brief Actual location. */ |
|||
union hwloc_location_u { |
|||
/** \brief Location as a cpuset, when the location type is ::HWLOC_LOCATION_TYPE_CPUSET. */ |
|||
hwloc_cpuset_t cpuset; |
|||
/** \brief Location as an object, when the location type is ::HWLOC_LOCATION_TYPE_OBJECT. */ |
|||
hwloc_obj_t object; |
|||
} location; |
|||
}; |
|||
|
|||
|
|||
/** \brief Flags for selecting target NUMA nodes. */ |
|||
enum hwloc_local_numanode_flag_e { |
|||
/** \brief Select NUMA nodes whose locality is larger than the given cpuset.
|
|||
* For instance, if a single PU (or its cpuset) is given in \p location, |
|||
* select all nodes close to the package that contains this PU. |
|||
* \hideinitializer |
|||
*/ |
|||
HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY = (1UL<<0), |
|||
|
|||
/** \brief Select NUMA nodes whose locality is smaller than the given cpuset.
|
|||
* For instance, if a package (or its cpuset) is given in \p location, |
|||
* also select nodes that are attached to only a half of that package. |
|||
* \hideinitializer |
|||
*/ |
|||
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1), |
|||
|
|||
/** \brief Select all NUMA nodes in the topology.
|
|||
* The location \p location is ignored. |
|||
* \hideinitializer |
|||
*/ |
|||
HWLOC_LOCAL_NUMANODE_FLAG_ALL = (1UL<<2) |
|||
}; |
|||
|
|||
/** \brief Return an array of local NUMA nodes.
|
|||
* |
|||
* By default only select the NUMA nodes whose locality is exactly |
|||
* the given \p location. More nodes may be selected if additional flags |
|||
* are given as a OR'ed set of ::hwloc_local_numanode_flag_e. |
|||
* |
|||
* If \p location is given as an explicit object, its CPU set is used |
|||
* to find NUMA nodes with the corresponding locality. |
|||
* If the object does not have a CPU set (e.g. I/O object), the CPU |
|||
* parent (where the I/O object is attached) is used. |
|||
* |
|||
* On input, \p nr points to the number of nodes that may be stored |
|||
* in the \p nodes array. |
|||
* On output, \p nr will be changed to the number of stored nodes, |
|||
* or the number of nodes that would have been stored if there were |
|||
* enough room. |
|||
* |
|||
* \note Some of these NUMA nodes may not have any memory attribute |
|||
* values and hence not be reported as actual targets in other functions. |
|||
* |
|||
* \note The number of NUMA nodes in the topology (obtained by |
|||
* hwloc_bitmap_weight() on the root object nodeset) may be used |
|||
* to allocate the \p nodes array. |
|||
* |
|||
* \note When an object CPU set is given as locality, for instance a Package, |
|||
* and when flags contain both ::HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY |
|||
* and ::HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY, |
|||
* the returned array corresponds to the nodeset of that object. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_get_local_numanode_objs(hwloc_topology_t topology, |
|||
struct hwloc_location *location, |
|||
unsigned *nr, |
|||
hwloc_obj_t *nodes, |
|||
unsigned long flags); |
|||
|
|||
|
|||
|
|||
/** \brief Return an attribute value for a specific target NUMA node.
|
|||
* |
|||
* If the attribute does not relate to a specific initiator |
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), |
|||
* location \p initiator is ignored and may be \c NULL. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET |
|||
* when referring to accesses performed by CPU cores. |
|||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, |
|||
* but users may for instance use it to provide custom information about |
|||
* host memory accesses performed by GPUs. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_value(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
hwloc_obj_t target_node, |
|||
struct hwloc_location *initiator, |
|||
unsigned long flags, |
|||
hwloc_uint64_t *value); |
|||
|
|||
/** \brief Return the best target NUMA node for the given attribute and initiator.
|
|||
* |
|||
* If the attribute does not relate to a specific initiator |
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), |
|||
* location \p initiator is ignored and may be \c NULL. |
|||
* |
|||
* If \p value is non \c NULL, the corresponding value is returned there. |
|||
* |
|||
* If multiple targets have the same attribute values, only one is |
|||
* returned (and there is no way to clarify how that one is chosen). |
|||
* Applications that want to detect targets with identical/similar |
|||
* values, or that want to look at values for multiple attributes, |
|||
* should rather get all values using hwloc_memattr_get_value() |
|||
* and manually select the target they consider the best. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT. |
|||
* |
|||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET |
|||
* when referring to accesses performed by CPU cores. |
|||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, |
|||
* but users may for instance use it to provide custom information about |
|||
* host memory accesses performed by GPUs. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_best_target(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
struct hwloc_location *initiator, |
|||
unsigned long flags, |
|||
hwloc_obj_t *best_target, hwloc_uint64_t *value); |
|||
|
|||
/** \brief Return the best initiator for the given attribute and target NUMA node.
|
|||
* |
|||
* If the attribute does not relate to a specific initiator |
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), |
|||
* \c -1 is returned and \p errno is set to \c EINVAL. |
|||
* |
|||
* If \p value is non \c NULL, the corresponding value is returned there. |
|||
* |
|||
* If multiple initiators have the same attribute values, only one is |
|||
* returned (and there is no way to clarify how that one is chosen). |
|||
* Applications that want to detect initiators with identical/similar |
|||
* values, or that want to look at values for multiple attributes, |
|||
* should rather get all values using hwloc_memattr_get_value() |
|||
* and manually select the initiator they consider the best. |
|||
* |
|||
* The returned initiator should not be modified or freed, |
|||
* it belongs to the topology. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_best_initiator(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
hwloc_obj_t target, |
|||
unsigned long flags, |
|||
struct hwloc_location *best_initiator, hwloc_uint64_t *value); |
|||
|
|||
/** @} */ |
|||
|
|||
|
|||
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
|
|||
* @{ |
|||
*/ |
|||
|
|||
/** \brief Return the name of a memory attribute.
|
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_name(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
const char **name); |
|||
|
|||
/** \brief Return the flags of the given attribute.
|
|||
* |
|||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_flags(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
unsigned long *flags); |
|||
|
|||
/** \brief Memory attribute flags.
|
|||
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags(). |
|||
*/ |
|||
enum hwloc_memattr_flag_e { |
|||
/** \brief The best nodes for this memory attribute are those with the higher values.
|
|||
* For instance Bandwidth. |
|||
*/ |
|||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0), |
|||
/** \brief The best nodes for this memory attribute are those with the lower values.
|
|||
* For instance Latency. |
|||
*/ |
|||
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1), |
|||
/** \brief The value returned for this memory attribute depends on the given initiator.
|
|||
* For instance Bandwidth and Latency, but not Capacity. |
|||
*/ |
|||
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2) |
|||
}; |
|||
|
|||
/** \brief Register a new memory attribute.
|
|||
* |
|||
* Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e. |
|||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least |
|||
* one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_register(hwloc_topology_t topology, |
|||
const char *name, |
|||
unsigned long flags, |
|||
hwloc_memattr_id_t *id); |
|||
|
|||
/** \brief Set an attribute value for a specific target NUMA node.
|
|||
* |
|||
* If the attribute does not relate to a specific initiator |
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), |
|||
* location \p initiator is ignored and may be \c NULL. |
|||
* |
|||
* The initiator will be copied into the topology, |
|||
* the caller should free anything allocated to store the initiator, |
|||
* for instance the cpuset. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET |
|||
* when referring to accesses performed by CPU cores. |
|||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, |
|||
* but users may for instance use it to provide custom information about |
|||
* host memory accesses performed by GPUs. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_set_value(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
hwloc_obj_t target_node, |
|||
struct hwloc_location *initiator, |
|||
unsigned long flags, |
|||
hwloc_uint64_t value); |
|||
|
|||
/** \brief Return the target NUMA nodes that have some values for a given attribute.
|
|||
* |
|||
* Return targets for the given attribute in the \p targets array |
|||
* (for the given initiator if any). |
|||
* If \p values is not \c NULL, the corresponding attribute values |
|||
* are stored in the array it points to. |
|||
* |
|||
* On input, \p nr points to the number of targets that may be stored |
|||
* in the array \p targets (and \p values). |
|||
* On output, \p nr points to the number of targets (and values) that |
|||
* were actually found, even if some of them couldn't be stored in the array. |
|||
* Targets that couldn't be stored are ignored, but the function still |
|||
* returns success (\c 0). The caller may find out by comparing the value pointed |
|||
* by \p nr before and after the function call. |
|||
* |
|||
* The returned targets should not be modified or freed, |
|||
* they belong to the topology. |
|||
* |
|||
* Argument \p initiator is ignored if the attribute does not relate to a specific |
|||
* initiator (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR). |
|||
* Otherwise \p initiator may be non \c NULL to report only targets |
|||
* that have a value for that initiator. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* \note This function is meant for tools and debugging (listing internal information) |
|||
* rather than for application queries. Applications should rather select useful |
|||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute |
|||
* values. |
|||
* |
|||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET |
|||
* when refering to accesses performed by CPU cores. |
|||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, |
|||
* but users may for instance use it to provide custom information about |
|||
* host memory accesses performed by GPUs. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_targets(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
struct hwloc_location *initiator, |
|||
unsigned long flags, |
|||
unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values); |
|||
|
|||
/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node.
|
|||
* |
|||
* Return initiators for the given attribute and target node in the |
|||
* \p initiators array. |
|||
* If \p values is not \c NULL, the corresponding attribute values |
|||
* are stored in the array it points to. |
|||
* |
|||
* On input, \p nr points to the number of initiators that may be stored |
|||
* in the array \p initiators (and \p values). |
|||
* On output, \p nr points to the number of initiators (and values) that |
|||
* were actually found, even if some of them couldn't be stored in the array. |
|||
* Initiators that couldn't be stored are ignored, but the function still |
|||
* returns success (\c 0). The caller may find out by comparing the value pointed |
|||
* by \p nr before and after the function call. |
|||
* |
|||
* The returned initiators should not be modified or freed, |
|||
* they belong to the topology. |
|||
* |
|||
* \p flags must be \c 0 for now. |
|||
* |
|||
* If the attribute does not relate to a specific initiator |
|||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), |
|||
* no initiator is returned. |
|||
* |
|||
* \note This function is meant for tools and debugging (listing internal information) |
|||
* rather than for application queries. Applications should rather select useful |
|||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute |
|||
* values for some relevant initiators. |
|||
*/ |
|||
HWLOC_DECLSPEC int |
|||
hwloc_memattr_get_initiators(hwloc_topology_t topology, |
|||
hwloc_memattr_id_t attribute, |
|||
hwloc_obj_t target_node, |
|||
unsigned long flags, |
|||
unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values); |
|||
/** @} */ |
|||
|
|||
#ifdef __cplusplus |
|||
} /* extern "C" */ |
|||
#endif |
|||
|
|||
|
|||
#endif /* HWLOC_MEMATTR_H */ |
@ -0,0 +1,649 @@ |
|||
/*
|
|||
* Copyright © 2020 Inria. All rights reserved. |
|||
* See COPYING in top-level directory. |
|||
*/ |
|||
|
|||
#include "private/autogen/config.h" |
|||
#include "hwloc.h" |
|||
#include "private/private.h" |
|||
#include "private/debug.h" |
|||
|
|||
|
|||
/*****************
|
|||
* Basics |
|||
*/ |
|||
|
|||
void |
|||
hwloc_internal_cpukinds_init(struct hwloc_topology *topology) |
|||
{ |
|||
topology->cpukinds = NULL; |
|||
topology->nr_cpukinds = 0; |
|||
topology->nr_cpukinds_allocated = 0; |
|||
} |
|||
|
|||
void |
|||
hwloc_internal_cpukinds_destroy(struct hwloc_topology *topology) |
|||
{ |
|||
unsigned i; |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
hwloc_bitmap_free(kind->cpuset); |
|||
hwloc__free_infos(kind->infos, kind->nr_infos); |
|||
} |
|||
free(topology->cpukinds); |
|||
topology->cpukinds = NULL; |
|||
topology->nr_cpukinds = 0; |
|||
} |
|||
|
|||
int |
|||
hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old) |
|||
{ |
|||
struct hwloc_tma *tma = new->tma; |
|||
struct hwloc_internal_cpukind_s *kinds; |
|||
unsigned i; |
|||
|
|||
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds)); |
|||
if (!kinds) |
|||
return -1; |
|||
new->cpukinds = kinds; |
|||
new->nr_cpukinds = old->nr_cpukinds; |
|||
memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds)); |
|||
|
|||
for(i=0;i<old->nr_cpukinds; i++) { |
|||
kinds[i].cpuset = hwloc_bitmap_tma_dup(tma, old->cpukinds[i].cpuset); |
|||
if (!kinds[i].cpuset) { |
|||
new->nr_cpukinds = i; |
|||
goto failed; |
|||
} |
|||
if (hwloc__tma_dup_infos(tma, |
|||
&kinds[i].infos, &kinds[i].nr_infos, |
|||
old->cpukinds[i].infos, old->cpukinds[i].nr_infos) < 0) { |
|||
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */ |
|||
hwloc_bitmap_free(kinds[i].cpuset); |
|||
new->nr_cpukinds = i; |
|||
goto failed; |
|||
} |
|||
} |
|||
|
|||
return 0; |
|||
|
|||
failed: |
|||
hwloc_internal_cpukinds_destroy(new); |
|||
return -1; |
|||
} |
|||
|
|||
void |
|||
hwloc_internal_cpukinds_restrict(hwloc_topology_t topology) |
|||
{ |
|||
unsigned i; |
|||
int removed = 0; |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
hwloc_bitmap_and(kind->cpuset, kind->cpuset, hwloc_get_root_obj(topology)->cpuset); |
|||
if (hwloc_bitmap_iszero(kind->cpuset)) { |
|||
hwloc_bitmap_free(kind->cpuset); |
|||
hwloc__free_infos(kind->infos, kind->nr_infos); |
|||
memmove(kind, kind+1, (topology->nr_cpukinds - i - 1)*sizeof(*kind)); |
|||
i--; |
|||
topology->nr_cpukinds--; |
|||
removed = 1; |
|||
} |
|||
} |
|||
if (removed) |
|||
hwloc_internal_cpukinds_rank(topology); |
|||
} |
|||
|
|||
|
|||
/********************
|
|||
* Registering |
|||
*/ |
|||
|
|||
static __hwloc_inline int |
|||
hwloc__cpukind_check_duplicate_info(struct hwloc_internal_cpukind_s *kind, |
|||
const char *name, const char *value) |
|||
{ |
|||
unsigned i; |
|||
for(i=0; i<kind->nr_infos; i++) |
|||
if (!strcmp(kind->infos[i].name, name) |
|||
&& !strcmp(kind->infos[i].value, value)) |
|||
return 1; |
|||
return 0; |
|||
} |
|||
|
|||
static __hwloc_inline void |
|||
hwloc__cpukind_add_infos(struct hwloc_internal_cpukind_s *kind, |
|||
const struct hwloc_info_s *infos, unsigned nr_infos) |
|||
{ |
|||
unsigned i; |
|||
for(i=0; i<nr_infos; i++) { |
|||
if (hwloc__cpukind_check_duplicate_info(kind, infos[i].name, infos[i].value)) |
|||
continue; |
|||
hwloc__add_info(&kind->infos, &kind->nr_infos, infos[i].name, infos[i].value); |
|||
} |
|||
} |
|||
|
|||
int |
|||
hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset, |
|||
int forced_efficiency, |
|||
const struct hwloc_info_s *infos, unsigned nr_infos, |
|||
unsigned long flags) |
|||
{ |
|||
struct hwloc_internal_cpukind_s *kinds; |
|||
unsigned i, max, bits, oldnr, newnr; |
|||
|
|||
if (hwloc_bitmap_iszero(cpuset)) { |
|||
hwloc_bitmap_free(cpuset); |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
if (flags & ~HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
/* TODO: for now, only windows provides a forced efficiency.
|
|||
* if another backend ever provides a conflicting value, the first backend value will be kept. |
|||
* (user-provided values are not an issue, they are meant to overwrite) |
|||
*/ |
|||
|
|||
/* If we have N kinds currently, we may need 2N+1 kinds after inserting the new one:
|
|||
* - each existing kind may get split into which PUs are in the new kind and which aren't. |
|||
* - some PUs might not have been in any kind yet. |
|||
*/ |
|||
max = 2 * topology->nr_cpukinds + 1; |
|||
/* Allocate the power-of-two above 2N+1. */ |
|||
bits = hwloc_flsl(max-1) + 1; |
|||
max = 1U<<bits; |
|||
/* Allocate 8 minimum to avoid multiple reallocs */ |
|||
if (max < 8) |
|||
max = 8; |
|||
|
|||
/* Create or enlarge the array of kinds if needed */ |
|||
kinds = topology->cpukinds; |
|||
if (max > topology->nr_cpukinds_allocated) { |
|||
kinds = realloc(kinds, max * sizeof(*kinds)); |
|||
if (!kinds) { |
|||
hwloc_bitmap_free(cpuset); |
|||
return -1; |
|||
} |
|||
memset(&kinds[topology->nr_cpukinds_allocated], 0, (max - topology->nr_cpukinds_allocated) * sizeof(*kinds)); |
|||
topology->nr_cpukinds_allocated = max; |
|||
topology->cpukinds = kinds; |
|||
} |
|||
|
|||
newnr = oldnr = topology->nr_cpukinds; |
|||
for(i=0; i<oldnr; i++) { |
|||
int res = hwloc_bitmap_compare_inclusion(cpuset, kinds[i].cpuset); |
|||
if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_INCLUDED) { |
|||
/* new kind with intersection of cpusets and union of infos */ |
|||
kinds[newnr].cpuset = hwloc_bitmap_alloc(); |
|||
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; |
|||
kinds[newnr].forced_efficiency = forced_efficiency; |
|||
hwloc_bitmap_and(kinds[newnr].cpuset, cpuset, kinds[i].cpuset); |
|||
hwloc__cpukind_add_infos(&kinds[newnr], kinds[i].infos, kinds[i].nr_infos); |
|||
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos); |
|||
/* remove cpuset PUs from the existing kind that we just split */ |
|||
hwloc_bitmap_andnot(kinds[i].cpuset, kinds[i].cpuset, kinds[newnr].cpuset); |
|||
/* clear cpuset PUs that were taken care of */ |
|||
hwloc_bitmap_andnot(cpuset, cpuset, kinds[newnr].cpuset); |
|||
|
|||
newnr++; |
|||
|
|||
} else if (res == HWLOC_BITMAP_CONTAINS |
|||
|| res == HWLOC_BITMAP_EQUAL) { |
|||
/* append new info to existing smaller (or equal) kind */ |
|||
hwloc__cpukind_add_infos(&kinds[i], infos, nr_infos); |
|||
if ((flags & HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) |
|||
|| kinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) |
|||
kinds[i].forced_efficiency = forced_efficiency; |
|||
/* clear cpuset PUs that were taken care of */ |
|||
hwloc_bitmap_andnot(cpuset, cpuset, kinds[i].cpuset); |
|||
|
|||
} else { |
|||
assert(res == HWLOC_BITMAP_DIFFERENT); |
|||
/* nothing to do */ |
|||
} |
|||
|
|||
/* don't compare with anything else if already empty */ |
|||
if (hwloc_bitmap_iszero(cpuset)) |
|||
break; |
|||
} |
|||
|
|||
/* add a final kind with remaining PUs if any */ |
|||
if (!hwloc_bitmap_iszero(cpuset)) { |
|||
kinds[newnr].cpuset = cpuset; |
|||
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; |
|||
kinds[newnr].forced_efficiency = forced_efficiency; |
|||
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos); |
|||
newnr++; |
|||
} else { |
|||
hwloc_bitmap_free(cpuset); |
|||
} |
|||
|
|||
topology->nr_cpukinds = newnr; |
|||
return 0; |
|||
} |
|||
|
|||
int |
|||
hwloc_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, |
|||
int forced_efficiency, |
|||
unsigned nr_infos, struct hwloc_info_s *infos, |
|||
unsigned long flags) |
|||
{ |
|||
hwloc_bitmap_t cpuset; |
|||
int err; |
|||
|
|||
if (flags) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
if (!_cpuset || hwloc_bitmap_iszero(_cpuset)) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
cpuset = hwloc_bitmap_dup(_cpuset); |
|||
if (!cpuset) |
|||
return -1; |
|||
|
|||
if (forced_efficiency < 0) |
|||
forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; |
|||
|
|||
err = hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); |
|||
if (err < 0) |
|||
return err; |
|||
|
|||
hwloc_internal_cpukinds_rank(topology); |
|||
return 0; |
|||
} |
|||
|
|||
|
|||
/*********************
|
|||
* Ranking |
|||
*/ |
|||
|
|||
static int |
|||
hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology) |
|||
{ |
|||
unsigned i,j; |
|||
for(i=0; i<topology->nr_cpukinds; i++) |
|||
for(j=i+1; j<topology->nr_cpukinds; j++) |
|||
if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency) |
|||
/* if any duplicate, fail */ |
|||
return -1; |
|||
return 0; |
|||
} |
|||
|
|||
static int |
|||
hwloc__cpukinds_try_rank_by_forced_efficiency(struct hwloc_topology *topology) |
|||
{ |
|||
unsigned i; |
|||
|
|||
hwloc_debug("Trying to rank cpukinds by forced efficiency...\n"); |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
if (topology->cpukinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN) |
|||
/* if any unknown, fail */ |
|||
return -1; |
|||
topology->cpukinds[i].ranking_value = topology->cpukinds[i].forced_efficiency; |
|||
} |
|||
|
|||
return hwloc__cpukinds_check_duplicate_rankings(topology); |
|||
} |
|||
|
|||
/* Digest of the info attributes of all kinds, used by info-based ranking. */
struct hwloc_cpukinds_info_summary {
  /* each flag is 1 only if every kind has a non-zero value for that attribute */
  int have_max_freq;
  int have_base_freq;
  int have_intel_core_type;

  /* one entry per kind, in the same order as the topology array of kinds */
  struct hwloc_cpukind_info_summary {
    unsigned intel_core_type; /* 1 for atom, 2 for core, 0 if unknown */
    unsigned max_freq;        /* MHz, hence < 100000, 0 if unknown */
    unsigned base_freq;       /* MHz, hence < 100000, 0 if unknown */
  } * summaries;
};
|||
|
|||
static void |
|||
hwloc__cpukinds_summarize_info(struct hwloc_topology *topology, |
|||
struct hwloc_cpukinds_info_summary *summary) |
|||
{ |
|||
unsigned i, j; |
|||
|
|||
summary->have_max_freq = 1; |
|||
summary->have_base_freq = 1; |
|||
summary->have_intel_core_type = 1; |
|||
|
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
for(j=0; j<kind->nr_infos; j++) { |
|||
struct hwloc_info_s *info = &kind->infos[j]; |
|||
if (!strcmp(info->name, "FrequencyMaxMHz")) { |
|||
summary->summaries[i].max_freq = atoi(info->value); |
|||
} else if (!strcmp(info->name, "FrequencyBaseMHz")) { |
|||
summary->summaries[i].base_freq = atoi(info->value); |
|||
} else if (!strcmp(info->name, "CoreType")) { |
|||
if (!strcmp(info->value, "IntelAtom")) |
|||
summary->summaries[i].intel_core_type = 1; |
|||
else if (!strcmp(info->value, "IntelCore")) |
|||
summary->summaries[i].intel_core_type = 2; |
|||
} |
|||
} |
|||
hwloc_debug("cpukind #%u has intel_core_type %u max_freq %u base_freq %u\n", |
|||
i, summary->summaries[i].intel_core_type, |
|||
summary->summaries[i].max_freq, summary->summaries[i].base_freq); |
|||
if (!summary->summaries[i].base_freq) |
|||
summary->have_base_freq = 0; |
|||
if (!summary->summaries[i].max_freq) |
|||
summary->have_max_freq = 0; |
|||
if (!summary->summaries[i].intel_core_type) |
|||
summary->have_intel_core_type = 0; |
|||
} |
|||
} |
|||
|
|||
/* Strategies for ranking CPU kinds, selectable with HWLOC_CPUKINDS_RANKING. */
enum hwloc_cpukinds_ranking {
  /* forced + frequency on ARM, forced + coretype_frequency otherwise */
  HWLOC_CPUKINDS_RANKING_DEFAULT = 0,
  /* default without forced */
  HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY,
  /* forced efficiency only */
  HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
  /* core type first, then base frequency if known for all kinds, max frequency otherwise */
  HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY,
  /* core type only */
  HWLOC_CPUKINDS_RANKING_CORETYPE,
  /* base frequency if known for all kinds, max frequency otherwise */
  HWLOC_CPUKINDS_RANKING_FREQUENCY,
  /* max frequency only */
  HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
  /* base frequency only */
  HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE,
  /* no ranking at all, efficiencies are left unknown */
  HWLOC_CPUKINDS_RANKING_NONE
};
|||
|
|||
static int |
|||
hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology, |
|||
enum hwloc_cpukinds_ranking heuristics, |
|||
struct hwloc_cpukinds_info_summary *summary) |
|||
{ |
|||
unsigned i; |
|||
|
|||
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) { |
|||
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n"); |
|||
/* we need intel_core_type + (base or max freq) for all kinds */ |
|||
if (!summary->have_intel_core_type |
|||
|| (!summary->have_max_freq && !summary->have_base_freq)) |
|||
return -1; |
|||
/* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
if (summary->have_base_freq) |
|||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq; |
|||
else |
|||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq; |
|||
} |
|||
|
|||
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) { |
|||
hwloc_debug("Trying to rank cpukinds by coretype...\n"); |
|||
/* we need intel_core_type */ |
|||
if (!summary->have_intel_core_type) |
|||
return -1; |
|||
/* rank by coretype (Core>>Atom) */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20); |
|||
} |
|||
|
|||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY == heuristics) { |
|||
hwloc_debug("Trying to rank cpukinds by frequency...\n"); |
|||
/* we need base or max freq for all kinds */ |
|||
if (!summary->have_max_freq && !summary->have_base_freq) |
|||
return -1; |
|||
/* rank first by frequency, base if available, max otherwise */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
if (summary->have_base_freq) |
|||
kind->ranking_value = summary->summaries[i].base_freq; |
|||
else |
|||
kind->ranking_value = summary->summaries[i].max_freq; |
|||
} |
|||
|
|||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX == heuristics) { |
|||
hwloc_debug("Trying to rank cpukinds by frequency max...\n"); |
|||
/* we need max freq for all kinds */ |
|||
if (!summary->have_max_freq) |
|||
return -1; |
|||
/* rank first by frequency, base if available, max otherwise */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
kind->ranking_value = summary->summaries[i].max_freq; |
|||
} |
|||
|
|||
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE == heuristics) { |
|||
hwloc_debug("Trying to rank cpukinds by frequency base...\n"); |
|||
/* we need max freq for all kinds */ |
|||
if (!summary->have_base_freq) |
|||
return -1; |
|||
/* rank first by frequency, base if available, max otherwise */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i]; |
|||
kind->ranking_value = summary->summaries[i].base_freq; |
|||
} |
|||
|
|||
} else assert(0); |
|||
|
|||
return hwloc__cpukinds_check_duplicate_rankings(topology); |
|||
} |
|||
|
|||
static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b) |
|||
{ |
|||
const struct hwloc_internal_cpukind_s *a = _a; |
|||
const struct hwloc_internal_cpukind_s *b = _b; |
|||
return a->ranking_value - b->ranking_value; |
|||
} |
|||
|
|||
/* this function requires ranking values to be unique */ |
|||
static void |
|||
hwloc__cpukinds_finalize_ranking(struct hwloc_topology *topology) |
|||
{ |
|||
unsigned i; |
|||
/* sort */ |
|||
qsort(topology->cpukinds, topology->nr_cpukinds, sizeof(*topology->cpukinds), hwloc__cpukinds_compare_ranking_values); |
|||
/* define our own efficiency between 0 and N-1 */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) |
|||
topology->cpukinds[i].efficiency = i; |
|||
} |
|||
|
|||
int |
|||
hwloc_internal_cpukinds_rank(struct hwloc_topology *topology) |
|||
{ |
|||
enum hwloc_cpukinds_ranking heuristics; |
|||
char *env; |
|||
unsigned i; |
|||
int err; |
|||
|
|||
if (!topology->nr_cpukinds) |
|||
return 0; |
|||
|
|||
if (topology->nr_cpukinds == 1) { |
|||
topology->cpukinds[0].efficiency = 0; |
|||
return 0; |
|||
} |
|||
|
|||
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT; |
|||
env = getenv("HWLOC_CPUKINDS_RANKING"); |
|||
if (env) { |
|||
if (!strcmp(env, "default")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT; |
|||
else if (!strcmp(env, "none")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_NONE; |
|||
else if (!strcmp(env, "coretype+frequency")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; |
|||
else if (!strcmp(env, "coretype")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE; |
|||
else if (!strcmp(env, "frequency")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY; |
|||
else if (!strcmp(env, "frequency_max")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX; |
|||
else if (!strcmp(env, "frequency_base")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE; |
|||
else if (!strcmp(env, "forced_efficiency")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY; |
|||
else if (!strcmp(env, "no_forced_efficiency")) |
|||
heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY; |
|||
else if (!hwloc_hide_errors()) |
|||
fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); |
|||
} |
|||
|
|||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT |
|||
|| heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { |
|||
/* default is forced_efficiency first */ |
|||
struct hwloc_cpukinds_info_summary summary; |
|||
enum hwloc_cpukinds_ranking subheuristics; |
|||
const char *arch; |
|||
|
|||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT) |
|||
hwloc_debug("Using default ranking strategy...\n"); |
|||
else |
|||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); |
|||
|
|||
if (heuristics != HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { |
|||
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology); |
|||
if (!err) |
|||
goto ready; |
|||
} |
|||
|
|||
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries)); |
|||
if (!summary.summaries) |
|||
goto failed; |
|||
hwloc__cpukinds_summarize_info(topology, &summary); |
|||
|
|||
arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture"); |
|||
/* TODO: rather coretype_frequency only on x86/Intel? */ |
|||
if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5))) |
|||
/* then frequency on ARM */ |
|||
subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY; |
|||
else |
|||
/* or coretype+frequency otherwise */ |
|||
subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; |
|||
|
|||
err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary); |
|||
free(summary.summaries); |
|||
if (!err) |
|||
goto ready; |
|||
|
|||
} else if (heuristics == HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY) { |
|||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); |
|||
|
|||
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology); |
|||
if (!err) |
|||
goto ready; |
|||
|
|||
} else if (heuristics != HWLOC_CPUKINDS_RANKING_NONE) { |
|||
/* custom heuristics */ |
|||
struct hwloc_cpukinds_info_summary summary; |
|||
|
|||
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env); |
|||
|
|||
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries)); |
|||
if (!summary.summaries) |
|||
goto failed; |
|||
hwloc__cpukinds_summarize_info(topology, &summary); |
|||
|
|||
err = hwloc__cpukinds_try_rank_by_info(topology, heuristics, &summary); |
|||
free(summary.summaries); |
|||
if (!err) |
|||
goto ready; |
|||
} |
|||
|
|||
failed: |
|||
/* failed to rank, clear efficiencies */ |
|||
for(i=0; i<topology->nr_cpukinds; i++) |
|||
topology->cpukinds[i].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN; |
|||
hwloc_debug("Failed to rank cpukinds.\n\n"); |
|||
return 0; |
|||
|
|||
ready: |
|||
for(i=0; i<topology->nr_cpukinds; i++) |
|||
hwloc_debug("cpukind #%u got ranking value %llu\n", i, (unsigned long long) topology->cpukinds[i].ranking_value); |
|||
hwloc__cpukinds_finalize_ranking(topology); |
|||
#ifdef HWLOC_DEBUG |
|||
for(i=0; i<topology->nr_cpukinds; i++) |
|||
assert(topology->cpukinds[i].efficiency == (int) i); |
|||
#endif |
|||
hwloc_debug("\n"); |
|||
return 0; |
|||
} |
|||
|
|||
|
|||
/*****************
|
|||
* Consulting |
|||
*/ |
|||
|
|||
int |
|||
hwloc_cpukinds_get_nr(hwloc_topology_t topology, unsigned long flags) |
|||
{ |
|||
if (flags) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
return topology->nr_cpukinds; |
|||
} |
|||
|
|||
int |
|||
hwloc_cpukinds_get_info(hwloc_topology_t topology, |
|||
unsigned id, |
|||
hwloc_bitmap_t cpuset, |
|||
int *efficiencyp, |
|||
unsigned *nr_infosp, struct hwloc_info_s **infosp, |
|||
unsigned long flags) |
|||
{ |
|||
struct hwloc_internal_cpukind_s *kind; |
|||
|
|||
if (flags) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
if (id >= topology->nr_cpukinds) { |
|||
errno = ENOENT; |
|||
return -1; |
|||
} |
|||
|
|||
kind = &topology->cpukinds[id]; |
|||
|
|||
if (cpuset) |
|||
hwloc_bitmap_copy(cpuset, kind->cpuset); |
|||
|
|||
if (efficiencyp) |
|||
*efficiencyp = kind->efficiency; |
|||
|
|||
if (nr_infosp && infosp) { |
|||
*nr_infosp = kind->nr_infos; |
|||
*infosp = kind->infos; |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
int |
|||
hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology, |
|||
hwloc_const_bitmap_t cpuset, |
|||
unsigned long flags) |
|||
{ |
|||
unsigned id; |
|||
|
|||
if (flags) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
if (!cpuset || hwloc_bitmap_iszero(cpuset)) { |
|||
errno = EINVAL; |
|||
return -1; |
|||
} |
|||
|
|||
for(id=0; id<topology->nr_cpukinds; id++) { |
|||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[id]; |
|||
int res = hwloc_bitmap_compare_inclusion(cpuset, kind->cpuset); |
|||
if (res == HWLOC_BITMAP_EQUAL || res == HWLOC_BITMAP_INCLUDED) { |
|||
return (int) id; |
|||
} else if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_CONTAINS) { |
|||
errno = EXDEV; |
|||
return -1; |
|||
} |
|||
} |
|||
|
|||
errno = ENOENT; |
|||
return -1; |
|||
} |
File diff suppressed because it is too large
Loading…
Reference in new issue