forked from luck/tmp_suning_uos_patched
698b1b3064
Memory compaction can be currently performed in several contexts: - kswapd balancing a zone after a high-order allocation failure - direct compaction to satisfy a high-order allocation, including THP page fault attempts - khugepaged trying to collapse a hugepage - manually from /proc The purpose of compaction is two-fold. The obvious purpose is to satisfy a (pending or future) high-order allocation, and is easy to evaluate. The other purpose is to keep overall memory fragmentation low and help the anti-fragmentation mechanism. The success wrt the latter purpose is harder to evaluate. The current situation wrt the purposes has a few drawbacks: - compaction is invoked only when a high-order page or hugepage is not available (or manually). This might be too late for the purposes of keeping memory fragmentation low. - direct compaction increases latency of allocations. Again, it would be better if compaction was performed asynchronously to keep fragmentation low, before the allocation itself comes. - (a special case of the previous) the cost of compaction during THP page faults can easily offset the benefits of THP. - kswapd compaction appears to be complex, fragile and not working in some scenarios. It could also end up compacting for a high-order allocation request when it should be reclaiming memory for a later order-0 request. To improve the situation, we should be able to benefit from an equivalent of kswapd, but for compaction - i.e. a background thread which responds to fragmentation and the need for high-order allocations (including hugepages) somewhat proactively. One possibility is to extend the responsibilities of kswapd, which could however complicate its design too much. It should be better to let kswapd handle reclaim, as order-0 allocations are often more critical than high-order ones. Another possibility is to extend khugepaged, but this kthread is a single instance and tied to THP configs. 
This patch goes with the option of a new set of per-node kthreads called kcompactd, and lays the foundations, without introducing any new tunables. The lifecycle mimics kswapd kthreads, including the memory hotplug hooks. For compaction, kcompactd uses the standard compaction_suitable() and compact_finished() criteria and the deferred compaction functionality. Unlike direct compaction, it uses only sync compaction, as there's no allocation latency to minimize. This patch doesn't yet add a call to wakeup_kcompactd. The kswapd compact/reclaim loop for high-order pages will be replaced by waking up kcompactd in the next patch with the description of what's wrong with the old approach. Waking up of the kcompactd threads is also tied to kswapd activity and follows these rules: - we don't want to affect any fastpaths, so wake up kcompactd only from the slowpath, as it's done for kswapd - if kswapd is doing reclaim, it's more important than compaction, so don't invoke kcompactd until kswapd goes to sleep - the target order used for kswapd is passed to kcompactd Possible future uses for kcompactd include the ability to wake up kcompactd on demand in special situations, such as when hugepages are not available (currently not done due to __GFP_NO_KSWAPD) or when a fragmentation event (i.e. __rmqueue_fallback()) occurs. It's also possible to perform periodic compaction with kcompactd. [arnd@arndb.de: fix build errors with kcompactd] [paul.gortmaker@windriver.com: don't use modular references for non modular code] Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: "Kirill A. 
Shutemov" <kirill.shutemov@linux.intel.com> Cc: Rik van Riel <riel@redhat.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: David Rientjes <rientjes@google.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
122 lines
3.7 KiB
C
122 lines
3.7 KiB
C
#ifndef _LINUX_COMPACTION_H
|
|
#define _LINUX_COMPACTION_H
|
|
|
|
/*
 * Return values for compact_zone() and try_to_compact_pages().
 *
 * The codes are plain preprocessor constants numbered consecutively from 0.
 * Two of them are also returned by the !CONFIG_COMPACTION stubs further down
 * in this header: COMPACT_CONTINUE by try_to_compact_pages() and
 * COMPACT_SKIPPED by compaction_suitable().
 */
|
|
/* compaction didn't start as it was deferred due to past failures */
|
|
#define COMPACT_DEFERRED 0
|
|
/* compaction didn't start as it was not possible or direct reclaim was more suitable */
|
|
#define COMPACT_SKIPPED 1
|
|
/* compaction should continue to another pageblock */
|
|
#define COMPACT_CONTINUE 2
|
|
/* direct compaction partially compacted a zone and there are suitable pages */
|
|
#define COMPACT_PARTIAL 3
|
|
/* The full zone was compacted */
|
|
#define COMPACT_COMPLETE 4
|
|
/* For more detailed tracepoint output (applies to the three codes below) */
|
|
#define COMPACT_NO_SUITABLE_PAGE 5
|
|
#define COMPACT_NOT_SUITABLE_ZONE 6
|
|
#define COMPACT_CONTENDED 7
|
|
/*
 * When adding new states, please adjust include/trace/events/compaction.h
 * so the tracepoint output stays in sync with the codes defined here.
 */
|
|
|
|
/*
 * Used to signal whether compaction detected need_sched() or lock contention.
 *
 * NOTE(review): "need_sched()" here and below presumably refers to
 * need_resched() - confirm against mm/compaction.c.
 * NOTE(review): these look like the values reported through the "contended"
 * out-parameter of try_to_compact_pages() - confirm against the callers.
 */
|
|
/* No contention detected */
|
|
#define COMPACT_CONTENDED_NONE 0
|
|
/* Either need_sched() was true or fatal signal pending */
|
|
#define COMPACT_CONTENDED_SCHED 1
|
|
/* Zone lock or lru_lock was contended in async compaction */
|
|
#define COMPACT_CONTENDED_LOCK 2
|
|
|
|
/*
 * Forward declaration only: this header refers to the type solely through
 * "const struct alloc_context *" parameters, so the full definition is not
 * needed here.
 */
struct alloc_context; /* in mm/internal.h */
|
|
|
|
#ifdef CONFIG_COMPACTION
|
|
/*
 * Sysctl-facing variables and handlers. They are only declared when
 * CONFIG_COMPACTION is set; no fallback is provided in the #else branch.
 * Both handlers share the same signature (ctl_table, write flag, user
 * buffer, length and file position pointers).
 */
extern int sysctl_compact_memory;
|
|
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *length, loff_t *ppos);
|
|
extern int sysctl_extfrag_threshold;
|
|
extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
|
|
void __user *buffer, size_t *length, loff_t *ppos);
|
|
extern int sysctl_compact_unevictable_allowed;
|
|
|
|
/*
 * Core compaction entry points, available when CONFIG_COMPACTION is set.
 * Apart from fragmentation_index(), each of these has a static inline stub
 * with the same signature in the #else branch of this header.
 */
extern int fragmentation_index(struct zone *zone, unsigned int order);
|
|
/* The last argument, "contended", is an output pointer. */
extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
|
|
int alloc_flags, const struct alloc_context *ac,
|
|
enum migrate_mode mode, int *contended);
|
|
extern void compact_pgdat(pg_data_t *pgdat, int order);
|
|
extern void reset_isolation_suitable(pg_data_t *pgdat);
|
|
extern unsigned long compaction_suitable(struct zone *zone, int order,
|
|
int alloc_flags, int classzone_idx);
|
|
|
|
/*
 * Deferred-compaction interface, keyed by zone and allocation order.
 *
 * Only defer_compaction() and compaction_deferred() have stubs in the
 * !CONFIG_COMPACTION branch of this header; compaction_defer_reset() and
 * compaction_restarting() are declared solely when CONFIG_COMPACTION is set.
 */
extern void defer_compaction(struct zone *zone, int order);
|
|
extern bool compaction_deferred(struct zone *zone, int order);
|
|
extern void compaction_defer_reset(struct zone *zone, int order,
|
|
bool alloc_success);
|
|
extern bool compaction_restarting(struct zone *zone, int order);
|
|
|
|
/*
 * kcompactd interface: start and stop take a node id, wakeup takes the node's
 * pg_data_t together with an order and a classzone index. Each function has
 * a no-op stub in the !CONFIG_COMPACTION branch of this header
 * (kcompactd_run() then returns 0).
 */
extern int kcompactd_run(int nid);
|
|
extern void kcompactd_stop(int nid);
|
|
extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
|
|
|
|
#else
|
|
/*
 * Stub for builds without CONFIG_COMPACTION: every argument is ignored and
 * COMPACT_CONTINUE is returned unconditionally.
 */
static inline unsigned long
try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int alloc_flags,
		     const struct alloc_context *ac, enum migrate_mode mode,
		     int *contended)
{
	return COMPACT_CONTINUE;
}
|
|
|
|
/* Stub for builds without CONFIG_COMPACTION: does nothing. */
static inline void
compact_pgdat(pg_data_t *pgdat, int order)
{
}
|
|
|
|
/* Stub for builds without CONFIG_COMPACTION: does nothing. */
static inline void
reset_isolation_suitable(pg_data_t *pgdat)
{
}
|
|
|
|
static inline unsigned long compaction_suitable(struct zone *zone, int order,
|
|
int alloc_flags, int classzone_idx)
|
|
{
|
|
return COMPACT_SKIPPED;
|
|
}
|
|
|
|
/* Stub for builds without CONFIG_COMPACTION: does nothing. */
static inline void
defer_compaction(struct zone *zone, int order)
{
}
|
|
|
|
/*
 * Stub for builds without CONFIG_COMPACTION: always reports true, whatever
 * zone and order are passed in.
 */
static inline bool
compaction_deferred(struct zone *zone, int order)
{
	return true;
}
|
|
|
|
/*
 * Stub for builds without CONFIG_COMPACTION: does nothing and returns 0
 * for any node id.
 */
static inline int
kcompactd_run(int nid)
{
	return 0;
}
|
|
/* Stub for builds without CONFIG_COMPACTION: does nothing. */
static inline void
kcompactd_stop(int nid)
{
}
|
|
|
|
/* Stub for builds without CONFIG_COMPACTION: does nothing. */
static inline void
wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
{
}
|
|
|
|
#endif /* CONFIG_COMPACTION */
|
|
|
|
#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
|
|
/*
 * Per-node register/unregister hooks. They are only declared when
 * CONFIG_COMPACTION, CONFIG_SYSFS and CONFIG_NUMA are all set; otherwise the
 * static inline stubs in the #else branch below are used.
 */
extern int compaction_register_node(struct node *node);
|
|
extern void compaction_unregister_node(struct node *node);
|
|
|
|
#else
|
|
|
|
/*
 * Stub used unless CONFIG_COMPACTION, CONFIG_SYSFS and CONFIG_NUMA are all
 * set: does nothing and returns 0.
 */
static inline int
compaction_register_node(struct node *node)
{
	return 0;
}
|
|
|
|
/*
 * Stub used unless CONFIG_COMPACTION, CONFIG_SYSFS and CONFIG_NUMA are all
 * set: does nothing.
 */
static inline void
compaction_unregister_node(struct node *node)
{
}
|
|
#endif /* CONFIG_COMPACTION && CONFIG_SYSFS && CONFIG_NUMA */
|
|
|
|
#endif /* _LINUX_COMPACTION_H */
|