cpu.c 48.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1 2 3 4 5 6 7 8 9
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
10
#include <linux/sched/signal.h>
11
#include <linux/sched/hotplug.h>
12
#include <linux/sched/task.h>
Linus Torvalds's avatar
Linus Torvalds committed
13 14
#include <linux/unistd.h>
#include <linux/cpu.h>
15 16
#include <linux/oom.h>
#include <linux/rcupdate.h>
17
#include <linux/export.h>
18
#include <linux/bug.h>
Linus Torvalds's avatar
Linus Torvalds committed
19 20
#include <linux/kthread.h>
#include <linux/stop_machine.h>
21
#include <linux/mutex.h>
22
#include <linux/gfp.h>
23
#include <linux/suspend.h>
24
#include <linux/lockdep.h>
25
#include <linux/tick.h>
26
#include <linux/irq.h>
27
#include <linux/nmi.h>
28
#include <linux/smpboot.h>
29
#include <linux/relay.h>
30
#include <linux/slab.h>
31
#include <linux/percpu-rwsem.h>
32

33
#include <trace/events/power.h>
34 35
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>
Linus Torvalds's avatar
Linus Torvalds committed
36

37 38
#include "smpboot.h"

39 40 41 42
/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
43 44
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
45
 * @rollback:	Perform a rollback
46 47 48
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @cb_state:	The state for a single callback (install/uninstall)
49
 * @result:	Result of the operation
50 51
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
52 53 54 55
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
56
	enum cpuhp_state	fail;
57 58 59
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
60
	bool			rollback;
61 62
	bool			single;
	bool			bringup;
63
	struct hlist_node	*node;
64
	struct hlist_node	*last;
65 66
	enum cpuhp_state	cb_state;
	int			result;
67 68
	struct completion	done_up;
	struct completion	done_down;
69
#endif
70 71
};

72 73 74
/*
 * Per-cpu instance of the hotplug state. The failure injection slot starts
 * out as CPUHP_INVALID, i.e. no state is armed to fail.
 */
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};
75

76
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
/* One lockdep map per hotplug direction: bringup ("up") and teardown ("down") */
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);

/* Acquire the lockdep map matching the direction selected by @bringup */
static inline void cpuhp_lock_acquire(bool bringup)
{
	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}

/* Release the lockdep map matching the direction selected by @bringup */
static inline void cpuhp_lock_release(bool bringup)
{
	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
#else

/* Without lockdep (or without SMP) the annotations compile away */
static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }

#endif

99 100 101 102 103 104 105
/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers	are gone
106
 * @cant_stop:	Bringup/teardown can't be stopped at this step
107 108
 */
struct cpuhp_step {
109 110
	const char		*name;
	union {
111 112 113 114
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
115
	union {
116 117 118 119
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
120 121 122 123
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
124 125
};

126
static DEFINE_MUTEX(cpuhp_state_mutex);
127
static struct cpuhp_step cpuhp_bp_states[];
128
static struct cpuhp_step cpuhp_ap_states[];
129

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

/* Look up the step descriptor of @state in the AP or the BP table */
static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	if (cpuhp_is_ap_state(state))
		return &cpuhp_ap_states[state];

	return &cpuhp_bp_states[state];
}

147 148 149
/**
 * cpuhp_invoke_callback _ Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
150
 * @state:	The state to do callbacks for
151
 * @bringup:	True if the bringup callback should be invoked
152 153
 * @node:	For multi-instance, do a single entry callback for install/remove
 * @lastp:	For multi-instance rollback, remember how far we got
154
 *
155
 * Called from cpu hotplug and from the state register machinery.
156
 */
157
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
158 159
				 bool bringup, struct hlist_node *node,
				 struct hlist_node **lastp)
160 161
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
162
	struct cpuhp_step *step = cpuhp_get_step(state);
163 164 165 166
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

167 168 169 170 171 172 173 174 175
	if (st->fail == state) {
		st->fail = CPUHP_INVALID;

		if (!(bringup ? step->startup.single : step->teardown.single))
			return 0;

		return -EAGAIN;
	}

176
	if (!step->multi_instance) {
177
		WARN_ON_ONCE(lastp && *lastp);
178
		cb = bringup ? step->startup.single : step->teardown.single;
179 180
		if (!cb)
			return 0;
181
		trace_cpuhp_enter(cpu, st->target, state, cb);
182
		ret = cb(cpu);
183
		trace_cpuhp_exit(cpu, st->state, state, ret);
184 185
		return ret;
	}
186
	cbm = bringup ? step->startup.multi : step->teardown.multi;
187 188 189 190 191
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
192
		WARN_ON_ONCE(lastp && *lastp);
193 194 195 196 197 198 199 200 201
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
202 203 204
		if (lastp && node == *lastp)
			break;

205 206 207
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
208 209 210 211 212 213 214
		if (ret) {
			if (!lastp)
				goto err;

			*lastp = node;
			return ret;
		}
215 216
		cnt++;
	}
217 218
	if (lastp)
		*lastp = NULL;
219 220 221
	return 0;
err:
	/* Rollback the instances if one failed */
222
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
223 224 225 226 227 228
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;
229 230 231 232 233 234 235 236

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		/*
		 * Rollback must not fail,
		 */
		WARN_ON_ONCE(ret);
237 238 239 240
	}
	return ret;
}

241
#ifdef CONFIG_SMP
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
/* Block until the completion for the direction selected by @bringup fires */
static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	if (bringup)
		wait_for_completion(&st->done_up);
	else
		wait_for_completion(&st->done_down);
}

/* Fire the completion for the direction selected by @bringup */
static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	if (bringup)
		complete(&st->done_up);
	else
		complete(&st->done_down);
}

/*
 * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
 */
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
{
	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}

262
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
263
static DEFINE_MUTEX(cpu_add_remove_lock);
264 265
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
Linus Torvalds's avatar
Linus Torvalds committed
266

267
/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

/* Counterpart of cpu_maps_update_begin(): drops cpu_add_remove_lock */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}
Linus Torvalds's avatar
Linus Torvalds committed
280

281 282
/*
 * If non-zero, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Counts the outstanding cpu_hotplug_disable() calls.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

287 288
#ifdef CONFIG_HOTPLUG_CPU

289
/* Taken for reading by cpus_read_lock() and for writing by cpus_write_lock() */
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
290

291
void cpus_read_lock(void)
292
{
293
	percpu_down_read(&cpu_hotplug_lock);
294
}
295
EXPORT_SYMBOL_GPL(cpus_read_lock);
296

297
void cpus_read_unlock(void)
298
{
299
	percpu_up_read(&cpu_hotplug_lock);
300
}
301
EXPORT_SYMBOL_GPL(cpus_read_unlock);
302

303
void cpus_write_lock(void)
304
{
305
	percpu_down_write(&cpu_hotplug_lock);
306
}
307

308
void cpus_write_unlock(void)
309
{
310
	percpu_up_write(&cpu_hotplug_lock);
311 312
}

313
void lockdep_assert_cpus_held(void)
314
{
315
	percpu_rwsem_assert_held(&cpu_hotplug_lock);
316
}
317

318 319 320 321 322 323 324 325 326 327
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
328
	cpu_hotplug_disabled++;
329 330
	cpu_maps_update_done();
}
331
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
332

333 334 335 336 337 338 339
/*
 * Drop one disable reference. An enable without a matching disable only
 * triggers a one-time warning and leaves the counter untouched.
 */
static void __cpu_hotplug_enable(void)
{
	if (!WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		cpu_hotplug_disabled--;
}

340 341 342
/* Undo one cpu_hotplug_disable() call under cpu_add_remove_lock */
void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
347
#endif	/* CONFIG_HOTPLUG_CPU */
348

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;

	st->rollback = false;
	st->last = NULL;

	st->target = target;
	st->single = false;
	st->bringup = st->state < target;

	return prev_state;
}

static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
{
	st->rollback = true;

	/*
	 * If we have st->last we need to undo partial multi_instance of this
	 * state first. Otherwise start undo at the previous state.
	 */
	if (!st->last) {
		if (st->bringup)
			st->state--;
		else
			st->state++;
	}

	st->target = prev_state;
	st->bringup = !st->bringup;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	/* A state transition which already reached its target needs no kick */
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the smp_mb() at the top of cpuhp_thread_fun().
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}

static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state;
	int ret;

	prev_state = cpuhp_set_state(st, target);
	__cpuhp_kick_ap(st);
	if ((ret = st->result)) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

	return ret;
}
415

416 417 418 419
static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

420
	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
421
	wait_for_ap_thread(st, true);
422 423
	if (WARN_ON_ONCE((!cpu_online(cpu))))
		return -ECANCELED;
424 425 426 427 428

	/* Unpark the stopper thread and the hotplug thread of the target cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

429 430 431 432
	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(st, st->target);
433 434
}

435 436 437 438 439
/*
 * Start @cpu via the architecture code and wait for it to come up.
 *
 * Return: the error of __cpu_up() if that fails, otherwise the result of
 * bringup_wait_for_ap().
 */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	return bringup_wait_for_ap(cpu);
}

455 456 457 458
/*
 * Hotplug state machine related functions
 */

459
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
460 461
{
	for (st->state--; st->state > st->target; st->state--) {
462
		struct cpuhp_step *step = cpuhp_get_step(st->state);
463 464

		if (!step->skip_onerr)
465
			cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
466 467 468 469
	}
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
470
			      enum cpuhp_state target)
471 472 473 474 475 476
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
477
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
478 479
		if (ret) {
			st->target = prev_state;
480
			undo_cpu_up(cpu, st);
481 482 483 484 485 486
			break;
		}
	}
	return ret;
}

487 488 489 490 491 492 493
/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	/* One completion per direction, see wait_for_ap_thread() */
	init_completion(&st->done_up);
	init_completion(&st->done_down);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
508 509 510 511 512 513 514 515 516 517
 *
 * Each invocation of this function by the smpboot thread does a single AP
 * state callback.
 *
 * It has 3 modes of operation:
 *  - single: runs st->cb_state
 *  - up:     runs ++st->state, while st->state < st->target
 *  - down:   runs st->state--, while st->state > st->target
 *
 * When complete or on error, should_run is cleared and the completion is fired.
518 519 520 521
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
522 523
	bool bringup = st->bringup;
	enum cpuhp_state state;
524 525

	/*
526 527
	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
	 * that if we see ->should_run we also see the rest of the state.
528 529 530
	 */
	smp_mb();

531 532
	if (WARN_ON_ONCE(!st->should_run))
		return;
533

534
	cpuhp_lock_acquire(bringup);
535

536
	if (st->single) {
537 538 539 540 541 542 543 544
		state = st->cb_state;
		st->should_run = false;
	} else {
		if (bringup) {
			st->state++;
			state = st->state;
			st->should_run = (st->state < st->target);
			WARN_ON_ONCE(st->state > st->target);
545
		} else {
546 547 548 549
			state = st->state;
			st->state--;
			st->should_run = (st->state > st->target);
			WARN_ON_ONCE(st->state < st->target);
550
		}
551 552 553 554 555 556 557 558 559 560 561 562 563 564
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (st->rollback) {
		struct cpuhp_step *step = cpuhp_get_step(state);
		if (step->skip_onerr)
			goto next;
	}

	if (cpuhp_is_atomic_state(state)) {
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();
565

566 567 568 569
		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
570
	} else {
571 572 573 574 575 576 577 578 579 580 581
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without no
		 * paddle, no way forward, no way back. We loose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
582
	}
583 584

next:
585
	cpuhp_lock_release(bringup);
586 587

	if (!st->should_run)
588
		complete_ap_thread(st, bringup);
589 590 591
}

/* Invoke a single callback on a remote cpu */
592
static int
593 594
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
595 596
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
597
	int ret;
598 599 600 601

	if (!cpu_online(cpu))
		return 0;

602 603 604 605 606
	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);
607

608 609 610 611 612
	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
613
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
614

615 616 617 618 619
	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
620
	st->cb_state = state;
621 622
	st->single = true;

623
	__cpuhp_kick_ap(st);
624 625

	/*
626
	 * If we failed and did a partial, do a rollback.
627
	 */
628 629 630 631 632 633 634
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

635 636 637 638 639
	/*
	 * Clean up the leftovers so the next hotplug operation wont use stale
	 * data.
	 */
	st->node = st->last = NULL;
640
	return ret;
641 642 643 644 645
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
646 647
	enum cpuhp_state prev_state = st->state;
	int ret;
648

649 650 651 652 653
	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);
654 655 656 657 658 659

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
}

/*
 * Descriptor of the per-cpu "cpuhp/%u" threads, registered by
 * cpuhp_threads_init(). The thread pointer is stored in cpuhp_state.thread,
 * cpuhp_should_run() tells whether work is pending and cpuhp_thread_fun()
 * does the work.
 */
static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

/*
 * Register the hotplug threads and unpark the one of the current cpu.
 * Failing to register is fatal.
 */
void __init cpuhp_threads_init(void)
{
	int err = smpboot_register_percpu_thread(&cpuhp_threads);

	BUG_ON(err);
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

677
#ifdef CONFIG_HOTPLUG_CPU
678 679 680 681 682 683 684 685 686 687 688 689
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
690 691 692 693 694 695 696 697 698 699 700
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so its not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
701
	WARN_ON(cpu_online(cpu));
702 703 704 705
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

706 707 708 709
		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
710 711 712 713 714 715 716 717 718
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

Linus Torvalds's avatar
Linus Torvalds committed
719
/* Take this CPU down. */
720
static int take_cpu_down(void *_param)
Linus Torvalds's avatar
Linus Torvalds committed
721
{
722 723
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
724
	int err, cpu = smp_processor_id();
725
	int ret;
Linus Torvalds's avatar
Linus Torvalds committed
726 727 728 729

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
Zwane Mwaikambo's avatar
Zwane Mwaikambo committed
730
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
731

732 733 734 735 736 737
	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
738
	/* Invoke the former CPU_DYING callbacks */
739 740 741 742 743 744 745
	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		/*
		 * DYING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
746

747 748
	/* Give up timekeeping duties */
	tick_handover_do_timer();
749
	/* Park the stopper thread */
750
	stop_machine_park(cpu);
Zwane Mwaikambo's avatar
Zwane Mwaikambo committed
751
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
752 753
}

754
static int takedown_cpu(unsigned int cpu)
Linus Torvalds's avatar
Linus Torvalds committed
755
{
756
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
757
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
758

759
	/* Park the smpboot threads */
760
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
761
	smpboot_park_threads(cpu);
762

763
	/*
764 765
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
766
	 */
767
	irq_lock_sparse();
768

769 770 771
	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
772
	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
773
	if (err) {
774
		/* CPU refused to die */
775
		irq_unlock_sparse();
776 777
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
778
		return err;
779
	}
780
	BUG_ON(cpu_online(cpu));
Linus Torvalds's avatar
Linus Torvalds committed
781

782
	/*
783
	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
784 785
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
Peter Zijlstra's avatar
Peter Zijlstra committed
786 787
	 *
	 * Wait for the stop thread to go away.
788
	 */
789
	wait_for_ap_thread(st, false);
790
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
Linus Torvalds's avatar
Linus Torvalds committed
791

792 793 794
	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

795
	hotplug_cpu__broadcast_tick_pull(cpu);
Linus Torvalds's avatar
Linus Torvalds committed
796 797 798
	/* This actually kills the CPU. */
	__cpu_die(cpu);

799
	tick_cleanup_dead_cpu(cpu);
800
	rcutree_migrate_callbacks(cpu);
801 802
	return 0;
}
Linus Torvalds's avatar
Linus Torvalds committed
803

804 805 806 807
static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

808
	complete_ap_thread(st, false);
809 810
}

811 812 813 814 815
void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
816
	rcu_report_dead(smp_processor_id());
817 818 819 820 821 822 823
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
824 825
}

826 827 828 829
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);
830

831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
	}
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}
852

853
/* Requires cpu_add_remove_lock to be held */
854 855
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
856
{
857 858
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;
859 860 861 862

	if (num_online_cpus() == 1)
		return -EBUSY;

863
	if (!cpu_present(cpu))
864 865
		return -EINVAL;

866
	cpus_write_lock();
867 868 869

	cpuhp_tasks_frozen = tasks_frozen;

870
	prev_state = cpuhp_set_state(st, target);
871 872 873 874
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
875
	if (st->state > CPUHP_TEARDOWN_CPU) {
876
		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
877 878 879 880 881 882 883 884 885 886 887 888
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
889
		if (st->state > CPUHP_TEARDOWN_CPU)
890
			goto out;
891 892

		st->target = target;
893 894
	}
	/*
895
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
896 897
	 * to do the further cleanups.
	 */
898
	ret = cpuhp_down_callbacks(cpu, st, target);
899
	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
900 901
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
902
	}
903

904
out:
905
	cpus_write_unlock();
906 907 908 909 910
	/*
	 * Do post unplug cleanup. This is still protected against
	 * concurrent CPU hotplug via cpu_add_remove_lock.
	 */
	lockup_detector_cleanup();
911
	return ret;
912 913
}

914
static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
915
{
916
	int err;
917

918
	cpu_maps_update_begin();
919 920

	if (cpu_hotplug_disabled) {
921
		err = -EBUSY;
922 923 924
		goto out;
	}

925
	err = _cpu_down(cpu, 0, target);
926

927
out:
928
	cpu_maps_update_done();
Linus Torvalds's avatar
Linus Torvalds committed
929 930
	return err;
}
931

932 933 934 935
/* Take @cpu all the way down to CPUHP_OFFLINE; returns what do_cpu_down() returns */
int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
936
EXPORT_SYMBOL(cpu_down);
937 938 939

#else
#define takedown_cpu		NULL
Linus Torvalds's avatar
Linus Torvalds committed
940 941
#endif /*CONFIG_HOTPLUG_CPU*/

942
/**
943
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
944 945 946 947 948 949 950 951 952
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
953
	int ret;
954

955
	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
956 957
	while (st->state < target) {
		st->state++;
958 959 960 961 962
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		/*
		 * STARTING must not fail!
		 */
		WARN_ON_ONCE(ret);
963 964 965
	}
}

966
/*
967 968 969
 * Called from the idle task. Wake up the controlling task which brings the
 * stopper and the hotplug thread of the upcoming CPU up and then delegates
 * the rest of the online bringup to the hotplug thread.
970
 */
971
void cpuhp_online_idle(enum cpuhp_state state)
972
{
973 974 975 976 977 978 979
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;
980
	complete_ap_thread(st, true);
981 982
}

983
/* Requires cpu_add_remove_lock to be held */
984
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
Linus Torvalds's avatar
Linus Torvalds committed
985
{
986
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
987
	struct task_struct *idle;
988
	int ret = 0;
Linus Torvalds's avatar
Linus Torvalds committed
989

990
	cpus_write_lock();
991

992
	if (!cpu_present(cpu)) {
993 994 995 996
		ret = -EINVAL;
		goto out;
	}

997 998 999 1000 1001
	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
1002
		goto out;
1003 1004 1005 1006 1007 1008 1009 1010

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
1011
	}
1012

1013 1014
	cpuhp_tasks_frozen = tasks_frozen;

1015
	cpuhp_set_state(st, target);
1016 1017 1018 1019
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
1020
	if (st->state > CPUHP_BRINGUP_CPU) {
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
1032
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1033 1034
	 * responsible for bringing it up to the target state.
	 */
1035
	target = min((int)target, CPUHP_BRINGUP_CPU);
1036
	ret = cpuhp_up_callbacks(cpu, st, target);
1037
out:
1038
	cpus_write_unlock();
1039 1040 1041
	return ret;
}

1042
static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
1043 1044
{
	int err = 0;
1045

Rusty Russell's avatar
Rusty Russell committed
1046
	if (!cpu_possible(cpu)) {
1047 1048
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);