[PATCH] steal the task stack during task sleeping

huang ying huang.ying.caritas at gmail.com
Sun Sep 24 01:52:12 CDT 2006


Hi all,

This is a patch that steals a task's stack while the task is sleeping. The
scheme is as follows:

1. The virtual address area for task stacks is moved to an area just below
   the area used by vmalloc.
2. Task stack allocation is reimplemented to allocate a page and map that
   page into the task stack virtual address area.
3. When a task is scheduled out, if the stack space in use is fairly small
   (less than 1/8 of PAGE_SIZE), the contents of the stack are copied into a
   newly allocated small buffer, and the stack page itself is freed.
4. When the task is scheduled in, or a page fault occurs for an address in its
   stack, a new page is allocated and the original contents are restored.

This patch is for x86 only for now. I have tested it on my notebook, and it
runs fine, with some performance penalty.

I hope it is useful to someone.
Any comments are welcome.

Best Regards,
Huang Ying

diffstat:
 arch/i386/kernel/process.c     |   40 +++++++++++++++++++++++++++++
 arch/i386/mm/fault.c           |   11 ++++++++
 arch/i386/mm/init.c            |    4 ++
 include/asm-i386/pgtable.h     |    5 +++
 include/asm-i386/thread_info.h |   49 +++++++++++++++++++++++++++++++++++
 init/Kconfig                   |    9 ++++++
 kernel/fork.c                  |   11 ++++++++
 kernel/pid.c                   |    6 ++++
 kernel/sched.c                 |    8 +++++
 mm/vmalloc.c                   |   56 +++++++++++++++++++++++++++++++++++++++++
 10 files changed, 199 insertions(+)
diff -urNp linux-2.6.17.2/arch/i386/kernel/process.c
linux-2.6.17.2-ts/arch/i386/kernel/process.c
--- linux-2.6.17.2/arch/i386/kernel/process.c	2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/kernel/process.c	2006-09-24
10:59:55.000000000 +0800
@@ -902,3 +902,43 @@ unsigned long arch_align_stack(unsigned
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
+
+#ifdef CONFIG_TASK_STACK_STEAL
+
+void task_stack_realize(unsigned long addr);
+void task_stack_virtualize(unsigned long addr);
+
+/* Undo _task_stack_steal(): map a new page and copy the saved state back. */
+void _task_stack_reclaim(task_t *task)
+{
+	struct thread_info *saved = task->thread_info;
+	unsigned long esp = task->thread.esp;
+	unsigned long page = esp & PAGE_MASK;
+	int used = PAGE_SIZE - (esp & ~PAGE_MASK);
+
+	saved->flags &= ~_TIF_TS_VIRTUAL;	/* cleared before the copy below */
+	task_stack_realize(page);
+	memcpy((void *)page, saved, sizeof(struct thread_info));
+	/* The stack bytes were stored right after the thread_info copy. */
+	memcpy((void *)esp, (char *)(saved + 1), used);
+	kfree(saved);
+	task->thread_info = (struct thread_info *)page;
+}
+
+/* Move @task's thread_info and live stack into a kmalloc buffer and free the
+ * stack page. Reached from context_switch(), so the allocation cannot sleep. */
+void _task_stack_steal(task_t *task)
+{
+	int stack_depth = PAGE_SIZE - (task->thread.esp & ~PAGE_MASK);
+	struct thread_info *ti = kmalloc(stack_depth + sizeof(*ti), GFP_ATOMIC);
+
+	if (!ti)	/* out of memory: leave the stack page in place */
+		return;
+	memcpy(ti, task->thread_info, sizeof(*ti));
+	memcpy(ti + 1, (void *)task->thread.esp, stack_depth);
+	task_stack_virtualize((unsigned long)task->thread_info);
+	ti->flags |= _TIF_TS_VIRTUAL;
+	task->thread_info = ti;
+}
+
+#endif
diff -urNp linux-2.6.17.2/arch/i386/mm/fault.c
linux-2.6.17.2-ts/arch/i386/mm/fault.c
--- linux-2.6.17.2/arch/i386/mm/fault.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/mm/fault.c	2006-09-24 10:52:09.000000000 +0800
@@ -321,6 +321,17 @@ fastcall void __kprobes do_page_fault(st
 	if (unlikely(address >= TASK_SIZE)) {
 		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
 			return;
+#ifdef CONFIG_TASK_STACK_STEAL
+		if (address >= TS_AREA_START &&
+		    address < TS_AREA_START + TS_AREA_SIZE) {
+			int pid = (address - TS_AREA_START) / PAGE_SIZE;	/* slot index == pid */
+			struct task_struct *task = find_task_by_pid(pid);
+			if (task) {	/* no such task: fall through to the checks below */
+				task_stack_reclaim(task);
+				return;
+			}
+		}
+#endif
 		if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
 						SIGSEGV) == NOTIFY_STOP)
 			return;
diff -urNp linux-2.6.17.2/arch/i386/mm/init.c
linux-2.6.17.2-ts/arch/i386/mm/init.c
--- linux-2.6.17.2/arch/i386/mm/init.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/mm/init.c	2006-09-24 11:01:06.000000000 +0800
@@ -597,6 +597,10 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif

+#ifdef CONFIG_TASK_STACK_STEAL
+	vmalloc_earlyreserve = 2 * TS_AREA_SIZE;
+#endif
+
 	/* this will put all low memory onto the freelists */
 	totalram_pages += free_all_bootmem();

diff -urNp linux-2.6.17.2/include/asm-i386/pgtable.h
linux-2.6.17.2-ts/include/asm-i386/pgtable.h
--- linux-2.6.17.2/include/asm-i386/pgtable.h	2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/include/asm-i386/pgtable.h	2006-09-24
11:01:41.000000000 +0800
@@ -88,6 +88,11 @@ void paging_init(void);
 # define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 #endif

+#ifdef CONFIG_TASK_STACK_STEAL
+#define TS_AREA_SIZE	(PMD_SIZE)
+#define TS_AREA_START	((VMALLOC_START - TS_AREA_SIZE) & ~(TS_AREA_SIZE - 1))
+#endif
+
 /*
  * _PAGE_PSE set in the page directory entry just means that
  * the page directory entry points directly to a 4MB-aligned block of
diff -urNp linux-2.6.17.2/include/asm-i386/thread_info.h
linux-2.6.17.2-ts/include/asm-i386/thread_info.h
--- linux-2.6.17.2/include/asm-i386/thread_info.h	2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/include/asm-i386/thread_info.h	2006-09-24
11:28:18.000000000 +0800
@@ -95,6 +95,47 @@ static inline struct thread_info *curren
 /* how to get the current stack pointer from C */
 register unsigned long current_stack_pointer asm("esp") __attribute_used__;

+#ifdef CONFIG_TASK_STACK_STEAL
+
+void *task_stack_alloc_page(struct task_struct *task);
+void task_stack_free_page(unsigned long addr);
+
+/* thread_info allocation via task_stack_alloc_page()/task_stack_free_page() */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define alloc_thread_info(tsk)					\
+	({							\
+		struct thread_info *ret;			\
+								\
+		ret = task_stack_alloc_page(tsk);		\
+		if (ret)					\
+			memset(ret, 0, PAGE_SIZE);		\
+		ret;						\
+	})
+#else
+#define alloc_thread_info(tsk) task_stack_alloc_page(tsk)
+#endif
+
+#define free_thread_info(info)	task_stack_free_page((unsigned long)(info))
+
+void _task_stack_reclaim(struct task_struct *task);
+void _task_stack_steal(struct task_struct *task);
+
+#define task_stack_reclaim(tsk)	/* restore tsk's stack if it was stolen */ \
+	do {							\
+		if ((tsk)->thread_info->flags & _TIF_TS_VIRTUAL)	\
+			_task_stack_reclaim(tsk);		\
+	} while (0)
+
+#define task_stack_steal(tsk)	/* steal only shallow stacks of pid != 0 */ \
+	do {							\
+		if (PAGE_SIZE - ((tsk)->thread.esp & ~PAGE_MASK) + \
+			sizeof(struct thread_info) < PAGE_SIZE / 8 && \
+			(tsk)->pid)				\
+			_task_stack_steal(tsk);			\
+	} while (0)
+
+#else /* CONFIG_TASK_STACK_STEAL */
+
 /* thread information allocation */
 #ifdef CONFIG_DEBUG_STACK_USAGE
 #define alloc_thread_info(tsk)					\
@@ -112,6 +153,8 @@ register unsigned long current_stack_poi

 #define free_thread_info(info)	kfree(info)

+#endif /* CONFIG_TASK_STACK_STEAL */
+
 #else /* !__ASSEMBLY__ */

 /* how to get the thread information struct from ASM */
@@ -143,6 +186,9 @@ register unsigned long current_stack_poi
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling
TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
+#ifdef CONFIG_TASK_STACK_STEAL
+#define TIF_TS_VIRTUAL		18
+#endif

 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -155,6 +201,9 @@ register unsigned long current_stack_poi
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+#ifdef CONFIG_TASK_STACK_STEAL
+#define _TIF_TS_VIRTUAL		(1<<TIF_TS_VIRTUAL)
+#endif

 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
diff -urNp linux-2.6.17.2/init/Kconfig linux-2.6.17.2-ts/init/Kconfig
--- linux-2.6.17.2/init/Kconfig	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/init/Kconfig	2006-09-24 11:27:50.000000000 +0800
@@ -374,6 +374,15 @@ config SLAB
 	  SLOB is more space efficient but does not scale well and is
 	  more susceptible to fragmentation.

+config TASK_STACK_STEAL
+	bool "Steal task stack during task sleeping" if EMBEDDED
+	default n
+	depends on !BASE_FULL && X86 && 4KSTACKS
+	help
+	  When a task is sleeping, its stack is of no use and can be
+	  stolen for other uses. When the task is woken up again, the
+	  stack is reclaimed.
+
 endmenu		# General setup

 config TINY_SHMEM
diff -urNp linux-2.6.17.2/kernel/fork.c linux-2.6.17.2-ts/kernel/fork.c
--- linux-2.6.17.2/kernel/fork.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/fork.c	2006-09-24 11:05:53.000000000 +0800
@@ -153,7 +153,11 @@ void __init fork_init(unsigned long memp
 		init_task.signal->rlim[RLIMIT_NPROC];
 }

+#ifdef CONFIG_TASK_STACK_STEAL
+static struct task_struct *dup_task_struct(struct task_struct *orig, int pid)
+#else
 static struct task_struct *dup_task_struct(struct task_struct *orig)
+#endif
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
@@ -164,6 +168,9 @@ static struct task_struct *dup_task_stru
 	if (!tsk)
 		return NULL;

+#ifdef CONFIG_TASK_STACK_STEAL
+	tsk->pid = pid;
+#endif
 	ti = alloc_thread_info(tsk);
 	if (!ti) {
 		free_task_struct(tsk);
@@ -951,7 +958,11 @@ static task_t *copy_process(unsigned lon
 		goto fork_out;

 	retval = -ENOMEM;
+#ifdef CONFIG_TASK_STACK_STEAL
+	p = dup_task_struct(current, pid);
+#else
 	p = dup_task_struct(current);
+#endif
 	if (!p)
 		goto fork_out;

diff -urNp linux-2.6.17.2/kernel/pid.c linux-2.6.17.2-ts/kernel/pid.c
--- linux-2.6.17.2/kernel/pid.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/pid.c	2006-09-24 11:48:00.000000000 +0800
@@ -32,7 +32,13 @@ static struct hlist_head *pid_hash;
 static int pidhash_shift;
 static kmem_cache_t *pid_cachep;

+#ifndef CONFIG_TASK_STACK_STEAL
 int pid_max = PID_MAX_DEFAULT;
+#else
+#define TS_MAX_PID	(TS_AREA_SIZE / PAGE_SIZE)	/* pages in the task stack area */
+int pid_max = (TS_MAX_PID < PID_MAX_DEFAULT) ? TS_MAX_PID : PID_MAX_DEFAULT;
+#endif
+
 int last_pid;

 #define RESERVED_PIDS		300
diff -urNp linux-2.6.17.2/kernel/sched.c linux-2.6.17.2-ts/kernel/sched.c
--- linux-2.6.17.2/kernel/sched.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/sched.c	2006-09-24 11:12:03.000000000 +0800
@@ -1606,9 +1606,17 @@ task_t * context_switch(runqueue_t *rq,
 		rq->prev_mm = oldmm;
 	}

+#ifdef CONFIG_TASK_STACK_STEAL
+	task_stack_reclaim(next);
+#endif
+
 	/* Here we just switch the register state and the stack. */
 	switch_to(prev, next, prev);

+#ifdef CONFIG_TASK_STACK_STEAL
+	task_stack_steal(prev);
+#endif
+
 	return prev;
 }

diff -urNp linux-2.6.17.2/mm/vmalloc.c linux-2.6.17.2-ts/mm/vmalloc.c
--- linux-2.6.17.2/mm/vmalloc.c	2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/mm/vmalloc.c	2006-09-24 11:00:35.000000000 +0800
@@ -630,3 +630,59 @@ finished:
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+#ifdef CONFIG_TASK_STACK_STEAL
+
+/*
+ * Back the stack slot at @addr with a freshly allocated page. There is no
+ * error return, so an allocation failure is fatal instead of being mapped.
+ */
+void task_stack_realize(unsigned long addr)
+{
+	struct page *pg = alloc_page(GFP_KERNEL);
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud = pud_alloc(&init_mm, pgd, addr);
+	pmd_t *pmd = pud ? pmd_alloc(&init_mm, pud, addr) : NULL;
+	pte_t *pte = pmd ? pte_alloc_kernel(pmd, addr) : NULL;
+
+	BUG_ON(!pg || !pte);
+	set_pte_at(&init_mm, addr, pte, mk_pte(pg, PAGE_KERNEL));
+}
+
+/*
+ * Unmap the stack slot at @addr and free the page that backed it. The page
+ * tables are only looked up here, so the slot must currently be mapped.
+ */
+void task_stack_virtualize(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+	pte_t *pte = pte_offset_kernel(pmd, addr);
+	struct page *pg = pte_page(*pte);
+
+	pte_clear(&init_mm, addr, pte);
+	/* Flush first: a stale TLB entry must not reach the page once reused. */
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	__free_page(pg);
+}
+
+/* Map a page for @task in its per-pid slot; NULL if the pid has no slot. */
+void *task_stack_alloc_page(struct task_struct *task)
+{
+	unsigned long addr = TS_AREA_START + task->pid * PAGE_SIZE;
+	if (task->pid < 0 || task->pid >= TS_AREA_SIZE / PAGE_SIZE)
+		return NULL;
+	task_stack_realize(addr);
+	return (void *)addr;
+}
+
+void task_stack_free_page(unsigned long addr)
+{
+	if (addr >= TS_AREA_START && addr < TS_AREA_START + TS_AREA_SIZE)
+		task_stack_virtualize(addr);	/* still mapped in the stack area */
+	else
+		kfree((void *)addr);		/* outside the area: kmalloc'ed copy */
+}
+
+#endif
-------------- next part --------------
A non-text attachment was scrubbed...
Name: task_stack_steal.diff
Type: text/x-patch
Size: 12164 bytes
Desc: not available
Url : http://www.selenic.com/pipermail/linux-tiny/attachments/20060924/66e91643/task_stack_steal-0001.bin


More information about the Linux-tiny mailing list