[PATCH] steal the task stack during task sleeping
huang ying
huang.ying.caritas at gmail.com
Sun Sep 24 01:52:12 CDT 2006
Hi all,
This is a patch to steal a task's stack while the task is sleeping. The scheme
is as follows:
1. The virtual address area for task stacks is moved to the area before the
one used by vmalloc.
2. Task stack allocation is reimplemented to allocate a page and map that
page into the task stack virtual address area.
3. When a task is scheduled out, if the stack space in use is fairly small
(less than 1/8 of PAGE_SIZE), the contents of the stack are copied into a
newly allocated small buffer, and the stack page itself is freed.
4. When the task is scheduled in, or a page fault occurs for an address in its
stack, a new page is allocated and the original contents are restored.
This patch is for x86 only for now. I have tested it on my notebook, and it runs
fine, with some performance penalty.
I hope it is useful to someone.
Any comments are welcome.
Best Regards,
Huang Ying
diffstat:
arch/i386/kernel/process.c | 40 +++++++++++++++++++++++++++++
arch/i386/mm/fault.c | 11 ++++++++
arch/i386/mm/init.c | 4 ++
include/asm-i386/pgtable.h | 5 +++
include/asm-i386/thread_info.h | 49 +++++++++++++++++++++++++++++++++++
init/Kconfig | 9 ++++++
kernel/fork.c | 11 ++++++++
kernel/pid.c | 6 ++++
kernel/sched.c | 8 +++++
mm/vmalloc.c | 56 +++++++++++++++++++++++++++++++++++++++++
10 files changed, 199 insertions(+)
diff -urNp linux-2.6.17.2/arch/i386/kernel/process.c
linux-2.6.17.2-ts/arch/i386/kernel/process.c
--- linux-2.6.17.2/arch/i386/kernel/process.c 2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/kernel/process.c 2006-09-24
10:59:55.000000000 +0800
@@ -902,3 +902,43 @@ unsigned long arch_align_stack(unsigned
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+#ifdef CONFIG_TASK_STACK_STEAL
+
+void task_stack_realize(unsigned long addr);
+void task_stack_virtualize(unsigned long addr);
+
+/*
+ * Give @task a real stack page again after its stack was stolen.
+ *
+ * While stolen, task->thread_info points at a kmalloc()ed buffer holding
+ * the thread_info followed by the saved stack bytes.  The page containing
+ * thread.esp is mapped again with task_stack_realize(), the thread_info is
+ * copied to the start of that page and the saved bytes back to thread.esp,
+ * then the buffer is freed and task->thread_info points at the page.
+ */
+void _task_stack_reclaim(task_t *task)
+{
+	struct thread_info *saved = task->thread_info;
+	unsigned long sp = task->thread.esp;
+	unsigned long page = sp & PAGE_MASK;
+	int used = PAGE_SIZE - (sp & ~PAGE_MASK);
+
+	/* clear the flag first so the copy placed in the page is clean */
+	saved->flags &= ~_TIF_TS_VIRTUAL;
+	task_stack_realize(page);
+
+	memcpy((void *)page, saved, sizeof(*saved));
+	memcpy((void *)sp, saved + 1, used);
+
+	kfree(saved);
+	task->thread_info = (struct thread_info *)page;
+}
+
+/*
+ * Move the live part of @task's kernel stack out of its stack page.
+ *
+ * The thread_info and the used stack bytes (from thread.esp to the end of
+ * the page) are copied into one kmalloc()ed buffer, the stack page is
+ * unmapped and freed via task_stack_virtualize(), and task->thread_info is
+ * redirected to the buffer with _TIF_TS_VIRTUAL set so that
+ * _task_stack_reclaim() can undo this later.
+ *
+ * Stealing is only an optimisation: if the buffer cannot be allocated the
+ * task simply keeps its stack page.
+ */
+void _task_stack_steal(task_t *task)
+{
+	struct thread_info *ti;
+	int stack_depth;
+
+	stack_depth = PAGE_SIZE - (task->thread.esp & ~PAGE_MASK);
+	/*
+	 * This runs from context_switch() right after switch_to(), so the
+	 * allocation must not sleep.
+	 */
+	ti = kmalloc(stack_depth + sizeof(struct thread_info), GFP_ATOMIC);
+	if (!ti)
+		return;
+	memcpy(ti, task->thread_info, sizeof(struct thread_info));
+	memcpy(ti + 1, (void *)task->thread.esp, stack_depth);
+	task_stack_virtualize((unsigned long)task->thread_info);
+	task->thread_info = ti;
+	ti->flags |= _TIF_TS_VIRTUAL;
+}
+
+#endif
diff -urNp linux-2.6.17.2/arch/i386/mm/fault.c
linux-2.6.17.2-ts/arch/i386/mm/fault.c
--- linux-2.6.17.2/arch/i386/mm/fault.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/mm/fault.c 2006-09-24 10:52:09.000000000 +0800
@@ -321,6 +321,17 @@ fastcall void __kprobes do_page_fault(st
if (unlikely(address >= TASK_SIZE)) {
if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
return;
+#ifdef CONFIG_TASK_STACK_STEAL
+		/*
+		 * A fault inside the task-stack area may be an access to a
+		 * stolen stack: slot N of the area belongs to pid N, so look
+		 * the owner up and give it its stack page back.
+		 */
+		if (address >= TS_AREA_START &&
+		    address < TS_AREA_START + TS_AREA_SIZE) {
+			struct task_struct *task;
+			int pid;
+
+			pid = (address - TS_AREA_START) / PAGE_SIZE;
+			task = find_task_by_pid(pid);
+			/*
+			 * No task owns this slot: fall through and let the
+			 * normal bad-kernel-address handling deal with it
+			 * instead of dereferencing a NULL task.
+			 */
+			if (task) {
+				task_stack_reclaim(task);
+				return;
+			}
+		}
+#endif
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
diff -urNp linux-2.6.17.2/arch/i386/mm/init.c
linux-2.6.17.2-ts/arch/i386/mm/init.c
--- linux-2.6.17.2/arch/i386/mm/init.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/arch/i386/mm/init.c 2006-09-24 11:01:06.000000000 +0800
@@ -597,6 +597,10 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
+#ifdef CONFIG_TASK_STACK_STEAL
+ vmalloc_earlyreserve = 2 * TS_AREA_SIZE;
+#endif
+
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
diff -urNp linux-2.6.17.2/include/asm-i386/pgtable.h
linux-2.6.17.2-ts/include/asm-i386/pgtable.h
--- linux-2.6.17.2/include/asm-i386/pgtable.h 2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/include/asm-i386/pgtable.h 2006-09-24
11:01:41.000000000 +0800
@@ -88,6 +88,11 @@ void paging_init(void);
# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
#endif
+#ifdef CONFIG_TASK_STACK_STEAL
+/*
+ * Virtual address window used for task stacks: TS_AREA_SIZE bytes starting
+ * at VMALLOC_START - TS_AREA_SIZE rounded down to a TS_AREA_SIZE boundary.
+ */
+#define TS_AREA_SIZE (PMD_SIZE)
+#define TS_AREA_START ((VMALLOC_START - TS_AREA_SIZE) & ~(TS_AREA_SIZE - 1))
+#endif
+
/*
* _PAGE_PSE set in the page directory entry just means that
* the page directory entry points directly to a 4MB-aligned block of
diff -urNp linux-2.6.17.2/include/asm-i386/thread_info.h
linux-2.6.17.2-ts/include/asm-i386/thread_info.h
--- linux-2.6.17.2/include/asm-i386/thread_info.h 2006-06-30
08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/include/asm-i386/thread_info.h 2006-09-24
11:28:18.000000000 +0800
@@ -95,6 +95,47 @@ static inline struct thread_info *curren
/* how to get the current stack pointer from C */
register unsigned long current_stack_pointer asm("esp") __attribute_used__;
+#ifdef CONFIG_TASK_STACK_STEAL
+
+void *task_stack_alloc_page(struct task_struct *task);
+void task_stack_free_page(unsigned long addr);
+
+/*
+ * thread information allocation
+ *
+ * alloc_thread_info() maps a stack page for @tsk through
+ * task_stack_alloc_page(); with CONFIG_DEBUG_STACK_USAGE the page is
+ * zeroed when the returned pointer is non-NULL.  free_thread_info() hands
+ * the address back to task_stack_free_page().
+ */
+#ifdef CONFIG_DEBUG_STACK_USAGE
+#define alloc_thread_info(tsk) \
+	({ \
+		/* distinctive name so it cannot capture an identifier in (tsk) */ \
+		struct thread_info *__ts_ti; \
+		\
+		__ts_ti = task_stack_alloc_page(tsk); \
+		if (__ts_ti) \
+			memset(__ts_ti, 0, PAGE_SIZE); \
+		__ts_ti; \
+	})
+#else
+#define alloc_thread_info(tsk) task_stack_alloc_page(tsk)
+#endif
+
+/* parenthesize the argument so the cast applies to the whole expression */
+#define free_thread_info(info) task_stack_free_page((unsigned long)(info))
+
+void _task_stack_reclaim(struct task_struct *task);
+void _task_stack_steal(struct task_struct *task);
+
+/*
+ * Restore the stack page of @tsk if it is currently stolen, i.e. if
+ * _TIF_TS_VIRTUAL is set in its thread_info flags.  @tsk is evaluated
+ * exactly once.
+ */
+#define task_stack_reclaim(tsk) \
+	do { \
+		struct task_struct *__ts_tsk = (tsk); \
+		if (__ts_tsk->thread_info->flags & _TIF_TS_VIRTUAL) \
+			_task_stack_reclaim(__ts_tsk); \
+	} while (0)
+
+/*
+ * Steal the stack page of @tsk when the used part of its stack plus its
+ * thread_info is smaller than PAGE_SIZE / 8.  Tasks with pid 0 are never
+ * stolen from.
+ *
+ * The depth check uses @tsk itself rather than whatever variable named
+ * "prev" happens to be in scope at the call site; @tsk is evaluated once.
+ */
+#define task_stack_steal(tsk) \
+	do { \
+		struct task_struct *__ts_tsk = (tsk); \
+		if (PAGE_SIZE - (__ts_tsk->thread.esp & ~PAGE_MASK) + \
+		    sizeof(struct thread_info) < PAGE_SIZE / 8 && \
+		    __ts_tsk->pid) \
+			_task_stack_steal(__ts_tsk); \
+	} while (0)
+
+#else /* CONFIG_TASK_STACK_STEAL */
+
/* thread information allocation */
#ifdef CONFIG_DEBUG_STACK_USAGE
#define alloc_thread_info(tsk) \
@@ -112,6 +153,8 @@ register unsigned long current_stack_poi
#define free_thread_info(info) kfree(info)
+#endif /* CONFIG_TASK_STACK_STEAL */
+
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
@@ -143,6 +186,9 @@ register unsigned long current_stack_poi
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling
TIF_NEED_RESCHED */
#define TIF_MEMDIE 17
+#ifdef CONFIG_TASK_STACK_STEAL
+#define TIF_TS_VIRTUAL 18
+#endif
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -155,6 +201,9 @@ register unsigned long current_stack_poi
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+#ifdef CONFIG_TASK_STACK_STEAL
+#define _TIF_TS_VIRTUAL (1<<TIF_TS_VIRTUAL)
+#endif
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
diff -urNp linux-2.6.17.2/init/Kconfig linux-2.6.17.2-ts/init/Kconfig
--- linux-2.6.17.2/init/Kconfig 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/init/Kconfig 2006-09-24 11:27:50.000000000 +0800
@@ -374,6 +374,15 @@ config SLAB
SLOB is more space efficient but does not scale well and is
more susceptible to fragmentation.
+config TASK_STACK_STEAL
+ bool "Steal task stack during task sleeping" if EMBEDDED
+ default n
+ depends on !BASE_FULL && X86 && 4KSTACKS
+ help
+ When a task is sleeping, its stack is of no use and can be
+ stolen for other purposes. When the task is woken up again, its
+ stack is reclaimed.
+
endmenu # General setup
config TINY_SHMEM
diff -urNp linux-2.6.17.2/kernel/fork.c linux-2.6.17.2-ts/kernel/fork.c
--- linux-2.6.17.2/kernel/fork.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/fork.c 2006-09-24 11:05:53.000000000 +0800
@@ -153,7 +153,11 @@ void __init fork_init(unsigned long memp
init_task.signal->rlim[RLIMIT_NPROC];
}
+#ifdef CONFIG_TASK_STACK_STEAL
+static struct task_struct *dup_task_struct(struct task_struct *orig, int pid)
+#else
static struct task_struct *dup_task_struct(struct task_struct *orig)
+#endif
{
struct task_struct *tsk;
struct thread_info *ti;
@@ -164,6 +168,9 @@ static struct task_struct *dup_task_stru
if (!tsk)
return NULL;
+#ifdef CONFIG_TASK_STACK_STEAL
+ tsk->pid = pid;
+#endif
ti = alloc_thread_info(tsk);
if (!ti) {
free_task_struct(tsk);
@@ -951,7 +958,11 @@ static task_t *copy_process(unsigned lon
goto fork_out;
retval = -ENOMEM;
+#ifdef CONFIG_TASK_STACK_STEAL
+ p = dup_task_struct(current, pid);
+#else
p = dup_task_struct(current);
+#endif
if (!p)
goto fork_out;
diff -urNp linux-2.6.17.2/kernel/pid.c linux-2.6.17.2-ts/kernel/pid.c
--- linux-2.6.17.2/kernel/pid.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/pid.c 2006-09-24 11:48:00.000000000 +0800
@@ -32,7 +32,13 @@ static struct hlist_head *pid_hash;
static int pidhash_shift;
static kmem_cache_t *pid_cachep;
+#ifndef CONFIG_TASK_STACK_STEAL
int pid_max = PID_MAX_DEFAULT;
+#else
+/*
+ * With stack stealing each pid owns one page-sized slot in the task-stack
+ * area (see task_stack_alloc_page()), so the default pid_max is capped at
+ * the number of slots in that area.
+ */
+#define TS_MAX_PID (TS_AREA_SIZE / PAGE_SIZE)
+int pid_max = (TS_MAX_PID < PID_MAX_DEFAULT) ? TS_MAX_PID : PID_MAX_DEFAULT;
+#endif
+
int last_pid;
#define RESERVED_PIDS 300
diff -urNp linux-2.6.17.2/kernel/sched.c linux-2.6.17.2-ts/kernel/sched.c
--- linux-2.6.17.2/kernel/sched.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/kernel/sched.c 2006-09-24 11:12:03.000000000 +0800
@@ -1606,9 +1606,17 @@ task_t * context_switch(runqueue_t *rq,
rq->prev_mm = oldmm;
}
+#ifdef CONFIG_TASK_STACK_STEAL
+ task_stack_reclaim(next);
+#endif
+
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
+#ifdef CONFIG_TASK_STACK_STEAL
+ task_stack_steal(prev);
+#endif
+
return prev;
}
diff -urNp linux-2.6.17.2/mm/vmalloc.c linux-2.6.17.2-ts/mm/vmalloc.c
--- linux-2.6.17.2/mm/vmalloc.c 2006-06-30 08:17:23.000000000 +0800
+++ linux-2.6.17.2-ts/mm/vmalloc.c 2006-09-24 11:00:35.000000000 +0800
@@ -630,3 +630,59 @@ finished:
read_unlock(&vmlist_lock);
return buf - buf_start;
}
+
+#ifdef CONFIG_TASK_STACK_STEAL
+
+/*
+ * Back the task-stack slot at @addr with a freshly allocated page.
+ *
+ * Returns 0 on success, or -ENOMEM when the page or one of the kernel
+ * page-table levels could not be allocated; in that case the data page is
+ * released again and the PTE is left untouched.
+ *
+ * NOTE(review): GFP_KERNEL is kept from the original code, but this is
+ * also reached from context_switch() and the page-fault handler via
+ * _task_stack_reclaim() -- confirm that sleeping is acceptable there.
+ */
+static int task_stack_map_page(unsigned long addr)
+{
+	struct page *pg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pg = alloc_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	pgd = pgd_offset_k(addr);
+	pud = pud_alloc(&init_mm, pgd, addr);
+	if (!pud)
+		goto out_free;
+	pmd = pmd_alloc(&init_mm, pud, addr);
+	if (!pmd)
+		goto out_free;
+	pte = pte_alloc_kernel(pmd, addr);
+	if (!pte)
+		goto out_free;
+
+	set_pte_at(&init_mm, addr, pte, mk_pte(pg, PAGE_KERNEL));
+	return 0;
+
+out_free:
+	__free_page(pg);
+	return -ENOMEM;
+}
+
+/*
+ * Map a new page at @addr for a stack that is being restored.
+ *
+ * Callers get no status back and write to @addr right away, so a failed
+ * mapping is fatal here rather than silently installing a bogus PTE.
+ */
+void task_stack_realize(unsigned long addr)
+{
+	if (task_stack_map_page(addr) < 0)
+		BUG();
+}
+
+/*
+ * Unmap and free the page backing the task-stack slot at @addr.
+ *
+ * The page tables are only looked up, never allocated; if nothing is
+ * mapped at @addr the call is a no-op.  The TLB entry is flushed before
+ * the page goes back to the allocator so that no stale translation can
+ * reach a page that has already been reused.
+ */
+void task_stack_virtualize(unsigned long addr)
+{
+	struct page *pg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return;
+	pte = pte_offset_kernel(pmd, addr);
+	if (!pte_present(*pte))
+		return;
+
+	pg = pte_page(*pte);
+	pte_clear(&init_mm, addr, pte);
+	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	__free_page(pg);
+}
+
+/*
+ * Allocate the stack page for @task.
+ *
+ * Each pid owns the page-sized slot TS_AREA_START + pid * PAGE_SIZE.
+ * Returns the slot address with a page mapped behind it, or NULL if the
+ * page could not be allocated or mapped.
+ */
+void *task_stack_alloc_page(struct task_struct *task)
+{
+	unsigned long addr = TS_AREA_START + task->pid * PAGE_SIZE;
+
+	if (task_stack_map_page(addr) < 0)
+		return NULL;
+	return (void *)addr;
+}
+
+/*
+ * Release a thread_info given its address.
+ *
+ * An address inside the task-stack area is a mapped stack page and is
+ * torn down with task_stack_virtualize(); any other address is passed to
+ * kfree().
+ */
+void task_stack_free_page(unsigned long addr)
+{
+	int in_ts_area = addr >= TS_AREA_START &&
+			 addr < TS_AREA_START + TS_AREA_SIZE;
+
+	if (in_ts_area) {
+		task_stack_virtualize(addr);
+		return;
+	}
+	kfree((void *)addr);
+}
+
+#endif
-------------- next part --------------
A non-text attachment was scrubbed...
Name: task_stack_steal.diff
Type: text/x-patch
Size: 12164 bytes
Desc: not available
Url : http://www.selenic.com/pipermail/linux-tiny/attachments/20060924/66e91643/task_stack_steal-0001.bin
More information about the Linux-tiny
mailing list