1.综述
用一段话来描述slab:就是将内存按照对象为单位进行组织,一定数目的对象聚集在一起称为一个slab。系统通过操作slab来进行内存的分配。每个CPU的per-CPU结构中保存了空闲对象的指针,通过这些指针,系统可以快速有效地进行对象分配。
上面的图来自PLK。很明显,kmem_cache控制了内存节点的slab内存分配:结构中有指向per-CPU缓存的array_cache指针,也有指向slab链表(全满、部分空闲、全空)的三个指针。
2.着色原理
所谓的着色,目的只是为了让不同slab中相同偏移量的对象在进入CPU cache时,尽量不落在同一个cache行中。
3.kmem_cache的操作
cache创建
kmem_cache_create.
创建kmem_cache的数据结构,会调用kmem_cache_alloc来创建slab
slab创建
kmem_cache_alloc调用__cache_alloc,最终调用____cache_alloc。____cache_alloc会查看array_cache中是否有空闲的对象,如果有则直接分配;如果没有,就需要重新填充array_cache。
slab销毁
kmem_cache_free
4.kmalloc过程
简单地说,先查找合适的内存对象,再通过__cache_alloc进行对象分配。这里需要分成两种情况:一种情况是,当申请的内存大小在编译时就能确定时,直接在编译期选出合适的cache(编译优化):
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *cachep;
void *ret;
if (__builtin_constant_p(size)) {
int i = 0;
if (!size)
return ZERO_SIZE_PTR;
#define CACHE(x) \
if (size <= x) \
goto found; \
else \
i++;
#include
#undef CACHE
return NULL;
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
cachep = malloc_sizes[i].cs_dmacachep;
else
#endif
cachep = malloc_sizes[i].cs_cachep;
ret = kmem_cache_alloc_notrace(cachep, flags);
trace_kmalloc(_THIS_IP_, ret,
size, slab_buffer_size(cachep), flags);
return ret;
}
return __kmalloc(size, flags);
}
否则,直接调用__kmalloc进行查找符合大小的cache。
5, vmalloc过程
vmalloc负责申请虚拟地址连续、但物理地址不一定连续的内存区域。需要知道的是,vmalloc和kmalloc都是内核中申请内存的函数,而malloc则属于用户态的内存申请,由glibc通过系统调用brk、mmap实现。
/*
 * __vmalloc_node - reserve a vmalloc address range and back it with pages.
 * @size:     requested size in bytes (rounded up to a page multiple)
 * @align:    alignment passed on to __get_vm_area_node()
 * @gfp_mask: GFP flags for the allocations
 * @prot:     page protection passed on to __vmalloc_area_node()
 * @node:     node argument passed on to both helpers
 * @caller:   caller cookie passed on to both helpers
 *
 * Returns the address produced by __vmalloc_area_node(), or NULL when the
 * rounded size is zero, exceeds totalram_pages, or no vm area could be
 * obtained.
 */
static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, void *caller)
{
	struct vm_struct *area;
	void *addr;
	unsigned long real_size = size;	/* unrounded size, reported to kmemleak */

	size = PAGE_ALIGN(size);
	/* Reject empty requests and requests larger than all of RAM. */
	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
		return NULL;

	area = __get_vm_area_node(size, align, VM_ALLOC, VMALLOC_START,
				  VMALLOC_END, node, gfp_mask, caller);
	if (!area)
		return NULL;

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);

	/*
	 * A ref_count = 3 is needed because the vm_struct and vmap_area
	 * structures allocated in the __get_vm_area_node() function contain
	 * references to the virtual address of the vmalloc'ed block.
	 */
	kmemleak_alloc(addr, real_size, 3, gfp_mask);

	return addr;
}
vmalloc最终会调用到__vmalloc_node,之后先通过__get_vm_area_node获取一段虚拟地址区域,然后通过__vmalloc_area_node为这段区域分配物理页并建立映射。
/*
 * Allocate the page-pointer array and the individual pages for @area,
 * then map them with map_vm_area().
 *
 * Returns area->addr on success and NULL on any failure. If the pointer
 * array cannot be allocated, the area is removed and freed here; on later
 * failures cleanup is delegated to vfree(area->addr).
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node, void *caller)
{
struct page **pages;
unsigned int nr_pages, array_size, i;
/*
 * NOTE(review): one PAGE_SIZE is subtracted before converting to a page
 * count - presumably area->size includes a guard page; confirm against
 * __get_vm_area_node().
 */
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
/* Pointer array larger than one page: take it from __vmalloc_node() and flag the area with VM_VPAGES. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, gfp_mask | __GFP_ZERO,
PAGE_KERNEL, node, caller);
area->flags |= VM_VPAGES;
} else {
/* Otherwise a zeroed kmalloc_node() allocation is used. */
pages = kmalloc_node(array_size,
(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
node);
}
area->pages = pages;
area->caller = caller;
/* No pointer array: undo the area reservation and give up. */
if (!area->pages) {
remove_vm_area(area->addr);
kfree(area);
return NULL;
}
/* Allocate the pages one at a time; a negative node means no node preference is passed. */
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
if (node < 0)
page = alloc_page(gfp_mask);
else
page = alloc_pages_node(node, gfp_mask, 0);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
area->pages[i] = page;
}
/* A non-zero return from map_vm_area() is treated as failure. */
if (map_vm_area(area, prot, &pages))
goto fail;
return area->addr;
fail:
/* Common error exit: release whatever was set up via vfree(). */
vfree(area->addr);
return NULL;
}
该函数首先为page指针数组申请内存(数组不超过一页时使用kmalloc_node,否则递归调用__vmalloc_node),然后通过alloc_page逐页申请内存,最后调用map_vm_area建立映射。
slab分配器 - 实例用法
1.定义一个 kmem_cache 对象,然后对其进行初始化
/* Handle of the example cache holding fixed 32-byte objects. */
static struct kmem_cache *sample_cachep;

/*
 * Create the "sample_cachep" slab cache: 32-byte objects, default
 * alignment, SLAB_HWCACHE_ALIGN set, no constructor. The result of
 * kmem_cache_create() is stored in sample_cachep without being checked.
 */
static void init_sample_cachep(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("sample_cachep", 32, 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	sample_cachep = cache;
}
这里是采用的通用slab缓存方式实现,这个特定的缓存包含 32 字节的对象,并且是硬件缓存对齐的(由标志参数 SLAB_HWCACHE_ALIGN 定义)。
如果采用专用slab缓存,那么必须要知道建立缓存对象的大小,比如,我们需要为sample_struct结构体建立一个专用的slab缓存,那么其调用格式如下:
/* Handle of the dedicated cache for struct sample_struct objects. */
static struct kmem_cache *sample_struct_cachep;

/*
 * Create the "sample_struct_cachep" slab cache sized for one
 * struct sample_struct per object, with default alignment,
 * SLAB_HWCACHE_ALIGN set and no constructor. The result of
 * kmem_cache_create() is stored in sample_struct_cachep without being
 * checked.
 */
static void init_sample_struct_cache(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("sample_struct_cachep",
				  sizeof(struct sample_struct), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	sample_struct_cachep = cache;
}
接下来以专用slab缓存为实例
2.使用所分配的 slab 缓存对象,或释放所分配的 slab 缓存对象
/*
 * slab_test - print the cache's name and object size, then allocate one
 * object from sample_struct_cachep and release it again.
 *
 * Always returns 0, including when the allocation fails.
 */
int slab_test(void)
{
	struct sample_struct *object;

	/* Messages end in "\n" (the original "/n" printed a literal slash-n). */
	printk("Cache name is %s\n", kmem_cache_name(sample_struct_cachep));
	/* kmem_cache_size() yields an unsigned value, hence %u. */
	printk("Cache object size is %u\n",
	       kmem_cache_size(sample_struct_cachep));

	object = kmem_cache_alloc(sample_struct_cachep, GFP_KERNEL);
	if (object)
		kmem_cache_free(sample_struct_cachep, object);

	return 0;
}
3.slab 缓存的销毁。调用者必须确保在执行销毁操作过程中,不要从缓存中分配对象。
/*
 * Destroy the sample_struct_cachep cache if it exists; does nothing when
 * the handle is NULL.
 */
static void remove_sample_struct_cache(void)
{
	if (!sample_struct_cachep)
		return;

	kmem_cache_destroy(sample_struct_cachep);
}
4.在slab缓存使用过程中,可以通过slabtop查看其状态。
5.实例
/**********************************************
* Author: lewiyon@hotmail.com
* File name: slabmod.c
* Description: slab缓存使用实例
* Date: 2012-07-26
*********************************************/
/*
 * NOTE(review): the header names were lost from the original listing;
 * these are the headers this module needs. One commented-out #include
 * with no recoverable name was dropped.
 */
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
MODULE_LICENSE("GPL");
#define first 1000 /* number of objects allocated on the first attempt */
/* Object type stored in the dedicated slab cache. */
struct sample_struct {
int id;
char name[20];
char address[50];
};
/* Cache handle, assigned by kmem_cache_create() in sample_mod_init(). */
static struct kmem_cache *sample_struct_cachep;
/* One slot per object allocated from the cache. */
static struct sample_struct *sample1[first];
static int sample_mod_init(void)
{
int i;
sample_struct_cachep = kmem_cache_create(
"sample_struct_cachep", /* Name */
sizeof(struct sample_struct), /* Object Size */
0, /* Alignment */
SLAB_HWCACHE_ALIGN, /* Flags */
NULL); /* Constructor */
/* 确保创建成功:有可能失败 */
if (NULL == sample_struct_cachep)
return 1;
printk(KERN_INFO "Cache name is %s\n",
kmem_cache_name(sample_struct_cachep));
/* 首次分配 */
for (i = 0; i < first; i++)
{
sample1[i] = kmem_cache_alloc(sample_struct_cachep, GFP_KERNEL);
if (NULL == sample1[i])
{
int ii;
printk("First alloc ERR: %d/n", i);
for (ii = 0; ii < i; ii++)
{
kmem_cache_free(sample_struct_cachep, sample1[ii]);
sample1[ii] = NULL;
}
}
}
return 0;
}
/*
 * sample_mod_exit - module exit point.
 *
 * Returns every object still recorded in sample1[] to the cache and then
 * destroys the cache. Each slot is checked individually so that empty
 * slots are never handed to kmem_cache_free() and no live object is left
 * behind when the cache is destroyed.
 */
static void sample_mod_exit(void)
{
	int i;

	/* Nothing to release if the cache was never created. */
	if (!sample_struct_cachep)
		return;

	for (i = 0; i < first; i++) {
		if (!sample1[i])
			continue;
		kmem_cache_free(sample_struct_cachep, sample1[i]);
		sample1[i] = NULL;
	}

	kmem_cache_destroy(sample_struct_cachep);
	sample_struct_cachep = NULL;
	printk(KERN_INFO "Destroy sample_struct_cachep!\n");
}
/* Register sample_mod_init() and sample_mod_exit() as the module's init and exit handlers. */
module_init(sample_mod_init);
module_exit(sample_mod_exit);
/* Module metadata. */
MODULE_AUTHOR("lewiyon@hotmail.com");
MODULE_DESCRIPTION("A Simple slab sample");
插入模块、删除模块前后slabtop观察结果
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1020 100% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1010 99% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1002 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1001 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1000 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1000 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1000 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1000 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
1020 1000 98% 0.12K 34 30 136K sample_struct_cachep
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
[root@RedHat ~]# slabtop -o | egrep "OBJS|sample_struct_cachep"
OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME
[root@RedHat ~]#
分析:
a. 在没有插入模块时,没有数据
b. 插入模块时,新建slab,这时数据都是active的,此时对象数目为OBJS = SLABS * OBJ/SLAB = 34*30 = 1020;
c. 随着模块稳定下来, 未使用的对象变为inactive,那么active数目就变为了1000;(与程序相对应)
d. 删除模块后,数据消失