C++内存管理

现在的位置: 首页 > 综合 > 正文

2018年02月07日 ⁄ 综合 ⁄ 共 4854字 ⁄ 字号小中大 ⁄ 评论关闭

C++内存池初探

原创文章，转载请注明出处：http://blog.csdn.net/fastsort/article/details/12356369

///mymem.h 使用自己定义的内存管理需要包含这个头文件

#ifndef	__mymem_h__
#define	__mymem_h__

/// Base class template that supplies a class-specific operator new and
/// operator delete backed by a per-type free list.
///
/// Intended use is `class S : public CachedObj<S> { ... };` so that every
/// derived type T gets its own free list (`freeStore`) and allocator.
///
/// The member definitions use std::allocator and std::runtime_error, so
/// <memory> and <stdexcept> must be visible wherever this template is used.
template <class T>
class CachedObj
{
public:
	/// Virtual so that derived objects can be destroyed through a base pointer.
	virtual ~CachedObj() {}

	/// Hands out one T-sized slot from the free list.
	void *operator new(std::size_t);

	/// Puts a slot back on the free list.
	void operator delete(void *, std::size_t);

protected:
	/// Link to the next free slot; written while the slot sits on the free list.
	T *next;

private:
	/// Pushes one slot onto the front of the free list.
	static void AddToFreeList(T *);

	static T *freeStore;                 ///< Head of the free list (null when empty).
	static std::allocator<T> allocMem;   ///< Source of raw storage for new slots.
	static const std::size_t chunk;      ///< Number of slots requested per refill.
};

//////////////////////////////////////////////////////
///函数实现和数据初始化
///模板中将头文件和实现文件分离在vs中有问题
//////////////////////////////////////////////////////

/// Class-specific allocation function: returns one slot taken from the
/// front of the free list, refilling the list first when it is empty.
///
/// @param sz  Size requested by the new-expression; must equal sizeof(T).
/// @return    Pointer to uninitialised storage for exactly one T.
/// @throws std::runtime_error  if sz differs from sizeof(T), e.g. when a
///         class further derived from T (and larger than T) is allocated.
/// @throws whatever std::allocator<T>::allocate throws when a refill fails.
template <class T>
void *CachedObj<T>::operator new(std::size_t sz)
{
	// Original author's note: this check is fine under Visual Studio, but
	// on Linux this spot reportedly needs to be modified (no details given).
	if (sz != sizeof(T))
		throw std::runtime_error
				("CachedObj : wrong size object in op new. ");

	if (!freeStore)
	{
		// Free list exhausted: grab `chunk` slots at once and thread every
		// one of them onto the list.
		T *block = allocMem.allocate(chunk);
		for (std::size_t i = 0; i != chunk; ++i)
			AddToFreeList(&block[i]);
	}

	// Pop the head of the free list and hand it to the caller.
	T *p = freeStore;
	freeStore = freeStore->CachedObj<T>::next;
	return p;
}
/// Class-specific deallocation function: instead of releasing the storage,
/// pushes the slot back onto the free list so a later operator new can
/// reuse it. A null pointer is ignored.
///
/// The size argument is not needed (every slot has the same size), so the
/// parameter is left unnamed.
///
/// @param p  Storage previously returned by CachedObj<T>::operator new, or null.
template <class T>
void CachedObj<T>::operator delete(void *p, std::size_t)
{
	if (p)
		AddToFreeList(static_cast<T *>(p));
}
/// Pushes `slot` onto the front of the free list.
///
/// The slot's CachedObj<T>::next member is overwritten with the previous
/// list head, then the slot itself becomes the new head.
template <class T>
void CachedObj<T>::AddToFreeList(T *slot)
{
	T *const previousHead = freeStore;
	slot->CachedObj<T>::next = previousHead;
	freeStore = slot;
}

/// Definitions of the per-type static state.
/// allocMem  : allocator that supplies raw storage for new slots.
/// freeStore : head of the free list; starts empty (null).
/// chunk     : number of slots requested from allocMem per refill.
template <class T>	std::allocator< T >	CachedObj<T>::allocMem ;
template <class T>	T *			CachedObj<T>::freeStore = 0	;
template <class T>	const	std::size_t	CachedObj<T>::chunk = 20 ;

#endif //#endif __mymem_h__

由于使用了模板，如果把声明（头文件）和实现代码分离到不同的文件中，构建时会出错：模板的定义必须在实例化它的翻译单元中可见，否则编译器无法生成对应的代码。

在网上也查询了一些解决办法，但是都有局限性，不同编译器的支持度也不一样，所以干脆实现代码也放在头文件里了。

//// main.cpp 测试用的主函数

#include	<iostream>
#include	<time.h>
#include	<fstream>

#include	"mymem.h"	///自己定义的内存管理基类

using	namespace	std;


/// Result log shared by all tests; opened in append mode so that
/// successive runs accumulate in the same file.
std::ofstream ofs("perfRec.txt", std::ios::app);

/** Test class whose allocation is routed through the CachedObj<S> pool. */
class S : public CachedObj<S>
{
public:
	S() {}
	~S() {}

	/// Prints a fixed message to standard output.
	void fun()
	{
		std::cout << "fun is called!" << std::endl;
	}
};

/** Control class: same shape as S, but allocated with the default operator new/delete. */
class T
{
public:
	T() {}
	~T() {}

	/// Prints a fixed message to standard output.
	void fun()
	{
		std::cout << "fun is called!" << std::endl;
	}
};
/**
 * Appends one result row to the log file `ofs`.
 *
 * @param cnt  Number of allocate/free operations performed.
 * @param sd   Measured duration for the pooled class S.
 * @param td   Measured duration for the plain class T.
 *
 * The last column is the ratio td/sd truncated to one decimal place, or
 * "-" when sd is zero (the ratio is undefined in that case).
 */
void rec(int cnt, int sd, int td)
{
	ofs << cnt << "\t\t";
	ofs << sd << "\t\t" << td << "\t\t";

	if (sd == 0)
	{
		ofs << "-" << std::endl;
	}
	else
	{
		// Scale by 10, truncate to an integer, scale back: keeps one decimal.
		const double ratio = static_cast<int>(1.0 * td / sd * 10) / 10.0;
		ofs << ratio << std::endl;
	}

	ofs.flush();
}

/**申请完立即释放测试**/
void	testA(int n=1024000)
{
	cout<< __FUNCTION__ <<" count = "<< n <<endl;
	//int		CTest = n;
	clock_t	start,end;

	start = clock();
	for(int i=0; i<n; i++)
	{
		S	*ps = new	S();
		delete	ps;
	}
	end = clock();
	int	sd = end-start;
	cout<<"S duratuon : "<< sd <<"ms"<<endl;
	
	start = clock();
	for(int i=0; i<n; i++)
	{
		T	*ps = new	T();
		delete	ps;
	}
	end = clock();
	int td = end-start;
	cout<<"T duration : "<< td <<"ms"<<endl;
	rec(n,sd,td);
}

/**
 * Benchmark B: allocate n objects, keep them all alive, then free them in
 * reverse order - first for the pooled class S, then for the plain class T.
 *
 * The elapsed CPU time of each allocate-all/free-all pass is printed to
 * standard output and both values are passed to rec() for logging. The
 * pointer arrays themselves are allocated outside the timed regions.
 *
 * @param n  Number of objects allocated per class.
 *
 * clock() returns ticks, not milliseconds; the tick difference is scaled
 * with CLOCKS_PER_SEC so the printed "ms" value is correct on every
 * platform (CLOCKS_PER_SEC is 1000 with MSVC but 1000000 on POSIX systems).
 */
void	testB(int n=1024000)
{
	std::cout << __FUNCTION__ << " count = " << n << std::endl;

	S **psa = new S*[n];
	clock_t start = clock();
	for (int i = 0; i < n; i++)
	{
		psa[i] = new S();
	}
	for (int i = n - 1; i >= 0; i--)
	{
		delete psa[i];
	}
	clock_t end = clock();
	// Floating-point scaling avoids integer overflow of ticks * 1000.
	const int sd = static_cast<int>((end - start) * 1000.0 / CLOCKS_PER_SEC);
	std::cout << "S duratuon : " << sd << "ms" << std::endl;

	T **pta = new T*[n];
	start = clock();
	for (int i = 0; i < n; i++)
	{
		pta[i] = new T();
	}
	for (int i = n - 1; i >= 0; i--)
	{
		delete pta[i];
	}
	end = clock();
	const int td = static_cast<int>((end - start) * 1000.0 / CLOCKS_PER_SEC);
	std::cout << "T duration : " << td << "ms" << std::endl;

	rec(n, sd, td);
	delete [] psa;
	delete [] pta;
}

/**
 * Writes the "Table a" header to the log and runs testA() n times,
 * multiplying the operation count by 10 before each run
 * (10000, 100000, ... for the first, second, ... run).
 *
 * @param n  Number of testA() runs.
 */
void	fa(int n)
{
	ofs << "===================Table a===================" << std::endl;
	ofs << "!!! release after per alloc at once !!!" << std::endl;
	ofs << "count" << "\t\t" << "s duration(ms)" << "\t\t" << "t duration(ms)" << std::endl;

	int count = 1000;
	for (int round = 0; round < n; ++round)
	{
		count *= 10;
		testA(count);
	}
}
/**
 * Writes the "Table B" header to the log and runs testB() n times,
 * multiplying the object count by 10 before each run
 * (10000, 100000, ... for the first, second, ... run).
 *
 * @param n  Number of testB() runs.
 */
void	fb(int n)
{
	ofs << "===================Table B===================" << std::endl;
	ofs << "!!! release after alloc all elements !!!" << std::endl;
	ofs << "count" << "\t\t" << "s duration(ms)" << "\t\t" << "t duration(ms)" << std::endl;

	int count = 1000;
	for (int round = 0; round < n; ++round)
	{
		count *= 10;
		testB(count);
	}
}
/**
 * Benchmark driver: writes a banner to the log, runs both test tables
 * ten times with four problem sizes each, then writes a trailer.
 */
int	main()
{
	ofs << "========performance test in win7(x64)========" << std::endl;

	const int sizesPerTable = 4;
	for (int repetition = 0; repetition < 10; ++repetition)
	{
		fa(sizesPerTable);
		fb(sizesPerTable);
	}

	ofs << "======test end=========\n\n\n\n\n\n\n" << std::endl;
	return 0;
}

如果要对某个类进行手动内存管理（或者说用我们自己定义的内存池），只需要让该类继承CachedObj类即可。

需要注意的是，CachedObj是一个模板，继承应该这样写：

class 	S:	public	CachedObj<S>
{
...
};

为了对比，同时定义了另外一个类T，使用系统的内存管理策略。

测试结果：

（第一列为申请/释放内存次数，第二列为S的结果，第三列为T的结果，最后一列是T/S的值）

在win7(x64)下：

===================Table a===================
!!! release after per alloc at once !!!
count		s duration(ms)		t duration(ms)
10000		0		1		-
100000		1		10		10
1000000		8		96		12
10000000		71		977		13.7
===================Table B===================
!!! release after alloc all elements !!!
count		s duration(ms)		t duration(ms)
10000		0		1		-
100000		2		12		6
1000000		12		115		9.5
10000000		123		1185		9.6

效果十分明显，效率提升约十倍！

但是代码放到Linux下，效果就不是那么明显了（注意：代码直接输出clock()的差值，而Linux下CLOCKS_PER_SEC为1000000，所以下表中的数值单位实际上是微秒，并不是表头所写的ms）：

========performance test in ubuntu ========
===================Table a===================
!!! release after per alloc at once !!!
count		s duration(ms)		t duration(ms)
10000		0		0		-
100000		10000		10000		1
1000000		50000		60000		1.2
10000000		530000		560000		1
50000000		2640000		2860000		1
===================Table B===================
!!! release after alloc all elements !!!
count		s duration(ms)		t duration(ms)
10000		0		0		-
100000		10000		10000		1
1000000		70000		80000		1.1
10000000		670000		830000		1.2
50000000		3250000		4130000		1.2

不得不说，Linux下默认的内存分配器貌似比Windows的高效很多，所以自定义内存池带来的提升并不明显。

继续测试，将chunk每次增大一倍，直到不能再增大为止，但是效果还是不明显，加速比仍然在1附近。

后来无意中想起优化选项，

于是编译时添加-O0和-O3，对比其结果有什么不同。

开始很奇怪的是，-O3不仅没有提高效率，反而降低了效率。

但是后来分析发现，只是加速比增加的很明显，而实际上，绝对速度都比未优化的要快：