[sldev] Optimization target: Avatar skinning, LLViewerJointMesh, matrix multiply

James Cook james at lindenlab.com
Sat Apr 14 08:17:14 PDT 2007


Recently I was playing with Shark, the Mac profiling tool.  I noticed
that Second Life has a hot spot in several related functions:
LLViewerJointMesh::updateGeometry(), operator*(LLVector3, LLMatrix4) and
operator*(LLVector3, LLMatrix3).

On the Mac, avatar vertex programs are off by default (I think for
compatibility with some graphics card/driver combinations).  This means
that "skinning" of the avatar mesh has to be done on the CPU instead of
the GPU.  I think skinning refers to blending the vertices of the mesh
around the joints so you don't see gaps.  I'm not a graphics guy.

Regardless, we do about 1000 vector/matrix multiplies per avatar per
update.  I decided to try to speed it up.  Below I've attached two
replacement functions that cut 0.5 to 1 ms off the frame time per
rendered avatar on the Mac.  I believe it speeds up any platform that
has avatar vertex programs off.

My next step is to try to speed up this math using SSE instructions.
References I found online suggest SSE can double the speed of Vector3
Matrix4 multiplies.  If anyone here has experience with SSE, I could
sure use some advice.

I think the matrix lerp() math can also be done with SSE.

James

(Suggestion to hackers: Move the operator*(LLVector3, LLMatrix4)
function to the .cpp file.  It isn't inlined anyway and you don't want
to have to recompile everything each time you change it.)

LLVector3 operator*(const LLVector3 &a, const LLMatrix4 &b)
{
	// Row vector times matrix, with a treated as (x, y, z, 1): the VW row
	// of b is added unscaled, and column VW of b is never read, so no
	// homogeneous divide takes place.
	//
	// The result is built directly in the return expression instead of in
	// a named temporary.  Per the original author (JC) this skips an extra
	// LLVector3() construction and lets the compiler see that the output
	// floats cannot alias the inputs, so a.mV[] is not reloaded.
	return LLVector3(
		// x component: a dotted with column VX of b
		b.mMatrix[VX][VX] * a.mV[VX] +
		b.mMatrix[VY][VX] * a.mV[VY] +
		b.mMatrix[VZ][VX] * a.mV[VZ] +
		b.mMatrix[VW][VX],

		// y component: a dotted with column VY of b
		b.mMatrix[VX][VY] * a.mV[VX] +
		b.mMatrix[VY][VY] * a.mV[VY] +
		b.mMatrix[VZ][VY] * a.mV[VZ] +
		b.mMatrix[VW][VY],

		// z component: a dotted with column VZ of b
		b.mMatrix[VX][VZ] * a.mV[VX] +
		b.mMatrix[VY][VZ] * a.mV[VY] +
		b.mMatrix[VZ][VZ] * a.mV[VZ] +
		b.mMatrix[VW][VZ]);
}

LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b)
{
	// a is a row vector multiplied on the left of b: output component j is
	// the sum over i of a.mV[i] * b.mMatrix[i][j], with all four components
	// of a (including VW) taking part.
	return LLVector4(
		// x component
		b.mMatrix[VX][VX] * a.mV[VX] +
		b.mMatrix[VY][VX] * a.mV[VY] +
		b.mMatrix[VZ][VX] * a.mV[VZ] +
		b.mMatrix[VW][VX] * a.mV[VW],

		// y component
		b.mMatrix[VX][VY] * a.mV[VX] +
		b.mMatrix[VY][VY] * a.mV[VY] +
		b.mMatrix[VZ][VY] * a.mV[VZ] +
		b.mMatrix[VW][VY] * a.mV[VW],

		// z component
		b.mMatrix[VX][VZ] * a.mV[VX] +
		b.mMatrix[VY][VZ] * a.mV[VY] +
		b.mMatrix[VZ][VZ] * a.mV[VZ] +
		b.mMatrix[VW][VZ] * a.mV[VW],

		// w component
		b.mMatrix[VX][VW] * a.mV[VX] +
		b.mMatrix[VY][VW] * a.mV[VY] +
		b.mMatrix[VZ][VW] * a.mV[VZ] +
		b.mMatrix[VW][VW] * a.mV[VW]);
}

void LLViewerJointMesh::updateGeometry()
{
	if (!(mValid
		  && mMesh
		  && mFace
		  && mMesh->hasWeights()
		  && mFace->mVertexBuffer.notNull()
		  && LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
	{
		return;
	}
	
	uploadJointMatrices();

	LLStrider<LLVector3> o_vertices;
	LLStrider<LLVector3> o_normals;

	//get vertex and normal striders
	LLVertexBuffer *buffer = mFace->mVertexBuffer;
	buffer->getVertexStrider(o_vertices,  0);
	buffer->getNormalStrider(o_normals,   0);

	F32 last_weight = F32_MAX;
	LLMatrix4 gBlendMat;
	LLMatrix3 gBlendRotMat;

	const F32* weights = mMesh->getWeights();
	const LLVector3* coords = mMesh->getCoords();
	const LLVector3* normals = mMesh->getNormals();
	for (U32 index = 0; index < mMesh->getNumVertices(); index++)
	{
		U32 bidx = index + mMesh->mFaceVertexOffset;
		
		// blend by first matrix
		F32 w = weights[index];
		
		// Maybe we don't have to change gBlendMat.
		// Profiles of a single-avatar scene on a Mac show this to be a very
		// common case.  JC
		if (w == last_weight)
		{
			o_vertices[bidx] = coords[index] * gBlendMat;
			o_normals[bidx] = normals[index] * gBlendRotMat;
			continue;
		}
		
		last_weight = w;

		S32 joint = llfloor(w);
		w -= joint;
		
		// No lerp required in this case.
		if (w == 1.0f)
		{
			gBlendMat = gJointMat[joint+1];
			o_vertices[bidx] = coords[index] * gBlendMat;
			gBlendRotMat = gJointRot[joint+1];
			o_normals[bidx] = normals[index] * gBlendRotMat;
			continue;
		}
		
		// Try to keep all the accesses to the matrix data as close
		// together as possible.  This function is a hot spot on the
		// Mac. JC
		LLMatrix4 &m0 = gJointMat[joint+1];
		LLMatrix4 &m1 = gJointMat[joint+0];
		
		gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX],
m0.mMatrix[VX][VX], w);
		gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY],
m0.mMatrix[VX][VY], w);
		gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ],
m0.mMatrix[VX][VZ], w);

		gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX],
m0.mMatrix[VY][VX], w);
		gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY],
m0.mMatrix[VY][VY], w);
		gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ],
m0.mMatrix[VY][VZ], w);

		gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX],
m0.mMatrix[VZ][VX], w);
		gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY],
m0.mMatrix[VZ][VY], w);
		gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ],
m0.mMatrix[VZ][VZ], w);

		gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX],
m0.mMatrix[VW][VX], w);
		gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY],
m0.mMatrix[VW][VY], w);
		gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ],
m0.mMatrix[VW][VZ], w);

		o_vertices[bidx] = coords[index] * gBlendMat;
		
		LLMatrix3 &n0 = gJointRot[joint+1];
		LLMatrix3 &n1 = gJointRot[joint+0];
		
		gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX],
n0.mMatrix[VX][VX], w);
		gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY],
n0.mMatrix[VX][VY], w);
		gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ],
n0.mMatrix[VX][VZ], w);

		gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX],
n0.mMatrix[VY][VX], w);
		gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY],
n0.mMatrix[VY][VY], w);
		gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ],
n0.mMatrix[VY][VZ], w);

		gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX],
n0.mMatrix[VZ][VX], w);
		gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY],
n0.mMatrix[VZ][VY], w);
		gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ],
n0.mMatrix[VZ][VZ], w);
		
		o_normals[bidx] = normals[index] * gBlendRotMat;
	}
}

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 249 bytes
Desc: OpenPGP digital signature
Url : http://lists.secondlife.com/pipermail/sldev/attachments/20070414/0b71803b/signature.pgp


More information about the SLDev mailing list