[sldev] Optimization target: Avatar skinning, LLViewerJointMesh,
matrix multiply
James Cook
james at lindenlab.com
Sat Apr 14 08:17:14 PDT 2007
Recently I was playing with Shark, the Mac profiling tool. I noticed
that Second Life has a hot spot in several related functions:
LLViewerJointMesh::updateGeometry(), operator*(LLVector3, LLMatrix4) and
operator*(LLVector3, LLMatrix3).
On the Mac we default avatar vertex programs off (I think for
compatibility with some graphics card/driver combinations). This means
that "skinning" of the avatar mesh has to be done on the CPU instead of
the GPU. I think skinning refers to blending the vertices of the mesh
around the joints so you don't see gaps. I'm not a graphics guy.
Regardless, we do about 1000 vector/matrix multiplies per avatar per
update. I decided to try to speed it up. Below I've attached two
replacement functions that cut 0.5 to 1 ms off the frame time per
rendered avatar on the Mac. I believe it speeds up any platform that
has avatar vertex programs off.
My next step is to try to speed up this math using SSE instructions.
References I found online suggest SSE can double the speed of Vector3
Matrix4 multiplies. If anyone here has experience with SSE, I could
sure use some advice.
I think the matrix lerp() math can also be done with SSE.
James
(Suggestion to hackers: Move the operator*(LLVector3, LLMatrix4)
function to the .cpp file. It isn't inlined anyway and you don't want
to have to recompile everything each time you change it.)
LLVector3 operator*(const LLVector3 &a, const LLMatrix4 &b)
{
// This is better than making a temporary LLVector3. This eliminates an
// unnecessary LLVector3() constructor and also helps the compiler to
// realize that the output floats do not alias the input floats, hence
// eliminating redundant loads of a.mV[0], etc. JC
return LLVector3(a.mV[VX] * b.mMatrix[VX][VX] +
a.mV[VY] * b.mMatrix[VY][VX] +
a.mV[VZ] * b.mMatrix[VZ][VX] +
b.mMatrix[VW][VX],
a.mV[VX] * b.mMatrix[VX][VY] +
a.mV[VY] * b.mMatrix[VY][VY] +
a.mV[VZ] * b.mMatrix[VZ][VY] +
b.mMatrix[VW][VY],
a.mV[VX] * b.mMatrix[VX][VZ] +
a.mV[VY] * b.mMatrix[VY][VZ] +
a.mV[VZ] * b.mMatrix[VZ][VZ] +
b.mMatrix[VW][VZ]);
}
LLVector4 operator*(const LLVector4 &a, const LLMatrix4 &b)
{
// Operate "to the left" on row-vector a
return LLVector4(a.mV[VX] * b.mMatrix[VX][VX] +
a.mV[VY] * b.mMatrix[VY][VX] +
a.mV[VZ] * b.mMatrix[VZ][VX] +
a.mV[VW] * b.mMatrix[VW][VX],
a.mV[VX] * b.mMatrix[VX][VY] +
a.mV[VY] * b.mMatrix[VY][VY] +
a.mV[VZ] * b.mMatrix[VZ][VY] +
a.mV[VW] * b.mMatrix[VW][VY],
a.mV[VX] * b.mMatrix[VX][VZ] +
a.mV[VY] * b.mMatrix[VY][VZ] +
a.mV[VZ] * b.mMatrix[VZ][VZ] +
a.mV[VW] * b.mMatrix[VW][VZ],
a.mV[VX] * b.mMatrix[VX][VW] +
a.mV[VY] * b.mMatrix[VY][VW] +
a.mV[VZ] * b.mMatrix[VZ][VW] +
a.mV[VW] * b.mMatrix[VW][VW]);
}
void LLViewerJointMesh::updateGeometry()
{
if (!(mValid
&& mMesh
&& mFace
&& mMesh->hasWeights()
&& mFace->mVertexBuffer.notNull()
&& LLShaderMgr::getVertexShaderLevel(LLShaderMgr::SHADER_AVATAR) == 0))
{
return;
}
uploadJointMatrices();
LLStrider<LLVector3> o_vertices;
LLStrider<LLVector3> o_normals;
//get vertex and normal striders
LLVertexBuffer *buffer = mFace->mVertexBuffer;
buffer->getVertexStrider(o_vertices, 0);
buffer->getNormalStrider(o_normals, 0);
F32 last_weight = F32_MAX;
LLMatrix4 gBlendMat;
LLMatrix3 gBlendRotMat;
const F32* weights = mMesh->getWeights();
const LLVector3* coords = mMesh->getCoords();
const LLVector3* normals = mMesh->getNormals();
for (U32 index = 0; index < mMesh->getNumVertices(); index++)
{
U32 bidx = index + mMesh->mFaceVertexOffset;
// blend by first matrix
F32 w = weights[index];
// Maybe we don't have to change gBlendMat.
// Profiles of a single-avatar scene on a Mac show this to be a very
// common case. JC
if (w == last_weight)
{
o_vertices[bidx] = coords[index] * gBlendMat;
o_normals[bidx] = normals[index] * gBlendRotMat;
continue;
}
last_weight = w;
S32 joint = llfloor(w);
w -= joint;
// No lerp required in this case.
if (w == 1.0f)
{
gBlendMat = gJointMat[joint+1];
o_vertices[bidx] = coords[index] * gBlendMat;
gBlendRotMat = gJointRot[joint+1];
o_normals[bidx] = normals[index] * gBlendRotMat;
continue;
}
// Try to keep all the accesses to the matrix data as close
// together as possible. This function is a hot spot on the
// Mac. JC
LLMatrix4 &m0 = gJointMat[joint+1];
LLMatrix4 &m1 = gJointMat[joint+0];
gBlendMat.mMatrix[VX][VX] = lerp(m1.mMatrix[VX][VX],
m0.mMatrix[VX][VX], w);
gBlendMat.mMatrix[VX][VY] = lerp(m1.mMatrix[VX][VY],
m0.mMatrix[VX][VY], w);
gBlendMat.mMatrix[VX][VZ] = lerp(m1.mMatrix[VX][VZ],
m0.mMatrix[VX][VZ], w);
gBlendMat.mMatrix[VY][VX] = lerp(m1.mMatrix[VY][VX],
m0.mMatrix[VY][VX], w);
gBlendMat.mMatrix[VY][VY] = lerp(m1.mMatrix[VY][VY],
m0.mMatrix[VY][VY], w);
gBlendMat.mMatrix[VY][VZ] = lerp(m1.mMatrix[VY][VZ],
m0.mMatrix[VY][VZ], w);
gBlendMat.mMatrix[VZ][VX] = lerp(m1.mMatrix[VZ][VX],
m0.mMatrix[VZ][VX], w);
gBlendMat.mMatrix[VZ][VY] = lerp(m1.mMatrix[VZ][VY],
m0.mMatrix[VZ][VY], w);
gBlendMat.mMatrix[VZ][VZ] = lerp(m1.mMatrix[VZ][VZ],
m0.mMatrix[VZ][VZ], w);
gBlendMat.mMatrix[VW][VX] = lerp(m1.mMatrix[VW][VX],
m0.mMatrix[VW][VX], w);
gBlendMat.mMatrix[VW][VY] = lerp(m1.mMatrix[VW][VY],
m0.mMatrix[VW][VY], w);
gBlendMat.mMatrix[VW][VZ] = lerp(m1.mMatrix[VW][VZ],
m0.mMatrix[VW][VZ], w);
o_vertices[bidx] = coords[index] * gBlendMat;
LLMatrix3 &n0 = gJointRot[joint+1];
LLMatrix3 &n1 = gJointRot[joint+0];
gBlendRotMat.mMatrix[VX][VX] = lerp(n1.mMatrix[VX][VX],
n0.mMatrix[VX][VX], w);
gBlendRotMat.mMatrix[VX][VY] = lerp(n1.mMatrix[VX][VY],
n0.mMatrix[VX][VY], w);
gBlendRotMat.mMatrix[VX][VZ] = lerp(n1.mMatrix[VX][VZ],
n0.mMatrix[VX][VZ], w);
gBlendRotMat.mMatrix[VY][VX] = lerp(n1.mMatrix[VY][VX],
n0.mMatrix[VY][VX], w);
gBlendRotMat.mMatrix[VY][VY] = lerp(n1.mMatrix[VY][VY],
n0.mMatrix[VY][VY], w);
gBlendRotMat.mMatrix[VY][VZ] = lerp(n1.mMatrix[VY][VZ],
n0.mMatrix[VY][VZ], w);
gBlendRotMat.mMatrix[VZ][VX] = lerp(n1.mMatrix[VZ][VX],
n0.mMatrix[VZ][VX], w);
gBlendRotMat.mMatrix[VZ][VY] = lerp(n1.mMatrix[VZ][VY],
n0.mMatrix[VZ][VY], w);
gBlendRotMat.mMatrix[VZ][VZ] = lerp(n1.mMatrix[VZ][VZ],
n0.mMatrix[VZ][VZ], w);
o_normals[bidx] = normals[index] * gBlendRotMat;
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 249 bytes
Desc: OpenPGP digital signature
Url : http://lists.secondlife.com/pipermail/sldev/attachments/20070414/0b71803b/signature.pgp
More information about the SLDev
mailing list