One of my colleagues, Ferhat S., decided one day to use Unity3D to develop mobile games. He wanted hundreds of characters to act in his game: he had played a strategy game on his computer and was so impressed with it that he decided to make something like it for mobile. When he tried to use skeletally animated characters he had found somewhere, he got only 1 fps on mobile. As always, he asked me for help, and I decided to write an article on the subject.
The problem is mobile phone hardware limitations: rendering optimization techniques common on PCs cannot be used directly. At present, in order to achieve the effect of multi-unit combat, games use various schemes to reduce the number of bones and the number of model faces, but they cannot escape the cost of the skeletal animation calculation itself.
Let's look at a screenshot: a scene with 100 soldiers and 10 cute babies, where each character's actions are controlled separately. It runs smoothly at 60 fps on a Xiaomi Mi 3. I have also tried it with more than 300 animated characters, and it still runs at around 55 fps.

Mobile SS1

Mobile SS2

First explain the advantages and disadvantages of this program:
Advantages: no need to calculate skeletal animation, and it can play skeletal animation smoothly (in fact, it is no longer skeletal animation), each character action can also be controlled separately, so it consumes little CPU.
Disadvantages: There is no transition in action switching (in group combat, this basically does not affect the aesthetics), it consumes a little memory (in fact, it is good).
Then let me explain the idea of the plan: in order to avoid the skeletal animation calculation, the existing skeletal animation system cannot be used. The only remaining option is vertex deformation animation, but traditional vertex animation is not cheap in terms of memory usage, and Unity's support for it is not good.
So since only vertex animation can be selected and in order to reduce memory consumption, you can rewrite the shader and use GPU to interpolate to achieve the effect of vertex animation playing skeletal animation.
There are indeed many shader interpolation schemes, but in combination with the specific implementation, I chose two relatively simple schemes:

  1. In the vertex shader, the position of the vertices is controlled by sampling a texture with a per-frame uv offset. After I had implemented it on the PC side, I suddenly found an obvious problem: sampling a texture in the vertex shader is supported by PC graphics cards, but you cannot count on it on mobile GPUs.
  2. So there is only the second solution: interpolating the vertices in the vertex shader to play the animation. Then there is a question: where does the vertex data come from? Unity's mesh structure has many channels — vertices, colors, uv, uv2, normals, and tangents. Of these channels, all except color (mainly due to accuracy issues) and uv (which still has to hold the texture coordinates) can be used to store vertex data, which is then interpolated by controlling the time point. But one problem still needs to be solved: this way, a single mesh can only interpolate 4 key frames of vertices. What about a longer animation? You can switch between multiple meshes generated in advance.

Animation interception can be achieved through unity’s BakeMesh function. The following is the code for combining mesh:

// Packs four key-frame meshes (produced by BakeMesh) plus timing info into a
// binary blob for later loading by AddAnimationInfo/GetMeshData.
// Channel packing: vertex = frame 1 positions, normal = frame 2 positions,
// tangent.xyz = frame 3 positions, tangent.w = frame 4 X, uv2 = frame 4 Y,Z,
// uv = shared texture coordinates.
// Parameters: clipTimeLenghts = total clip time; frame2Pos/frame3Pos =
// normalized times of the interior key frames (frame 1 = 0, frame 4 = 1).
byte [] Make(Mesh mesh1, Mesh mesh2, Mesh mesh3, Mesh mesh4, float clipTimeLenghts, float frame2Pos, float frame3Pos)
{
    Mesh[] meshs = new Mesh[] { mesh1, mesh2, mesh3, mesh4 };
    VertexAnimationResManager.ClipMeshData meshData = new VertexAnimationResManager.ClipMeshData();
    meshData.subMeshCount = meshs[0].subMeshCount;

    // PERF FIX: Unity's Mesh.vertices / Mesh.uv properties return a *copy* of
    // the array on every access. The original read them inside the loops,
    // allocating a full array per vertex (O(n^2) work). Cache each array once.
    Vector3[] frame1Verts = meshs[0].vertices;
    Vector2[] frame1Uvs = meshs[0].uv;
    Vector3[] frame2Verts = meshs[1].vertices;
    Vector3[] frame3Verts = meshs[2].vertices;
    Vector3[] frame4Verts = meshs[3].vertices;

    int count = frame1Verts.Length;

    // Frame-1 vertex positions -> vertex channel.
    if (frame1Verts != null && frame1Verts.Length > 0)
    {
        meshData.vertexBuffer = new float[count * 3];
        for (int i = 0; i < count; i++)
        {
            meshData.vertexBuffer[i * 3] = frame1Verts[i].x;
            meshData.vertexBuffer[i * 3 + 1] = frame1Verts[i].y;
            meshData.vertexBuffer[i * 3 + 2] = frame1Verts[i].z;
        }
    }

    // Texture coordinates (identical for all frames) -> uv channel.
    if (frame1Uvs != null && frame1Uvs.Length > 0)
    {
        meshData.uvBuffer = new float[count * 2];
        for (int i = 0; i < count; i++)
        {
            meshData.uvBuffer[i * 2] = frame1Uvs[i].x;
            meshData.uvBuffer[i * 2 + 1] = frame1Uvs[i].y;
        }
    }

    // Frame-2 vertex positions are smuggled through the normal channel.
    if (frame2Verts != null && frame2Verts.Length > 0)
    {
        meshData.normalBuffer = new float[count * 3];
        for (int i = 0; i < count; i++)
        {
            meshData.normalBuffer[i * 3] = frame2Verts[i].x;
            meshData.normalBuffer[i * 3 + 1] = frame2Verts[i].y;
            meshData.normalBuffer[i * 3 + 2] = frame2Verts[i].z;
        }
    }

    // Frame-3 positions fill the tangent XYZ; the tangent W carries the X
    // coordinate of frame 4 (frame 4's Y,Z go into uv2 below).
    if (frame3Verts != null && frame3Verts.Length > 0)
    {
        meshData.tangentBuffer = new float[count * 4];
        for (int i = 0; i < count; i++)
        {
            meshData.tangentBuffer[i * 4] = frame3Verts[i].x;
            meshData.tangentBuffer[i * 4 + 1] = frame3Verts[i].y;
            meshData.tangentBuffer[i * 4 + 2] = frame3Verts[i].z;
            meshData.tangentBuffer[i * 4 + 3] = frame4Verts[i].x;
        }
    }

    // Frame-4 Y,Z -> uv2 channel (X was stored in tangent.w above).
    if (frame4Verts != null && frame4Verts.Length > 0)
    {
        meshData.uv2Buffer = new float[count * 2];
        for (int i = 0; i < count; i++)
        {
            meshData.uv2Buffer[i * 2] = frame4Verts[i].y;
            meshData.uv2Buffer[i * 2 + 1] = frame4Verts[i].z;
        }
    }

    // Triangle indices: cache GetTriangles(i) so each submesh array is copied
    // out of the Mesh only once (the original called it twice per submesh).
    meshData.subMeshTriangleLens = new int[meshData.subMeshCount];
    int[][] subMeshTris = new int[meshData.subMeshCount][];
    int triTotal = 0;
    for (int i = 0; i < meshData.subMeshCount; i++)
    {
        subMeshTris[i] = meshs[0].GetTriangles(i);
        meshData.subMeshTriangleLens[i] = subMeshTris[i].Length;
        triTotal += subMeshTris[i].Length;
    }

    meshData.triangleBuffer = new int[triTotal];
    int offset = 0;
    for (int i = 0; i < meshData.subMeshCount; i++)
    {
        subMeshTris[i].CopyTo(meshData.triangleBuffer, offset);
        offset += meshData.subMeshTriangleLens[i];
    }

    // Serialize. The write order must match the read order in GetMeshData().
    ByteBuffer bbuffer = new ByteBuffer();
    bbuffer.WriteFloat(clipTimeLenghts);
    bbuffer.WriteFloat(frame2Pos);
    bbuffer.WriteFloat(frame3Pos);
    bbuffer.WriteInt(meshData.subMeshCount);

    for (int i = 0; i < meshData.subMeshTriangleLens.Length; i++)
    {
        bbuffer.WriteInt(meshData.subMeshTriangleLens[i]);
    }

    bbuffer.WriteInt(meshData.triangleBuffer.Length);
    for (int i = 0; i < meshData.triangleBuffer.Length; i++)
    {
        bbuffer.WriteInt(meshData.triangleBuffer[i]);
    }

    WriteFloatArray(bbuffer, meshData.vertexBuffer);
    WriteFloatArray(bbuffer, meshData.normalBuffer);
    WriteFloatArray(bbuffer, meshData.tangentBuffer);
    WriteFloatArray(bbuffer, meshData.uvBuffer);
    WriteFloatArray(bbuffer, meshData.uv2Buffer);

    return bbuffer.ToBytes();
}

// Writes one length-prefixed float array. ROBUSTNESS FIX: an absent channel
// (null buffer, e.g. a source mesh without uv) is written as length 0 instead
// of throwing a NullReferenceException as the original code did.
static void WriteFloatArray(ByteBuffer bbuffer, float[] data)
{
    int len = (data != null) ? data.Length : 0;
    bbuffer.WriteInt(len);
    for (int i = 0; i < len; i++)
    {
        bbuffer.WriteFloat(data[i]);
    }
}

After the interception, save it as your own binary file, and the code to be loaded and parsed when running is as follows:

// Parses a baked binary clip blob (written by Make) and registers it under
// aniName. The blob holds <count> serialized ClipMeshData segments — long
// animations are split into several 4-key-frame meshes at bake time.
public void AddAnimationInfo(string aniName, byte[] clipData)
{
    // Reject duplicate registrations instead of silently overwriting.
    if (AnimationClipInfos.ContainsKey(aniName))
    {
        // FIX: the original message was misspelled ("has exits!").
        Debug.LogError("animation clip already exists!");
        return;
    }

    VertexAnimationClipInfo clipInfo = new VertexAnimationClipInfo();

    ByteBuffer bbuffer = new ByteBuffer(clipData);
    int count = bbuffer.ReadInt();

    for (int i = 0; i < count; i++)
    {
        ClipMeshData meshData = GetMeshData(bbuffer);
        clipInfo.clipTotalTimeLen += meshData.timeLenth;
        clipInfo.clipLenghts.Add(meshData.timeLenth);
        // FIX: the list is List<Vector2> but the original constructed a Vector3
        // here, compiling only via Unity's implicit Vector3->Vector2 conversion.
        // Build the Vector2 directly.
        clipInfo.everyClipFrameTimePoints.Add(new Vector2(meshData.Frame2TimePoint, meshData.Frame3TimePoint));
        clipInfo.clipMeshs.Add(meshData.GenMesh());
    }

    bbuffer.Close();

    AnimationClipInfos.Add(aniName, clipInfo);
}

VertexAnimationClipInfo is defined as follows:

// Runtime description of one vertex animation clip, assembled from the baked
// binary data in AddAnimationInfo. A clip is a sequence of 4-key-frame mesh
// segments that are swapped in as the animation plays.
[Serializable]
 public  class VertexAnimationClipInfo
{
// Total playing time of the whole clip (sum of all segment lengths).
public  float clipTotalTimeLen = 0 ;
 // One baked Mesh per 4-key-frame segment (built by ClipMeshData.GenMesh).
 public List<Mesh> clipMeshs = new List<Mesh> ();
 // Per segment: normalized times of key frames 2 (x) and 3 (y);
 // frame 1 is at 0 and frame 4 at 1 within the segment.
 public List<Vector2> everyClipFrameTimePoints = new List<Vector2> ();
 // Playing time of each individual segment.
 public List< float > clipLenghts = new List< float > () ;
}

ClipMeshData is defined as follows:

// One 4-key-frame segment of a baked vertex animation, as read back from the
// binary format written by Make(). GenMesh() rebuilds a Unity Mesh whose extra
// channels (normal / tangent / uv2) carry the later key frames for the shader
// to interpolate.
public class ClipMeshData
{
    public float timeLenth;

    /// Normalized times of the interior key frames; Frame1TimePoint = 0, Frame4TimePoint = 1.
    public float Frame2TimePoint = 0.333f;
    public float Frame3TimePoint = 0.666f;
    // public float Frame4TimePoint = 0.75f;

    public int subMeshCount;
    public int[] subMeshTriangleLens;
    public int[] triangleBuffer;
    public float[] vertexBuffer;   // frame-1 positions, 3 floats per vertex
    public float[] normalBuffer;   // frame-2 positions, 3 floats per vertex
    public float[] tangentBuffer;  // frame-3 positions + frame-4 X in w, 4 floats per vertex
    public float[] uvBuffer;       // texture coordinates, 2 floats per vertex
    public float[] uv2Buffer;      // frame-4 Y,Z, 2 floats per vertex
    // public float [] colorBuffer;

    // Builds the renderable Mesh for this segment.
    // FIXES vs. the original:
    //  - optional channels are skipped when their buffer is null/empty instead
    //    of throwing a NullReferenceException;
    //  - the uv2 loop is bounded by its own array rather than reusing uv's.
    public Mesh GenMesh()
    {
        Mesh mesh = new Mesh();

        int vertexCount = vertexBuffer.Length / 3;
        mesh.subMeshCount = subMeshCount;

        // Frame-1 positions -> vertex channel.
        Vector3[] vertexs = new Vector3[vertexCount];
        for (int i = 0; i < vertexCount; i++)
        {
            vertexs[i] = new Vector3(vertexBuffer[i * 3], vertexBuffer[i * 3 + 1], vertexBuffer[i * 3 + 2]);
        }
        mesh.vertices = vertexs;

        // Texture coordinates.
        if (uvBuffer != null && uvBuffer.Length > 0)
        {
            Vector2[] uv = new Vector2[vertexCount];
            for (int i = 0; i < uv.Length; i++)
            {
                uv[i] = new Vector2(uvBuffer[i * 2], uvBuffer[i * 2 + 1]);
            }
            mesh.uv = uv;
        }

        // Frame-4 Y,Z -> uv2 channel.
        if (uv2Buffer != null && uv2Buffer.Length > 0)
        {
            Vector2[] uv2 = new Vector2[vertexCount];
            for (int i = 0; i < uv2.Length; i++)
            {
                uv2[i] = new Vector2(uv2Buffer[i * 2], uv2Buffer[i * 2 + 1]);
            }
            mesh.uv2 = uv2;
        }

        // Frame-2 positions -> normal channel.
        if (normalBuffer != null && normalBuffer.Length > 0)
        {
            Vector3[] normals = new Vector3[vertexCount];
            for (int i = 0; i < normals.Length; i++)
            {
                normals[i] = new Vector3(normalBuffer[i * 3], normalBuffer[i * 3 + 1], normalBuffer[i * 3 + 2]);
            }
            mesh.normals = normals;
        }

        // Frame-3 positions (xyz) + frame-4 X (w) -> tangent channel.
        if (tangentBuffer != null && tangentBuffer.Length > 0)
        {
            var tangents = new Vector4[vertexCount];
            for (int i = 0; i < tangents.Length; i++)
            {
                tangents[i] = new Vector4(tangentBuffer[i * 4], tangentBuffer[i * 4 + 1], tangentBuffer[i * 4 + 2], tangentBuffer[i * 4 + 3]);
            }
            mesh.tangents = tangents;
        }

        // Triangle indices, split back out per submesh.
        int startIndex = 0;
        for (int i = 0; i < subMeshCount; i++)
        {
            int bufferLen = subMeshTriangleLens[i];
            if (bufferLen <= 0) continue;
            var triIndexBuffer = new int[bufferLen];
            Array.Copy(triangleBuffer, startIndex, triIndexBuffer, 0, bufferLen);
            mesh.SetTriangles(triIndexBuffer, i);
            startIndex += bufferLen;
        }
        return mesh;
    }
}

Just play the animation and switch the corresponding mesh. The following is the shader code used for gpu interpolation:

// GPU key-frame interpolation shader. The mesh's extra channels carry the
// later key frames: NORMAL = frame-2 positions, TANGENT.xyz = frame-3
// positions, TANGENT.w + TEXCOORD1.xy = frame-4 position (x, y, z).
// _CurTime is the normalized playback position in [0,1]; _Frame2Time and
// _Frame3Time are the normalized times of the interior key frames.
Shader " LXZ_TEST/VertexAnimation-NoColorBuf " {
Properties {
    _MainTex ( " Base (RGB) " , 2D) = " white " {}
    _CurTime( " Time " , Float) = 0
    _Frame2Time( " Frame2Time " , Float) = 0.333
    _Frame3Time( " Frame3Time " , Float) = 0.666
    _Color ( " MainColor " , color) = ( 1 , 1 , 1 , 1 )
}
SubShader {
    // Tags {"QUEUE"="Geometry" "RenderType"="Opaque"}

    Pass {
        Blend SrcAlpha OneMinusSrcAlpha
        CGPROGRAM
        #pragma vertex vert
        #pragma fragment frag
        // #include "UnityCG.cginc"

        #pragma glsl_no_auto_normalization

        sampler2D _MainTex;
        float _CurTime;
        float _Frame2Time;
        float _Frame3Time;
        float4 _Color;

        struct appdata {
            float4 vertex: POSITION;    // key frame 1
            float3 vertex1: NORMAL;     // key frame 2
            float4 vertex2: TANGENT;    // key frame 3 (xyz) + frame-4 x (w)
            float2 texcoord: TEXCOORD0; // real texture coordinates
            float2 vertex3: TEXCOORD1;  // key frame 4 (y, z)
            // float3 vertex4: COLOR;
        };

        struct v2f {
            float4 pos: POSITION;
            float2 uv: TEXCOORD0;
        };

        v2f vert(appdata v) {
            v2f result;

            float a = _CurTime - _Frame2Time;
            float b = _CurTime - _Frame3Time;

            float3 vec;

            // Reassemble the frame-4 position from tangent.w and uv2.
            float3 vertex3 = float3(v.vertex2.w, v.vertex3.xy);

            // Pick the key-frame pair bracketing _CurTime and lerp between
            // them. FIX: the original middle branch tested "a >= 0 && b < 0",
            // but a >= 0 is always true once the first branch fails.
            if (a < 0)
                vec = v.vertex.xyz + (v.vertex1 - v.vertex.xyz) * _CurTime / _Frame2Time;
            else if (b < 0)
                vec = v.vertex1 + (v.vertex2.xyz - v.vertex1) * a / (_Frame3Time - _Frame2Time);
            else
                vec = v.vertex2.xyz + (vertex3 - v.vertex2.xyz) * b / ( 1 - _Frame3Time);

            result.pos = mul(UNITY_MATRIX_MVP, float4(vec, 1 ));
            result.uv = v.texcoord;

            return result;
        }

        float4 frag(v2f i): COLOR
        {
            float4 color = tex2D(_MainTex, i.uv);
            return color * _Color;
        }

        ENDCG
    }
}
FallBack " Diffuse "
}

Thanks to Ferhat S. who encouraged me to do this work and good luck with his game.