[toc]
一直想学习一下 Nanite 是如何实现的,最近悄悄打开 UE 源码小鹿乱撞的看了一下,然后发现了一个奇观
Nanite 的剖分核心代码文件只有这么多 XD
这么一来,忽然感觉也有机会稍微不那么彻底的将 Nanite 的实现进行一次理解与拆分么?那么就开始吧
这一篇的目的是先对目前已知的模块进行大体的拆分
# Nanite 的大体思路
todo: 这一部分其实之前已经比较熟悉了,现在需要考虑的是 UE是如何具体的实现的
所以这一章暂时先搁置啦
# 1. Nanite Mesh 的切分方式
倘若给我们一个如上图那么复杂的三维模型,让我们尝试将这个三维模型切分成 Nanite 中的每簇 Cluster
,那我们首先需要思考的就是 剖分算法
,即如何将一整个 Mesh 合理的划分成不同的 Cluster, 而且还要能够支持 Cluster 的 LOD
UE 是这么做的 (暂时还没看实现算法,只是把大体逻辑罗列一下)
首先在 NaniteBuilder.cpp
的 ClusterTriangles
函数中,将原本的 MeshData
传入,调用 FGraphPartitioner
进行切分
然后在 FGraphPartitioner
方法的 PartitionStrict
函数中进行 Mesh 的切分
每次切分,调用 BisectGraph
函数进行建图,同时,会在这个函数中调用 METIS
库进行实际的 Mesh 切分
在这个函数中, METIS_PartGraphRecursive
是调用 Metis 库的入口
个人思考 1 - 如何对 Mesh 进行合理的 Cluster 的切分?在稍微了解一下 metis 库后,忽然明白,实际上的 Mesh 的构成就是一个标准的 图结构
,而我们只需要将这个图切分成几个合理的子图即可。
这样问题就从如何切分 Mesh, 转化成了,如何获得一个图结构的子图。
而在得到子图以后,实际上就轻松很多了,耳裁剪,三角剖分什么的,都可以使用了
# First Step: Build
最初的入口是 BuildNaniteData
函数头
1 2 3 4 5 6 7 8 9 static bool BuildNaniteData ( FResources& Resources, IBuilderModule::FVertexMeshData& InputMeshData, TArray< int32 >& MaterialIndexes, TArray< uint32 >& MeshTriangleCounts, TArrayView< IBuilderModule::FVertexMeshData >& OutputLODMeshData, uint32 NumTexCoords, const FMeshNaniteSettings& Settings ) {}
参数
意义
Resources
用来在 Build 时记录当前 Mesh Build 后的信息
InputMeshData
输入的 MeshData
MaterialIndexes
记录的每个 Mesh 的三角形的 Indexes
MeshTriangleCounts
输入的 Mesh 可能不止一个,记录每个 Mesh 的三角形数据
OutputLODMeshData
输出的 Lod Mesh 数据
NumTexCoords
?
Settings
?(配置,看起来像是切分的配置)
先用 Channel 与顶点色进行 &
来判断是否有顶点色,其中会用 ResourceFlags
来记录当前 Mesh 的属性 (只要有一个有颜色,就被标记为有顶点色)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 uint32 Channel = 255 ; for ( auto & Vert : InputMeshData.Vertices ){ VertexBounds += Vert.Position; Channel &= Vert.Color.R; Channel &= Vert.Color.G; Channel &= Vert.Color.B; Channel &= Vert.Color.A; } const bool bHasVertexColor = Channel != 255 ;if (bHasVertexColor){ Resources.ResourceFlags = NANITE_RESOURCE_FLAG_HAS_VERTEX_COLOR; }
Nanite 会用 Resourcs
这个结构体来记录顶点数目 / Cluster 数目 / 三角形数目等信息
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 struct FResources { TArray< uint8 > RootData; FByteBulkData StreamablePages; TArray< uint16 > ImposterAtlas; TArray< FPackedHierarchyNode > HierarchyNodes; TArray< uint32 > HierarchyRootOffsets; TArray< FPageStreamingState > PageStreamingStates; TArray< uint32 > PageDependencies; uint32 NumRootPages = 0 ; int32 PositionPrecision = 0 ; int32 NormalPrecision = 0 ; uint32 NumInputTriangles = 0 ; uint32 NumInputVertices = 0 ; uint16 NumInputMeshes = 0 ; uint16 NumInputTexCoords = 0 ; uint32 NumClusters = 0 ; uint32 ResourceFlags = 0 ; -----More Code }
Mesh 数据赋值,这一层后,会将基本的顶点 / 三角形等数据存到 Resources
中
从下述代码里来看,似乎在 Mesh 只有一个时候,会进行一次细分 ( 为什么多个Mesh时没有细分?
)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 if ( MeshTriangleCounts.Num () == 1 && Settings.TrimRelativeError != 0.0f ){ uint32 Time0 = FPlatformTime::Cycles (); TessellateAndDisplace ( InputMeshData.Vertices, InputMeshData.TriangleIndices, MaterialIndexes, VertexBounds, Settings ); MeshTriangleCounts[0 ] = InputMeshData.TriangleIndices.Num () / 3 ; uint32 Time1 = FPlatformTime::Cycles (); UE_LOG ( LogStaticMesh, Log, TEXT ("Adaptive tessellate [%.2fs], tris: %i" ), FPlatformTime::ToMilliseconds ( Time1 - Time0 ) / 1000.0f , MeshTriangleCounts[0 ] ); } Resources.NumInputTriangles = InputMeshData.TriangleIndices.Num () / 3 ; Resources.NumInputVertices = InputMeshData.Vertices.Num (); Resources.NumInputMeshes = MeshTriangleCounts.Num (); Resources.NumInputTexCoords = NumTexCoords;
开始进行 Mesh 的切分 Cluster入口
依次如下:
4.1 先遍历所有的 Mesh
4.2 在处理当前的 Mesh 前先获得 Clusters.Num ()
4.3 然后对当前 Mesh 进行 ClusterTriangles
的 Build
4.4 处理完当前的 Mesh 后,将当前 Mesh 划分的 Cluster 数量加入 ClusterCountPerMesh
列表
4.5 记录总的三角形数量 (切分前)
4.6 最后统计一下所有 Cluster 的表面积 SurfaceArea
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 TArray< uint32 > ClusterCountPerMesh; TArray< FCluster > Clusters; { uint32 BaseTriangle = 0 ; for (uint32 NumTriangles : MeshTriangleCounts) { uint32 NumClustersBefore = Clusters.Num (); if (NumTriangles) { ClusterTriangles ( InputMeshData.Vertices, TArrayView < const uint32 >( &InputMeshData.TriangleIndices[BaseTriangle * 3 ], NumTriangles * 3 ), TArrayView < const int32 >( &MaterialIndexes[BaseTriangle], NumTriangles ), Clusters, VertexBounds, NumTexCoords, bHasVertexColor, Settings.bPreserveArea ); } ClusterCountPerMesh.Add (Clusters.Num () - NumClustersBefore); BaseTriangle += NumTriangles; } } float SurfaceArea = 0.0f ;for ( FCluster& Cluster : Clusters ) SurfaceArea += Cluster.SurfaceArea;
# Second Step: Per Mesh ClusterTriangles
其中 ClusterTriangles
是针对每个 Mesh
来进行的
先计算三角形数目
1 uint32 NumTriangles = Indexes.Num () / 3 ;
锁边,hash
在这一步提供了一个 lambda 函数 GetPosition
, 简单看来,这个函数的目的是根据它定义的 边
的下标来获取三角形的某个边上的顶点的坐标
1 2 3 4 5 6 7 FAdjacency Adjacency ( Indexes.Num() ) ;FEdgeHash EdgeHash ( Indexes.Num() ) ;auto GetPosition = [ &Verts, &Indexes ]( uint32 EdgeIndex ){ return Verts[ Indexes[ EdgeIndex ] ].Position; };
2.1 先来看一下边的概念 EdgeHash
实际上看起来并没有什么特别的,大概就是将整个 Mesh 变成了一个由 顶点为点
, 三角形边为双向边
的 图
, EdgeHash
本质上是把每个边给 Hash 了一下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 class FEdgeHash { public : FHashTable HashTable; FEdgeHash ( int32 Num ) : HashTable ( 1 << FMath::FloorLog2 ( Num ), Num ) {} template < typename FGetPosition > void Add_Concurrent ( int32 EdgeIndex, FGetPosition&& GetPosition ) { const FVector3f& Position0 = GetPosition ( EdgeIndex ); const FVector3f& Position1 = GetPosition ( Cycle3 ( EdgeIndex ) ); uint32 Hash0 = HashPosition ( Position0 ); uint32 Hash1 = HashPosition ( Position1 ); uint32 Hash = Murmur32 ( { Hash0, Hash1 } ); HashTable.Add_Concurrent ( Hash, EdgeIndex ); } template < typename FGetPosition, typename FuncType > void ForAllMatching ( int32 EdgeIndex, bool bAdd, FGetPosition&& GetPosition, FuncType&& Function ) { const FVector3f& Position0 = GetPosition ( EdgeIndex ); const FVector3f& Position1 = GetPosition ( Cycle3 ( EdgeIndex ) ); uint32 Hash0 = HashPosition ( Position0 ); uint32 Hash1 = HashPosition ( Position1 ); uint32 Hash = Murmur32 ( { Hash1, Hash0 } ); for ( uint32 OtherEdgeIndex = HashTable.First ( Hash ); HashTable.IsValid ( OtherEdgeIndex ); OtherEdgeIndex = HashTable.Next ( OtherEdgeIndex ) ) { if ( Position0 == GetPosition ( Cycle3 ( OtherEdgeIndex ) ) && Position1 == GetPosition ( OtherEdgeIndex ) ) { Function ( EdgeIndex, OtherEdgeIndex ); } } if ( bAdd ) HashTable.Add ( Murmur32 ( { Hash0, Hash1 } ), EdgeIndex ); } };
2.2 边的 Hash 与边的匹配
先使用任务系统多线程构建边 Hash
然后再使用任务系统,匹配当前边的临接边 (如果有多个临界边,就把 Direct 赋值为 - 2?)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ParallelFor ( TEXT ("Nanite.ClusterTriangles.PF" ), Indexes.Num (), 4096 , [&]( int32 EdgeIndex ) { EdgeHash.Add_Concurrent ( EdgeIndex, GetPosition ); } ); ParallelFor ( TEXT ("Nanite.ClusterTriangles.PF" ), Indexes.Num (), 1024 , [&]( int32 EdgeIndex ) { int32 AdjIndex = -1 ; int32 AdjCount = 0 ; EdgeHash.ForAllMatching ( EdgeIndex, false , GetPosition, [&]( int32 EdgeIndex, int32 OtherEdgeIndex ) { AdjIndex = OtherEdgeIndex; AdjCount++; } ); if ( AdjCount > 1 ) AdjIndex = -2 ; Adjacency.Direct[ EdgeIndex ] = AdjIndex; } );
2.3 通过并查集合并所有的临接边 从临接边的代码和并查集的代码来看,就是将所有的临接边以并查集的方式合并到当前的边上
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 FDisjointSet DisjointSet ( NumTriangles ) ;for ( uint32 EdgeIndex = 0 , Num = Indexes.Num (); EdgeIndex < Num; EdgeIndex++ ){ if ( Adjacency.Direct[ EdgeIndex ] == -2 ) { TArray< TPair< int32, int32 >, TInlineAllocator< 16 > > Edges; EdgeHash.ForAllMatching ( EdgeIndex, false , GetPosition, [&]( int32 EdgeIndex0, int32 EdgeIndex1 ) { Edges.Emplace ( EdgeIndex0, EdgeIndex1 ); } ); Edges.Sort (); for ( const TPair< int32, int32 >& Edge : Edges ) { Adjacency.Link ( Edge.Key, Edge.Value ); } } Adjacency.ForAll ( EdgeIndex, [&]( int32 EdgeIndex0, int32 EdgeIndex1 ) { if ( EdgeIndex0 > EdgeIndex1 ) DisjointSet.UnionSequential ( EdgeIndex0 / 3 , EdgeIndex1 / 3 ); } ); }
准备构建分割图
由于 Nanite 对于 Mesh 的切分,实际上是使用 Metis
库进行切分的,所以在 Build 时,Nanite 最主要的作用是将 UE 内部的 Mesh 组装成 Metis 需要的数据排布, 比如这里就没有非常的执着于三角形的格式,而是很直白的将Mesh转换成了一个图的结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 FGraphPartitioner Partitioner ( NumTriangles ) ;class FGraphPartitioner { public : struct FGraphData { int32 Offset; int32 Num; TArray< idx_t > Adjacency; TArray< idx_t > AdjacencyCost; TArray< idx_t > AdjacencyOffset; }; struct FRange { uint32 Begin; uint32 End; bool operator <( const FRange& Other) const { return Begin < Other.Begin; } }; TArray< FRange > Ranges; TArray< uint32 > Indexes; TArray< uint32 > SortedTo; public : FGraphPartitioner ( uint32 InNumElements ); FGraphData* NewGraph ( uint32 NumAdjacency ) const ; void AddAdjacency ( FGraphData* Graph, uint32 AdjIndex, idx_t Cost ) ; void AddLocalityLinks ( FGraphData* Graph, uint32 Index, idx_t Cost ) ; template < typename FGetCenter > void BuildLocalityLinks ( FDisjointSet& DisjointSet, const FBounds3f& Bounds, TArrayView< const int32 > GroupIndexes, FGetCenter& GetCenter ) ; void Partition ( FGraphData* Graph, int32 InMinPartitionSize, int32 InMaxPartitionSize ) ; void PartitionStrict ( FGraphData* Graph, int32 InMinPartitionSize, int32 InMaxPartitionSize, bool bThreaded ) ; private : void BisectGraph ( FGraphData* Graph, FGraphData* ChildGraphs[2 ] ) ; void RecursiveBisectGraph ( FGraphData* Graph ) ; uint32 NumElements; int32 MinPartitionSize = 0 ; int32 MaxPartitionSize = 0 ; TAtomic< uint32 > NumPartitions; TArray< idx_t > PartitionIDs; TArray< int32 > SwappedWith; TMultiMap< uint32, uint32 > LocalityLinks; };
3.1 获取三角形中心的函数
1 2 3 4 5 6 7 8 auto GetCenter = [ &Verts, &Indexes ]( uint32 TriIndex ){ FVector3f Center; Center = Verts[ Indexes[ TriIndex * 3 + 0 ] ].Position; Center += Verts[ Indexes[ TriIndex * 3 + 1 ] ].Position; Center += Verts[ Indexes[ TriIndex * 3 + 2 ] ].Position; return Center * (1.0f / 3.0f ); };
3.2 建图
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 Partitioner.BuildLocalityLinks ( DisjointSet, Bounds, MaterialIndexes, GetCenter ); auto * RESTRICT Graph = Partitioner.NewGraph ( NumTris * 3 );for ( uint32 i = 0 ; i < NumTris; i++ ){ Graph->AdjacencyOffset[i] = Graph->Adjacency.Num (); uint32 TriIndex = Partitioner.Indexes[i]; for ( int k = 0 ; k < 3 ; k++ ) { Adjacency.ForAll ( 3 * TriIndex + k, [ &Partitioner, Graph ]( int32 EdgeIndex, int32 AdjIndex ) { Partitioner.AddAdjacency ( Graph, AdjIndex / 3 , 4 * 65 ); } ); } Partitioner.AddLocalityLinks ( Graph, TriIndex, 1 ); } Graph->AdjacencyOffset[ NumTris ] = Graph->Adjacency.Num ();
其中有一个方法是 BuildLocalityLinks
, 这个方法大概是会建立一个链接 (暂时不知道这个链接是干嘛的) 但是从下述代码最开始可以看出来,大概是对之前获得的所有元素的 包围盒中心
计算 Morton码
,然后使用 Morton码
来降维,并且将这些元素按照 Morton 码存储在 SortedTo
中,方便后续计算
后面再来学习一下这个到底是干嘛的
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 template < typename FGetCenter >void FGraphPartitioner::BuildLocalityLinks ( FDisjointSet& DisjointSet, const FBounds3f& Bounds, TArrayView< const int32 > GroupIndexes, FGetCenter& GetCenter ) { TArray< uint32 > SortKeys; SortKeys.AddUninitialized ( NumElements ); SortedTo.AddUninitialized ( NumElements ); const bool bElementGroups = !GroupIndexes.IsEmpty (); ParallelFor ( TEXT ("BuildLocalityLinks.PF" ), NumElements, 4096 , [&]( uint32 Index ) { FVector3f Center = GetCenter ( Index ); FVector3f CenterLocal = ( Center - Bounds.Min ) / FVector3f ( Bounds.Max - Bounds.Min ).GetMax (); uint32 Morton; Morton = FMath::MortonCode3 ( CenterLocal.X * 1023 ); Morton = FMath::MortonCode3 ( CenterLocal.Y * 1023 ) << 1 ; Morton = FMath::MortonCode3 ( CenterLocal.Z * 1023 ) << 2 ; SortKeys[ Index ] = Morton; }); RadixSort32 ( SortedTo.GetData (), Indexes.GetData (), NumElements, [&]( uint32 Index ) { return SortKeys[ Index ]; } ); SortKeys.Empty (); Swap ( Indexes, SortedTo ); for ( uint32 i = 0 ; i < NumElements; i++ ) { SortedTo[ Indexes[i] ] = i; } TArray< FRange > IslandRuns; IslandRuns.AddUninitialized ( NumElements ); { uint32 RunIslandID = 0 ; uint32 RunFirstElement = 0 ; for ( uint32 i = 0 ; i < NumElements; i++ ) { uint32 IslandID = DisjointSet.Find ( Indexes[i] ); if ( RunIslandID != IslandID ) { for ( uint32 j = RunFirstElement; j < i; j++ ) { IslandRuns[j].End = i - 1 ; } RunIslandID = IslandID; RunFirstElement = i; } IslandRuns[i].Begin = RunFirstElement; } for ( uint32 j = RunFirstElement; j < NumElements; j++ ) { IslandRuns[j].End = NumElements - 1 ; } } for ( uint32 i = 0 ; i < NumElements; i++ ) { uint32 Index = Indexes[i]; uint32 RunLength = IslandRuns[i].End - IslandRuns[i].Begin + 1 ; if ( RunLength < 128 ) { uint32 IslandID = DisjointSet[ Index ]; int32 GroupID = bElementGroups ? GroupIndexes[ Index ] : 0 ; FVector3f Center = GetCenter ( Index ); const uint32 MaxLinksPerElement = 5 ; uint32 ClosestIndex[MaxLinksPerElement]; float ClosestDist2[MaxLinksPerElement]; for (int32 k = 0 ; k < MaxLinksPerElement; k++) { ClosestIndex[k] = ~0u ; ClosestDist2[k] = MAX_flt; } for ( int Direction = 0 ; Direction < 2 ; Direction++ ) { uint32 Limit = Direction ? NumElements - 1 : 0 ; uint32 Step = Direction ? 1 : -1 ; uint32 Adj = i; for ( int32 Iterations = 0 ; Iterations < 16 ; Iterations++ ) { if ( Adj == Limit ) break ; Adj += Step; uint32 AdjIndex = Indexes[ Adj ]; uint32 AdjIslandID = DisjointSet[ AdjIndex ]; int32 AdjGroupID = bElementGroups ? GroupIndexes[AdjIndex] : 0 ; if ( IslandID == AdjIslandID ( GroupID != AdjGroupID ) ) { if ( Direction ) Adj = IslandRuns[ Adj ].End; else Adj = IslandRuns[ Adj ].Begin; } else { float AdjDist2 = ( Center - GetCenter ( AdjIndex ) ).SizeSquared (); for ( int k = 0 ; k < MaxLinksPerElement; k++ ) { if ( AdjDist2 < ClosestDist2[k] ) { Swap ( AdjIndex, ClosestIndex[k] ); Swap ( AdjDist2, ClosestDist2[k] ); } } } } } for ( int k = 0 ; k < MaxLinksPerElement; k++ ) { if ( ClosestIndex[k] != ~0u ) { LocalityLinks.AddUnique ( Index, ClosestIndex[k] ); LocalityLinks.AddUnique ( ClosestIndex[k], Index ); } } } } }
3.3 切分 Mesh 从这里就进入到了 调用Metis库切分Mesh
的流程了
1 Partitioner.PartitionStrict ( Graph, ClusterSize - 4 , ClusterSize, false );