NLF 3D Pose Deployment Notes
Table of Contents
Algorithm analysis
1. Predict 1024 3D vertices and 55 joints
2. Fit vertices_flat / joints_flat with SMPL
Creating SemanticRenderer (adapted from multi_hmr)
Dependencies:
rlemasklib
Testing the dependencies:
TensorFlow demo:
MeTRAbs model and config:
Inference error 'Namespace' object has no attribute 'configuration_name':
barecat: build fails on Windows, builds fine on Linux
Inference code
Rendering code:
Image inference code:
Latest PyTorch inference progress:
load_crop_model loads OK
Missing vertex_subset_1024.npz
Calling the original renderer (not yet working)
Algorithm analysis
1. Predict 1024 3D vertices and 55 joints
E:\project\jijia_4d\nlf_torch\nlf\multiperson\multiperson_model.py
When suppress_implausible_poses is enabled, the predicted poses are filtered to keep only plausible ones, which reduces false positives:

# Filter the predictions to ensure plausibility and suppress false positives
boxes, poses3d, poses2d, uncertainties = self._filter_poses(
    boxes=boxes, poses3d=poses3d, poses2d=poses2d, uncertainties=uncertainties)
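A hedged sketch of what such filtering amounts to, assuming a plain uncertainty threshold (the actual _filter_poses in nlf applies more involved plausibility checks; the threshold value and masking logic here are illustrative only):

import torch

def filter_poses_sketch(boxes, poses3d, poses2d, uncertainties, max_uncertainty=0.15):
    # Keep only detections whose mean per-joint uncertainty is below the threshold.
    keep = uncertainties.mean(dim=-1) < max_uncertainty
    return boxes[keep], poses3d[keep], poses2d[keep], uncertainties[keep]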
2. Fit vertices_flat / joints_flat with SMPL
The fitted pose parameters are fed to SMPL-X to generate the vertices; the raw output also contains vertices3d. HMR2, by contrast, renders the vertices regressed directly by the network.
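A minimal sketch contrasting the two routes, assuming pose/betas/trans/pred follow the key_names layout of the inference scripts below (single-person batch for simplicity):

import torch
from smplx import SMPLX

# Route A (parametric, NLF-style): rebuild the mesh from the fitted SMPL-X parameters.
model = SMPLX('./SMPLX_NEUTRAL.npz', gender='neutral', use_pca=False)
out = model(global_orient=pose[0:1, :3], body_pose=pose[0:1, 3:66],
            betas=betas[0:1], transl=trans[0:1])
vertices_from_params = out.vertices  # (1, 10475, 3) full SMPL-X mesh

# Route B (HMR2-style): use the vertices the network regressed directly
# (here, the nonparametric NLF output for the first frame).
vertices_from_network = pred['vertices3d_nonparam'][0]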
NLF repository:
GitHub - isarandi/nlf: [NeurIPS 2024] Neural Localizer Fields for Continuous 3D Human Pose and Shape Estimation
Creating SemanticRenderer (adapted from multi_hmr)
In Python, self.renderer is assigned an instance of the SemanticRenderer class, configured with the faces attribute and the lbs_weights weight matrix from the SMPL model's neutral_10 layer, and with viewport_size set to (896, 896).
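A hedged sketch of that assignment (the neutral_10 key and the exact SemanticRenderer keyword signature are read off the notes above, not verified against multi_hmr):

# Assumed layout: the SMPL(-X) layers are stored in a dict keyed by 'neutral_10'.
smpl_layer = self.smpl_layer['neutral_10']
self.renderer = SemanticRenderer(
    faces=smpl_layer.faces,              # mesh topology
    lbs_weights=smpl_layer.lbs_weights,  # linear blend skinning weights
    viewport_size=(896, 896),            # render resolution
)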
Dependencies:
rlemasklib
The GitHub repository isarandi/rlemasklib provides functionality to manipulate run-length encoded image masks.
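A small usage sketch, assuming rlemasklib follows the pycocotools-style encode/decode API (verify the exact function names against the repo):

import numpy as np
import rlemasklib

mask = np.zeros((64, 64), dtype=np.uint8)
mask[16:48, 16:48] = 1           # a square foreground region
rle = rlemasklib.encode(mask)    # run-length encode the binary mask
decoded = rlemasklib.decode(rle)
assert (decoded == mask).all()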
Testing the dependencies:
python -c "import posepile.joint_info"
TensorFlow demo:
python -c "import tensorflow as tf;import tensorflow_hub as tfhub;import tensorflow_io as tfio"
The MeTRAbs model and config are all here:
Source:
GitHub - isarandi/metrabs: Compute full-body pose estimation from RGB images.
wget -O - https://bit.ly/metrabs_l_pt | tar -xzvf -
python -m metrabs_pytorch.scripts.demo_image --model-dir metrabs_eff2l_384px_800k_28ds_pytorch --image img/test_image_3dpw.jpg
Inference error 'Namespace' object has no attribute 'configuration_name':
File "/shared_disk/users/lbg/project/human_4d/nlf_code_new/nlf-main/nlf/pt/metrabs_pytorch/util.py", line 48, in get_config
configuration_name = configuration_name if configuration_name is not None else spu.FLAGS.configuration_name
AttributeError: 'Namespace' object has no attribute 'configuration_name'
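The traceback shows get_config falling back to spu.FLAGS.configuration_name, and the argparse Namespace used here never defined that attribute. A hedged workaround sketch (assuming spu is simplepyutils and that pre-setting the attribute before calling get_config is acceptable):

import simplepyutils as spu

# Give the FLAGS namespace the attribute get_config() expects;
# the attribute name is taken verbatim from the traceback above.
if not hasattr(spu.FLAGS, 'configuration_name'):
    spu.FLAGS.configuration_name = None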
barecat fails to compile on Windows; it compiles fine on Linux.
Inference code
pose_demo.py
# coding=utf-8
import sys
import os
import cv2

current_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_dir)
paths = [os.path.abspath(__file__).split('scripts')[0]]
print('current_dir', current_dir)
paths.append(os.path.abspath(os.path.join(current_dir, 'src')))
for path in paths:
    sys.path.insert(0, path)
    os.environ['PYTHONPATH'] = (os.environ.get('PYTHONPATH', '') + ':' + path).strip(':')

import glob
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
import torch
import torchvision
import pickle
import gc
import time
import warnings

warnings.filterwarnings("ignore")


def load_video_frames(video_path, mode='bgr'):
    """Load frames from a video file or from a directory of images."""
    frames = []
    count = 0
    if os.path.isdir(video_path):
        files = glob.glob(video_path + '/*.png') + glob.glob(video_path + '/*.jpg')
        for file in files:
            img = cv2.imread(file)
            if mode != 'bgr':
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            frames.append(img)
            count += 1
        return frames, 25  # assume 25 fps for image directories
    video = cv2.VideoCapture(video_path)
    fps = int(video.get(cv2.CAP_PROP_FPS))
    while True:
        ret, frame = video.read()
        if not ret:
            break
        count += 1
        if mode != 'bgr':
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frames.append(frame)
    return frames, fps


# Load the TorchScript model
model = torch.jit.load('./nlf_l_multi.torchscript').cuda().eval()
batch_size = 1
key_names = ['boxes', 'pose', 'betas', 'trans', 'vertices3d', 'joints3d',
             'vertices2d', 'joints2d', 'vertices3d_nonparam', 'joints3d_nonparam',
             'vertices2d_nonparam', 'joints2d_nonparam',
             'vertex_uncertainties', 'joint_uncertainties']
# The second glob overrides the first; keep whichever input set you want.
paths = glob.glob(r"/shared_disk/users/lbg/project/inpaint/see3d/mp4s/person_big_0108.mp4")
paths = glob.glob(r"/shared_disk/users/liuzhichao/codes/torch-serve-handlers/GroundedSAM2_SMPL/test_data/single_1.mp4")
out_dir = 'output/'
os.makedirs(out_dir, exist_ok=True)
videos_not_processed = []

for vi, video_path in enumerate(paths):
    try:
        imgs, fps = load_video_frames(video_path)
        frames, _, _ = torchvision.io.read_video(video_path, pts_unit="sec")
        # Convert frames to a tensor and move to GPU
        frames = frames.permute(0, 3, 1, 2).cuda()  # Shape: (num_frames, C, H, W)
        num_frames = frames.shape[0]
        print("frames shape: ", frames.shape, num_frames)
        results = {key: [] for key in key_names}
        with torch.no_grad():
            for i in range(0, num_frames, batch_size):
                frame_batch = frames[i:i + batch_size]
                print("i: ", i, "i end: ", i + batch_size, frame_batch.shape)
                start = time.time()
                preds = model.detect_smpl_batched(frame_batch, model_name='smplx')
                print(i, 'time', time.time() - start)
                for key in preds:
                    results[key].extend([p.cpu() for p in preds[key]])
                # Fetch the 2D joints and vertices for visualization
                joints2ds = preds['joints2d'][0].cpu().numpy()      # per person: (55, 2)
                vertices2ds = preds['vertices2d'][0].cpu().numpy()  # per person: (1024, 2)
                img = imgs[i]
                # Draw on copies of the original frame
                img_copy = img.copy()
                img_vertices = img.copy()
                for joints2d in joints2ds:
                    # Draw each 2D joint
                    for joint in joints2d:
                        x, y = int(joint[0]), int(joint[1])
                        cv2.circle(img_copy, (x, y), 5, (0, 0, 255), -1)  # red dots (BGR)
                for vertices2d in vertices2ds:
                    # Draw each 2D vertex
                    for vertex in vertices2d:
                        x, y = int(vertex[0]), int(vertex[1])
                        cv2.circle(img_vertices, (x, y), 2, (255, 0, 0), -1)  # blue dots (BGR)
                cv2.imwrite(f'{out_dir}/{i}.jpg', img_copy)
                cv2.imwrite(f'{out_dir}/{i}_vertices.jpg', img_vertices)
        del frames
        torch.cuda.empty_cache()
        gc.collect()
        print("output: ", len(results['pose']))
        print("output: ", len(results['betas']))
        video_path = str(video_path).replace('.mp4', '')
        video_path = video_path.split('/')
        video_name = video_path[-1]
        subset = video_path[-2]
        save_path = out_dir + f"{video_name}.pkl"
        with open(save_path, 'wb') as f:
            pickle.dump(results, f)
        time.sleep(1)
        print("reading pickle file")
        with open(save_path, 'rb') as f:
            results = pickle.load(f)
        print("results: ", len(results['pose']))
        print("results: ", len(results['betas']))
    except Exception as e:
        videos_not_processed.append(str(video_path))
        print(f"Error processing video: {video_path}")
        print(e)

print("Videos not processed:", len(videos_not_processed))
output_file = out_dir + "videos_not_processed.txt"
with open(output_file, 'w') as f:
    f.write("\n".join(videos_not_processed))
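A quick sketch for sanity-checking one of the saved .pkl files (the keys follow key_names above; the example path is hypothetical):

import pickle

with open('output/single_1.pkl', 'rb') as f:
    results = pickle.load(f)

# Each key maps to a list with one tensor per frame; the leading dimension
# of each tensor is the number of detected people in that frame.
for key, values in results.items():
    print(key, len(values), values[0].shape if values else None)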
Rendering code:
Global rotation / orientation can be adjusted by rotating root_orient (see the commented-out rotation block in the script below).
Render distance is controlled by the camera's z translation: the larger the value, the farther away the camera and the smaller the rendered mesh:
camera_pose[:3, 3] = np.array([0, 0, 2])  # Move camera further away
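Why a larger z shrinks the subject: under a pinhole model the projected size scales as 1/z. A quick check (numbers are illustrative, using the yfov=pi/3, 800 px setup from the script below):

import numpy as np

def projected_height_px(height_m, z_m, fov_y=np.pi / 3, image_h=800):
    # pinhole projection: pixels = f * H / z, with f = image_h / (2 * tan(fov_y / 2))
    f = image_h / (2 * np.tan(fov_y / 2))
    return f * height_m / z_m

print(projected_height_px(1.7, 2.0))  # ~589 px at z = 2 m
print(projected_height_px(1.7, 4.0))  # ~294 px at z = 4 m

Full rendering script: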
# coding=utf-8
import sys
import os
import imageio

current_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_dir)
paths = [os.path.abspath(__file__).split('scripts')[0]]
print('current_dir', current_dir)
paths.append(os.path.abspath(os.path.join(current_dir, 'src')))
for path in paths:
    sys.path.insert(0, path)
    os.environ['PYTHONPATH'] = (os.environ.get('PYTHONPATH', '') + ':' + path).strip(':')

import pickle
import gc
import numpy as np
import torch
import cv2
import pyrender
from smplx import SMPLX
from tqdm import tqdm
import trimesh

os.environ["PYOPENGL_PLATFORM"] = "egl"
os.environ['EGL_DEVICE_ID'] = '0'

video_path = r"/shared_disk/users/lbg/project/inpaint/see3d/mp4s/person_big_0108.mp4"
pred_pkl = r"/shared_disk/users/lbg/project/human_4d/nlf_pose/output/big_0108/person_big_0108.pkl"
with open(pred_pkl, 'rb') as f:
    pred = pickle.load(f)
pose = pred['pose']
target_shape = pose[0].shape  # use the first tensor's shape as the reference
inconsistent_frames = []
for i, tensor in enumerate(pose):
    if tensor.shape != target_shape:
        inconsistent_frames.append(i)
        pose[i] = torch.zeros(1, 165)  # pad frames with an inconsistent person count
        print(f"Frame {i} has inconsistent shape: {tensor.shape} (expected {target_shape})")
    else:
        pose_a = pose[i]
        # pose_a = pose_a.reshape(-1, 55, 3)
        # vertices_flipped = pose_a.clone()
        # vertices_flipped[0, :, 1] = -vertices_flipped[0, :, 1]  # negate the y coordinates
        # vertices_flipped[0, :, 2] = -vertices_flipped[0, :, 2]  # negate the z coordinates
        # pose[i] = vertices_flipped.reshape(-1, 165)
pose = torch.cat(pose, dim=0)

# Load SMPL-X model
model_path = './SMPLX_NEUTRAL.npz'
model = SMPLX(model_path, gender='neutral', use_pca=False)
render_w = 800
render_h = 800
num_frames = pose.shape[0]  # len(pred) would count dict keys, not frames
os.makedirs("out", exist_ok=True)
renderer = pyrender.OffscreenRenderer(viewport_width=render_w, viewport_height=render_h)

frames = []
for i in tqdm(range(num_frames)):
    # Extract parameters
    root_orient = pose[i][:3].unsqueeze(0)
    pose_body = pose[i][3:22 * 3].unsqueeze(0)
    trans = pred['trans'][i]
    betas = pred['betas'][i]
    # Optional: flip the global orientation so the model faces the camera.
    # rotation_y_180 = torch.tensor([[np.cos(np.pi), 0, np.sin(np.pi)],
    #                                [0, 1, 0],
    #                                [-np.sin(np.pi), 0, np.cos(np.pi)]], dtype=torch.float32)
    # # Rotation of 180 degrees about the X axis, so the head points up
    # rotation_x_180 = torch.tensor([[1, 0, 0],
    #                                [0, -1, 0],
    #                                [0, 0, -1]], dtype=torch.float32)
    # # Compose the two rotations
    # combined_rotation = torch.matmul(rotation_y_180, rotation_x_180)
    # # Apply the combined rotation to root_orient
    # root_orient = torch.matmul(root_orient, combined_rotation.unsqueeze(0))

    # Generate the SMPL-X mesh (a fixed global orientation is used here
    # instead of the predicted root_orient)
    output = model(global_orient=torch.tensor([[-0.2, 0, 0]]), body_pose=pose_body,
                   betas=betas, transl=trans)
    vertices = output.vertices.detach().cpu().numpy().squeeze()
    faces = model.faces
    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)

    # Create a pyrender scene
    scene = pyrender.Scene()
    mesh = pyrender.Mesh.from_trimesh(mesh)
    scene.add(mesh)

    # Set up the camera and light
    camera = pyrender.PerspectiveCamera(yfov=np.pi / 3.0, aspectRatio=1.0)
    camera_pose = np.eye(4)
    camera_pose[:3, 3] = np.array([0, 0, 2])  # Move camera further away
    scene.add(camera, pose=camera_pose)
    light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=2.0)
    scene.add(light, pose=camera_pose)

    # Render the scene
    color, depth = renderer.render(scene)
    cv2.imwrite(f"out/{i}.jpg", cv2.cvtColor(color, cv2.COLOR_RGB2BGR))
    frames.append(color)
    scene.clear()  # clear the scene to release resources
    gc.collect()   # force garbage collection

print("here")
# Save as video
imageio.mimsave('output.mp4', frames, fps=25, macro_block_size=None)

if 0:  # alternative: write the video with OpenCV (note the fps of 1 here)
    video_path = 'output.mp4'
    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), 1, (800, 800))
    for frame in frames:
        out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
        cv2.imwrite("111.jpg", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    out.release()
    print(f'Video saved at {video_path}')
Image inference code:
# coding=utf-8
import sys
import os
import cv2

current_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_dir)
paths = [os.path.abspath(__file__).split('scripts')[0]]
print('current_dir', current_dir)
paths.append(os.path.abspath(os.path.join(current_dir, 'src')))
for path in paths:
    sys.path.insert(0, path)
    os.environ['PYTHONPATH'] = (os.environ.get('PYTHONPATH', '') + ':' + path).strip(':')

import glob
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
import torch
import torchvision
import pickle
import gc
import time
import warnings

warnings.filterwarnings("ignore")

# Load the TorchScript model
model = torch.jit.load('./nlf_l_multi.torchscript').cuda().eval()
batch_size = 1
key_names = ['boxes', 'pose', 'betas', 'trans', 'vertices3d', 'joints3d',
             'vertices2d', 'joints2d', 'vertices3d_nonparam', 'joints3d_nonparam',
             'vertices2d_nonparam', 'joints2d_nonparam',
             'vertex_uncertainties', 'joint_uncertainties']
# paths = glob.glob(r"/shared_disk/users/lbg/project/human_4d/nlf_pose/data_pic/*.png")
paths = glob.glob(r"/shared_disk/users/lbg/project/human_4d/nlf_pose/data_pic/a2/*.jpg")

for vi, img_path in enumerate(paths):
    try:
        save_path = img_path[:-4] + ".pkl"
        frame = torchvision.io.read_image(img_path)
        frames = torch.unsqueeze(frame, 0).cuda()  # Shape: (1, C, H, W)
        num_frames = frames.shape[0]
        print("frames shape: ", frames.shape, num_frames)
        results = {key: [] for key in key_names}
        reid = 0  # index of the detected person to keep and visualize
        with torch.no_grad():
            for i in range(0, num_frames, batch_size):
                frame_batch = frames
                start = time.time()
                preds = model.detect_smpl_batched(frame_batch, model_name='smplx')
                print("i:", i, num_frames, frame_batch.shape, 'time', time.time() - start)
                for key in preds:
                    results[key].extend([p.cpu()[reid:reid + 1] for p in preds[key]])
                # Fetch the 2D joints and vertices for visualization
                joints2ds = preds['joints2d'][0].cpu().numpy()      # per person: (55, 2)
                vertices2ds = preds['vertices2d'][0].cpu().numpy()  # per person: (1024, 2)
                img = cv2.imread(img_path)
                img_copy = img.copy()  # draw on a copy
                for pid, joints2d in enumerate(joints2ds):
                    if pid != reid:
                        continue
                    for joint in joints2d:
                        # Draw each joint
                        x, y = int(joint[0]), int(joint[1])
                        cv2.circle(img_copy, (x, y), 5, (0, 0, 255), -1)  # red dots (BGR)
                for pid, vertices2d in enumerate(vertices2ds):
                    if pid != reid:
                        continue
                    for vertex in vertices2d:
                        x, y = int(vertex[0]), int(vertex[1])
                        cv2.circle(img_copy, (x, y), 2, (255, 0, 0), -1)  # blue dots (BGR)
                print("img_path:", img_path)
                cv2.imwrite(f'{img_path[:-4]}_key.jpg', img_copy)
        del frames
        torch.cuda.empty_cache()
        gc.collect()
        print("len_pose:", len(results['pose']), "len_betas:", len(results['betas']))
        with open(save_path, 'wb') as f:
            pickle.dump(results, f)
        print("results: ", len(results['pose']))
    except Exception as e:
        print(f"Error processing image: {img_path}")
        print(e)

print("images processed")
Latest PyTorch inference progress:
load_crop_model loads OK:
def load_crop_model():
    cfg = get_config()
    ji_np = np.load(f'{spu.FLAGS.model_dir}/joint_info.npz')
    ji = posepile.joint_info.JointInfo(ji_np['joint_names'], ji_np['joint_edges'])
    backbone_raw = getattr(effnet_pt, f'efficientnet_v2_{cfg.efficientnet_size}')()
    preproc_layer = effnet_pt.PreprocLayer()
    backbone = torch.nn.Sequential(preproc_layer, backbone_raw.features)
    model = metrabs_pt.Metrabs(backbone, ji)
    model.eval()
    inp = torch.zeros((1, 3, cfg.proc_side, cfg.proc_side), dtype=torch.float32)
    intr = torch.eye(3, dtype=torch.float32)[np.newaxis]
    model((inp, intr))
    model.load_state_dict(torch.load(f'{spu.FLAGS.model_dir}/ckpt.pt'))
    return model
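A hedged usage sketch (the (image, intrinsics) input tuple and the dummy shapes follow load_crop_model above; cfg.proc_side == 256 is an assumption):

model = load_crop_model()
img = torch.zeros((1, 3, 256, 256), dtype=torch.float32)  # assumes cfg.proc_side == 256
K = torch.eye(3, dtype=torch.float32)[None]               # dummy camera intrinsics
with torch.no_grad():
    preds = model((img, K))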
Missing vertex_subset_1024.npz:
FileNotFoundError: [Errno 2] No such file or directory: 'body_models/smpl/vertex_subset_1024.npz'
Calling the original renderer (not yet successful):
# coding=utf-8
import sys
import os
import imageio

current_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_dir)
paths = [os.path.abspath(__file__).split('scripts')[0]]
print('current_dir', current_dir)
paths.append(os.path.abspath(os.path.join(current_dir, 'src')))
paths.append('/shared_disk/users/lbg/project/human_4d/nlf_pose')
for path in paths:
    sys.path.insert(0, path)
    os.environ['PYTHONPATH'] = (os.environ.get('PYTHONPATH', '') + ':' + path).strip(':')

import pickle
import gc
import numpy as np
import torch
import cv2
import pyrender
from tqdm import tqdm
import trimesh
from smplx import SMPLX

os.environ["PYOPENGL_PLATFORM"] = "egl"
os.environ['EGL_DEVICE_ID'] = '0'
from nlf_2.nlf.rendering import MeshViewer

render_w = 720
render_h = 1280
fov_degrees = np.radians(55)  # despite the name, this value is in radians
aspectRatio = 359.5 / 639.5
aspectRatio = 720 / 1280  # overrides the line above
camera = pyrender.PerspectiveCamera(yfov=fov_degrees, aspectRatio=aspectRatio)
camera_pose = np.eye(4)
# camera_pose[:3, 3] = np.array([0, 0, aaa[0] * 0.88])  # Move camera further away
# camera_pose[:3, 3] = np.array([0, 0, aaa[0] - 0.8])   # Move camera further away
mesh_v = MeshViewer((render_w, render_h), camera)

pred_pkl = r'/shared_disk/users/lbg/project/human_4d/nlf_pose/output/single_1/single_1.pkl'
with open(pred_pkl, 'rb') as f:
    pred = pickle.load(f)
pose = pred['pose']
num_frames = len(pose)
target_shape = pose[0].shape  # use the first tensor's shape as the reference
inconsistent_frames = []

model_path = '/shared_disk/users/lbg/project/human_4d/nlf_pose/SMPLX_NEUTRAL.npz'
model = SMPLX(model_path, gender='neutral', use_pca=False)
os.makedirs("out2", exist_ok=True)

for i, tensor in enumerate(pose):
    if tensor.shape != target_shape:
        inconsistent_frames.append(i)
        pose[i] = torch.zeros(1, 165)
        print(f"Frame {i} has inconsistent shape: {tensor.shape} (expected {target_shape})")
pose = torch.cat(pose, dim=0)

frames = []
for i in tqdm(range(num_frames)):
    vertices = pred['vertices3d'][i].cpu().numpy().squeeze()
    # Standardize the vertices to zero mean / unit std so the mesh sits near the origin
    mean = np.mean(vertices)
    std = np.std(vertices)
    standardized_matrix = (vertices - mean) / std
    faces = model.faces
    mesh = trimesh.Trimesh(vertices=standardized_matrix, faces=faces)
    mesh_v.set_mesh(mesh, color=None, material=None)

    # NOTE: this scene and light are local and never attached to mesh_v,
    # which is likely one reason the render fails.
    light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=2.0)
    scene = pyrender.Scene()
    scene.add(light, pose=camera_pose)

    camera = pyrender.PerspectiveCamera(yfov=fov_degrees, aspectRatio=aspectRatio)
    # Build a perspective camera intrinsics matrix by hand
    fov_y = np.pi / 3          # vertical field of view (radians)
    aspect_ratio = 720 / 1280  # width / height
    image_width = 720
    image_height = 1280
    # focal lengths
    f_y = image_height / (2 * np.tan(fov_y / 2))
    f_x = f_y * aspect_ratio
    # principal point
    c_x = image_width / 2
    c_y = image_height / 2
    # intrinsics matrix (NOTE: PerspectiveCamera ignores this attribute;
    # see the IntrinsicsCamera sketch below)
    camera.intrinsic_matrix = np.array([
        [f_x, 0, c_x],
        [0, f_y, c_y],
        [0, 0, 1],
    ])
    camera_pose = np.eye(4)
    mesh_v.set_camera(camera, camera_pose)
    color_img, depth_img = mesh_v.render()
    cv2.imwrite(f"out2/{i}.jpg", cv2.cvtColor(color_img, cv2.COLOR_RGB2BGR))
    frames.append(color_img)
    scene.clear()  # clear the (unused) scene to release resources
    gc.collect()   # force garbage collection
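One likely culprit above: pyrender's PerspectiveCamera has no intrinsic_matrix parameter, so the assignment is silently ignored. pyrender exposes explicit intrinsics through IntrinsicsCamera instead; a hedged sketch of the substitution (mesh_v follows the script above):

import numpy as np
import pyrender

fov_y = np.pi / 3
image_width, image_height = 720, 1280
f_y = image_height / (2 * np.tan(fov_y / 2))
f_x = f_y  # square pixels; the f_x = f_y * aspect_ratio above looks suspect
camera = pyrender.IntrinsicsCamera(fx=f_x, fy=f_y,
                                   cx=image_width / 2, cy=image_height / 2)
mesh_v.set_camera(camera, np.eye(4))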
