Skip to main content

Chapter 5: High-Fidelity Simulation with NVIDIA Isaac Sim

5.1 Introduction to Omniverse & Isaac Sim

What is NVIDIA Isaac Sim?

Isaac Sim is NVIDIA's robotics simulation platform built on Omniverse, providing photorealistic rendering, physics simulation, and AI-powered tools for robotics development. It bridges the gap between simulation and reality with unprecedented realism.

Key Features

  • Photorealistic Rendering: RTX-powered ray tracing for realistic visuals
  • Advanced Physics: PhysX-based simulation with accurate material properties
  • Synthetic Data Generation: AI-ready training data at scale
  • Cloud-Native: Distributed simulation across multiple GPUs
  • ROS 2 Integration: Seamless connection with robot software

System Requirements

Minimum Requirements:

  • NVIDIA RTX GPU (RTX 3060 or better)
  • 16GB RAM
  • Intel i7 or AMD Ryzen 7 CPU
  • 100GB SSD storage

Recommended Setup:

  • NVIDIA RTX 4090 or A100
  • 32GB+ RAM
  • Intel i9 or AMD Ryzen 9 CPU
  • 500GB NVMe SSD

Installation

# Download Isaac Sim from NVIDIA Developer portal
# Extract and run the installer

# Set up environment
export ISAAC_SIM_PATH=/path/to/isaac-sim
export PYTHONPATH=$ISAAC_SIM_PATH/kit/python:$PYTHONPATH
export LD_LIBRARY_PATH=$ISAAC_SIM_PATH/kit/lib:$LD_LIBRARY_PATH

# Launch Isaac Sim
cd $ISAAC_SIM_PATH
./python.sh standalone_examples/api/omni.isaac.sim/main.py

5.2 Photorealistic Environments & Synthetic Data

Creating Photorealistic Scenes

# isaac_sim_environment.py
import carb
import omni.kit.commands
from omni.isaac.core import World
from omni.isaac.core.utils.nucleus import get_assets_root_path
from omni.isaac.core.utils.stage import add_reference_to_stage, get_current_stage
from pxr import Usd, UsdGeom, Gf
import numpy as np

class PhotorealisticEnvironment:
def __init__(self):
self.world = World()
self.assets_root = get_assets_root_path()
self.stage = get_current_stage()

def create_indoor_environment(self):
"""Create a photorealistic indoor environment"""

# Load kitchen environment
kitchen_usd = f"{self.assets_root}/Isaac/Environments/Simple_Warehouse/simple_warehouse.usd"
add_reference_to_stage(kitchen_usd, "/simple_warehouse")

# Configure lighting for realism
self.setup_realistic_lighting()

# Add materials with PBR properties
self.add_realistic_materials()

# Configure camera with real-world parameters
self.setup_realistic_cameras()

def setup_realistic_lighting(self):
"""Setup physically-based lighting"""

# Dome light for global illumination
from omni.isaac.core.utils.prims import define_prim
from pxr import UsdLux

# HDRI dome light
dome_light = define_prim("/DomeLight", "DomeLight")
dome_light.CreateAttribute("texture:file", Sdf.ValueTypeNames.Asset).Set(
f"{self.assets_root}/Isaac/Environments/Indoor/studio_small_03_4k.hdr"
)

# Area lights for local illumination
area_light1 = define_prim("/AreaLight1", "RectLight")
area_light1.GetAttribute("intensity").Set(5000)
area_light1.GetAttribute("width").Set(2.0)
area_light1.GetAttribute("height").Set(2.0)
area_light1.GetAttribute("color").Set(Gf.Vec3f(1.0, 0.95, 0.8))

# Point lights for accent
point_light1 = define_prim("/PointLight1", "PointLight")
point_light1.GetAttribute("intensity").Set(10000)
point_light1.GetAttribute("position").Set(Gf.Vec3f(2.0, 2.0, 3.0))

def add_realistic_materials(self):
"""Add physically-based rendering materials"""

from omni.isaac.core.utils.materials import OmniPBRSurface

# Wood material for floor
wood_material = OmniPBRSurface(
prim_path="/Looks/WoodMaterial",
props={
"diffuse_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_diffuse.png",
"normal_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_normal.png",
"roughness_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_roughness.png",
"metallic": 0.0,
"roughness": 0.8
}
)

# Metal material for robot parts
metal_material = OmniPBRSurface(
prim_path="/Looks/MetalMaterial",
props={
"diffuse_color": (0.7, 0.7, 0.7),
"metallic": 1.0,
"roughness": 0.3,
"normal_texture": f"{self.assets_root}/Isaac/Environments/Indoor/metal_normal.png"
}
)

# Apply materials to objects
self.apply_material_to_prim("/simple_warehouse/floor", wood_material)
self.apply_material_to_prim("/humanoid", metal_material)

def setup_realistic_cameras(self):
"""Setup cameras with real-world parameters"""

from omni.isaac.core.sensors import Camera

# Main RGB camera with DSLR parameters
rgb_camera = Camera(
prim_path="/humanoid/head_camera",
position=(0.1, 0, 0.5),
frequency=30,
resolution=(1920, 1080),
data_types=["rgb", "depth", "semantic_segmentation"],
clipping_range=(0.1, 100.0)
)

# Configure camera properties
rgb_camera.set_aperture(f_stop=2.8)
rgb_camera.set_focal_length(35) # 35mm equivalent
rgb_camera.set_shutter_speed(1/125)
rgb_camera.set_iso(400)

# Add camera noise for realism
rgb_camera.add_noise_model(
noise_type="gaussian",
mean=0.0,
std_dev=0.01
)

return rgb_camera

def apply_material_to_prim(self, prim_path, material):
"""Apply material to a primitive"""
prim = self.stage.GetPrimAtPath(prim_path)
if prim:
material.apply(prim)

# Initialize environment
env = PhotorealisticEnvironment()
env.create_indoor_environment()

Synthetic Data Generation

# synthetic_data_generator.py
import omni.kit.commands
from omni.isaac.core import World
from omni.isaac.core.utils.nucleus import get_assets_root_path
from omni.isaac.synthetic_utils import SyntheticDataHelper
import numpy as np
import cv2
import os

class SyntheticDataGenerator:
def __init__(self, output_dir="./synthetic_data"):
self.world = World()
self.output_dir = output_dir
self.sd_helper = SyntheticDataHelper()

# Create output directories
os.makedirs(f"{output_dir}/rgb", exist_ok=True)
os.makedirs(f"{output_dir}/depth", exist_ok=True)
os.makedirs(f"{output_dir}/semantic", exist_ok=True)
os.makedirs(f"{output_dir}/annotations", exist_ok=True)

def generate_training_data(self, num_samples=1000):
"""Generate synthetic training data for AI models"""

for i in range(num_samples):
# Randomize environment
self.randomize_environment()

# Randomize robot pose
self.randomize_robot_pose()

# Randomize lighting
self.randomize_lighting()

# Capture data
rgb_data = self.capture_rgb()
depth_data = self.capture_depth()
semantic_data = self.capture_semantic()

# Generate annotations
annotations = self.generate_annotations()

# Save data
self.save_sample(i, rgb_data, depth_data, semantic_data, annotations)

if i % 100 == 0:
print(f"Generated {i} samples...")

def randomize_environment(self):
"""Randomize environment for domain randomization"""

# Random object positions
objects = ["table", "chair", "box", "bottle"]
for obj in objects:
x = np.random.uniform(-2, 2)
y = np.random.uniform(-2, 2)
z = 0.0
orientation = np.random.uniform(0, 2*np.pi)

# Move object
obj_prim = self.stage.GetPrimAtPath(f"/{obj}")
if obj_prim:
omni.kit.commands.execute(
"TransformPrimCommand",
path=obj_prim.GetPath(),
new_transform=Gf.Transform(
Gf.Rotation(Gf.Vec3d(0, 0, 1), orientation),
Gf.Vec3d(x, y, z)
)
)

# Random material properties
self.randomize_materials()

def randomize_robot_pose(self):
"""Randomize humanoid robot pose"""

# Random joint positions
joint_names = ["left_hip", "left_knee", "right_hip", "right_knee",
"left_shoulder", "right_shoulder", "neck"]

for joint in joint_names:
angle = np.random.uniform(-np.pi/4, np.pi/4)
# Apply joint angle
self.set_joint_angle(joint, angle)

def randomize_lighting(self):
"""Randomize lighting conditions"""

# Random dome light intensity
dome_light = self.stage.GetPrimAtPath("/DomeLight")
if dome_light:
intensity = np.random.uniform(1000, 5000)
dome_light.GetAttribute("intensity").Set(intensity)

# Random area light positions and colors
for i in range(3):
light_path = f"/AreaLight{i+1}"
light = self.stage.GetPrimAtPath(light_path)
if light:
# Random position
x = np.random.uniform(-3, 3)
y = np.random.uniform(-3, 3)
z = np.random.uniform(2, 4)
light.GetAttribute("position").Set(Gf.Vec3f(x, y, z))

# Random color temperature
temp = np.random.uniform(3000, 7000) # Kelvin
color = self.kelvin_to_rgb(temp)
light.GetAttribute("color").Set(Gf.Vec3f(*color))

def kelvin_to_rgb(self, kelvin):
"""Convert color temperature to RGB"""
# Simplified color temperature conversion
if kelvin <= 6600:
r = 255
g = max(0, min(255, -155.25 + 0.445 * kelvin / 100 - 99.2))
b = max(0, min(255, -254.76 + 0.827 * kelvin / 100 + 104.96))
else:
r = max(0, min(255, 351.98 - 0.114 * kelvin / 100))
g = max(0, min(255, 325.45 - 0.079 * kelvin / 100))
b = 255

return (r/255, g/255, b/255)

def capture_rgb(self):
"""Capture RGB image with realistic camera effects"""

# Get camera data
rgb_data = self.sd_helper.get_rgb_data("/humanoid/head_camera")

# Add realistic camera effects
rgb_data = self.add_camera_effects(rgb_data)

return rgb_data

def add_camera_effects(self, image):
"""Add realistic camera effects like lens distortion, chromatic aberration"""

# Lens distortion
h, w = image.shape[:2]
fx, fy = w/2, h/2
cx, cy = w/2, h/2

# Barrel distortion
k1, k2 = -0.1, 0.01

# Apply distortion
distorted = np.zeros_like(image)
for y in range(h):
for x in range(w):
# Normalize coordinates
x_norm = (x - cx) / fx
y_norm = (y - cy) / fy

# Apply distortion
r2 = x_norm**2 + y_norm**2
distortion = 1 + k1 * r2 + k2 * r2**2

x_dist = x_norm * distortion * fx + cx
y_dist = y_norm * distortion * fy + cy

# Bilinear interpolation
if 0 <= x_dist < w-1 and 0 <= y_dist < h-1:
x1, y1 = int(x_dist), int(y_dist)
x2, y2 = min(x1+1, w-1), min(y1+1, h-1)

dx, dy = x_dist - x1, y_dist - y1

# Interpolate for each channel
for c in range(3):
val = (image[y1, x1, c] * (1-dx) * (1-dy) +
image[y1, x2, c] * dx * (1-dy) +
image[y2, x1, c] * (1-dx) * dy +
image[y2, x2, c] * dx * dy)
distorted[y, x, c] = val

return distorted

def capture_depth(self):
"""Capture depth data with realistic sensor noise"""

depth_data = self.sd_helper.get_depth_data("/humanoid/head_camera")

# Add sensor noise
noise = np.random.normal(0, 0.01, depth_data.shape)
depth_data = depth_data + noise

# Clip to valid range
depth_data = np.clip(depth_data, 0.1, 10.0)

return depth_data

def capture_semantic(self):
"""Capture semantic segmentation data"""

semantic_data = self.sd_helper.get_semantic_data("/humanoid/head_camera")

return semantic_data

def generate_annotations(self):
"""Generate annotations for training"""

annotations = {
"objects": self.detect_objects(),
"poses": self.get_robot_poses(),
"scene_info": self.get_scene_info()
}

return annotations

def save_sample(self, index, rgb, depth, semantic, annotations):
"""Save training sample"""

# Save RGB image
cv2.imwrite(f"{self.output_dir}/rgb/sample_{index:06d}.png", rgb)

# Save depth (convert to 16-bit)
depth_16bit = (depth * 1000).astype(np.uint16)
cv2.imwrite(f"{self.output_dir}/depth/sample_{index:06d}.png", depth_16bit)

# Save semantic
cv2.imwrite(f"{self.output_dir}/semantic/sample_{index:06d}.png", semantic)

# Save annotations as JSON
import json
with open(f"{self.output_dir}/annotations/sample_{index:06d}.json", "w") as f:
json.dump(annotations, f, indent=2)

# Generate synthetic data
generator = SyntheticDataGenerator()
generator.generate_training_data(num_samples=1000)

5.3 Sim-to-Real Pipeline

Domain Randomization

# domain_randomization.py
import numpy as np
from omni.isaac.core import World
from omni.isaac.core.utils.prims import define_prim
from pxr import Usd, UsdGeom, Gf

class DomainRandomization:
def __init__(self):
self.world = World()
self.randomization_params = {
"lighting": {"intensity_range": (500, 5000), "color_temp_range": (3000, 7000)},
"materials": {"roughness_range": (0.1, 0.9), "metallic_range": (0.0, 1.0)},
"physics": {"gravity_range": (-10.5, -9.5), "friction_range": (0.5, 1.0)},
"camera": {"noise_range": (0.001, 0.05), "distortion_range": (0.0, 0.2)}
}

def randomize_all(self):
"""Apply all domain randomization"""
self.randomize_lighting()
self.randomize_materials()
self.randomize_physics()
self.randomize_camera()

def randomize_lighting(self):
"""Randomize lighting conditions"""

# Random dome light
dome_light = self.stage.GetPrimAtPath("/DomeLight")
if dome_light:
intensity = np.random.uniform(*self.randomization_params["lighting"]["intensity_range"])
dome_light.GetAttribute("intensity").Set(intensity)

# Random area lights
for i in range(3):
light_path = f"/AreaLight{i+1}"
light = self.stage.GetPrimAtPath(light_path)
if light:
# Random intensity
intensity = np.random.uniform(1000, 10000)
light.GetAttribute("intensity").Set(intensity)

# Random color
temp = np.random.uniform(*self.randomization_params["lighting"]["color_temp_range"])
color = self.kelvin_to_rgb(temp)
light.GetAttribute("color").Set(Gf.Vec3f(*color))

def randomize_materials(self):
"""Randomize material properties"""

materials = ["/Looks/WoodMaterial", "/Looks/MetalMaterial", "/Looks/PlasticMaterial"]

for mat_path in materials:
material = self.stage.GetPrimAtPath(mat_path)
if material:
# Random roughness
roughness = np.random.uniform(*self.randomization_params["materials"]["roughness_range"])
material.GetAttribute("roughness").Set(roughness)

# Random metallic
metallic = np.random.uniform(*self.randomization_params["materials"]["metallic_range"])
material.GetAttribute("metallic").Set(metallic)

def randomize_physics(self):
"""Randomize physics parameters"""

# Random gravity
gravity = np.random.uniform(*self.randomization_params["physics"]["gravity_range"])
self.world.set_gravity(gravity)

# Random friction for ground plane
ground = self.stage.GetPrimAtPath("/ground_plane")
if ground:
friction = np.random.uniform(*self.randomization_params["physics"]["friction_range"])
# Apply friction to collision material

def randomize_camera(self):
"""Randomize camera parameters"""

camera = self.stage.GetPrimAtPath("/humanoid/head_camera")
if camera:
# Random noise
noise = np.random.uniform(*self.randomization_params["camera"]["noise_range"])
# Apply noise to camera sensor

# Random distortion
distortion = np.random.uniform(*self.randomization_params["camera"]["distortion_range"])
# Apply lens distortion

Transfer Learning Pipeline

# sim_to_real_transfer.py
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np

class Sim2RealTransfer:
def __init__(self, sim_model_path, real_dataset_path):
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load simulation-trained model
self.sim_model = self.load_model(sim_model_path)

# Load real-world dataset
self.real_dataset = self.load_real_dataset(real_dataset_path)

# Create adaptation model
self.adaptation_model = self.create_adaptation_model()

def load_model(self, path):
"""Load pre-trained simulation model"""
model = torch.load(path)
model.eval()
return model.to(self.device)

def create_adaptation_model(self):
"""Create domain adaptation network"""

class AdaptationNet(nn.Module):
def __init__(self, feature_dim=512):
super().__init__()
self.feature_extractor = nn.Sequential(
nn.Conv2d(3, 64, 3, padding=1),
nn.ReLU(),
nn.Conv2d(64, 128, 3, padding=1),
nn.ReLU(),
nn.AdaptiveAvgPool2d((1, 1)),
nn.Flatten(),
nn.Linear(128, feature_dim)
)

self.domain_classifier = nn.Sequential(
nn.Linear(feature_dim, 256),
nn.ReLU(),
nn.Linear(256, 1),
nn.Sigmoid()
)

self.task_classifier = nn.Sequential(
nn.Linear(feature_dim, 256),
nn.ReLU(),
nn.Linear(256, 10) # 10 classes
)

def forward(self, x):
features = self.feature_extractor(x)
domain_pred = self.domain_classifier(features)
task_pred = self.task_classifier(features)
return domain_pred, task_pred

return AdaptationNet().to(self.device)

def train_adaptation(self, epochs=100):
"""Train domain adaptation"""

optimizer = torch.optim.Adam(self.adaptation_model.parameters(), lr=0.001)

for epoch in range(epochs):
total_loss = 0
domain_loss = 0
task_loss = 0

for batch in self.real_dataset:
# Get simulation features
sim_features = self.get_sim_features(batch)

# Get real features
real_features = self.adaptation_model.feature_extractor(batch)

# Domain adversarial training
domain_pred, task_pred = self.adaptation_model(batch)

# Compute losses
d_loss = self.domain_loss(domain_pred, real_features)
t_loss = self.task_loss(task_pred, batch.labels)

# Combined loss
loss = d_loss + t_loss

# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()

total_loss += loss.item()
domain_loss += d_loss.item()
task_loss += t_loss.item()

print(f"Epoch {epoch}: Total Loss: {total_loss:.4f}, "
f"Domain Loss: {domain_loss:.4f}, Task Loss: {task_loss:.4f}")

def evaluate_transfer(self, test_dataset):
"""Evaluate transfer performance"""

correct = 0
total = 0

with torch.no_grad():
for batch in test_dataset:
_, task_pred = self.adaptation_model(batch)
predicted = torch.argmax(task_pred, dim=1)
total += batch.labels.size(0)
correct += (predicted == batch.labels).sum().item()

accuracy = 100 * correct / total
print(f"Transfer Accuracy: {accuracy:.2f}%")
return accuracy

5.4 Training AI Models in Simulation

Reinforcement Learning Environment

# rl_environment.py
import gym
from gym import spaces
import numpy as np
from omni.isaac.core import World
from omni.isaac.core.objects import DynamicCuboid
from omni.isaac.core.utils.nucleus import get_assets_root_path

class HumanoidRLEnvironment(gym.Env):
def __init__(self):
super().__init__()

self.world = World()
self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(12,), dtype=np.float32)
self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(48,), dtype=np.float32)

# Initialize humanoid
self.humanoid = self.create_humanoid()

# Target position
self.target_pos = np.array([2.0, 0.0, 0.0])

# Episode parameters
self.max_steps = 1000
self.current_step = 0

def create_humanoid(self):
"""Create humanoid robot in simulation"""

from omni.isaac.core.robots import Robot

# Load humanoid robot
humanoid = Robot(
prim_path="/humanoid",
usd_path=f"{get_assets_root_path()}/Isaac/Robots/Humanoid/humanoid.usd",
name="humanoid"
)

# Add to world
self.world.add_robot(humanoid)

return humanoid

def reset(self):
"""Reset environment"""

# Reset humanoid position
self.humanoid.set_world_pose(position=[0, 0, 1.0])

# Reset joint positions
initial_joints = np.zeros(12)
self.humanoid.set_joint_positions(initial_joints)

# Random target position
self.target_pos = np.array([
np.random.uniform(-3, 3),
np.random.uniform(-3, 3),
0.0
])

# Reset step counter
self.current_step = 0

return self.get_observation()

def step(self, action):
"""Execute action in environment"""

# Apply joint commands
self.humanoid.set_joint_positions(action)

# Step simulation
self.world.step(render=True)

# Get observation
obs = self.get_observation()

# Calculate reward
reward = self.calculate_reward()

# Check if done
done = self.current_step >= self.max_steps or self.check_success()

self.current_step += 1

return obs, reward, done, {}

def get_observation(self):
"""Get current observation"""

# Get robot pose
pose = self.humanoid.get_world_pose()

# Get joint positions and velocities
joint_pos = self.humanoid.get_joint_positions()
joint_vel = self.humanoid.get_joint_velocities()

# Get target relative position
robot_pos = np.array(pose[0])
target_rel = self.target_pos - robot_pos

# Combine observations
obs = np.concatenate([
pose[0], # position (3)
pose[1], # orientation (4)
joint_pos, # joint positions (12)
joint_vel, # joint velocities (12)
target_rel, # target relative position (3)
[self.current_step / self.max_steps] # progress (1)
])

return obs.astype(np.float32)

def calculate_reward(self):
"""Calculate reward based on current state"""

# Distance to target
robot_pos = np.array(self.humanoid.get_world_pose()[0])
distance = np.linalg.norm(self.target_pos - robot_pos)

# Reward components
distance_reward = -distance * 0.1

# Energy penalty
joint_vel = self.humanoid.get_joint_velocities()
energy_penalty = -np.sum(joint_vel**2) * 0.001

# Upright bonus
orientation = self.humanoid.get_world_pose()[1]
upright_bonus = orientation[3] # quaternion w component

# Total reward
reward = distance_reward + energy_penalty + upright_bonus * 0.1

return reward

def check_success(self):
"""Check if task is completed"""

robot_pos = np.array(self.humanoid.get_world_pose()[0])
distance = np.linalg.norm(self.target_pos - robot_pos)

return distance < 0.5 # Success if within 0.5m of target

Training Loop

# train_humanoid_rl.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal
import numpy as np
from rl_environment import HumanoidRLEnvironment

class PPOAgent:
def __init__(self, state_dim, action_dim):
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Actor network
self.actor = nn.Sequential(
nn.Linear(state_dim, 256),
nn.ReLU(),
nn.Linear(256, 256),
nn.ReLU(),
nn.Linear(256, action_dim),
nn.Tanh()
).to(self.device)

# Critic network
self.critic = nn.Sequential(
nn.Linear(state_dim, 256),
nn.ReLU(),
nn.Linear(256, 256),
nn.ReLU(),
nn.Linear(256, 1)
).to(self.device)

self.optimizer = optim.Adam(
list(self.actor.parameters()) + list(self.critic.parameters()),
lr=3e-4
)

# PPO parameters
self.clip_ratio = 0.2
self.gamma = 0.99
self.lam = 0.95
self.epochs = 10
self.batch_size = 64

def get_action(self, state):
"""Get action from policy"""
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)

with torch.no_grad():
action = self.actor(state)
value = self.critic(state)

# Add exploration noise
action = action.cpu().numpy()[0]
action += np.random.normal(0, 0.1, size=action.shape)
action = np.clip(action, -1.0, 1.0)

return action, value.item()

def update(self, states, actions, rewards, dones, values):
"""Update policy using PPO"""

# Calculate advantages
advantages = self.calculate_advantages(rewards, values, dones)

# Convert to tensors
states = torch.FloatTensor(states).to(self.device)
actions = torch.FloatTensor(actions).to(self.device)
advantages = torch.FloatTensor(advantages).to(self.device)
returns = torch.FloatTensor(rewards).to(self.device)

# PPO update
for _ in range(self.epochs):
# Get current policy
current_actions = self.actor(states)
current_values = self.critic(states)

# Calculate ratio
ratio = torch.exp(current_actions - actions)

# Clipped surrogate objective
surr1 = ratio * advantages
surr2 = torch.clamp(ratio, 1 - self.clip_ratio, 1 + self.clip_ratio) * advantages

# Actor loss
actor_loss = -torch.min(surr1, surr2).mean()

# Critic loss
critic_loss = nn.MSELoss()(current_values.squeeze(), returns)

# Total loss
loss = actor_loss + 0.5 * critic_loss

# Update
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

def calculate_advantages(self, rewards, values, dones):
"""Calculate GAE advantages"""
advantages = []
gae = 0

for i in reversed(range(len(rewards))):
if i == len(rewards) - 1:
next_value = 0
else:
next_value = values[i + 1]

delta = rewards[i] + self.gamma * next_value * (1 - dones[i]) - values[i]
gae = delta + self.gamma * self.lam * (1 - dones[i]) * gae
advantages.insert(0, gae)

return advantages

def train_humanoid():
"""Train humanoid robot using RL"""

env = HumanoidRLEnvironment()
agent = PPOAgent(state_dim=48, action_dim=12)

# Training loop
for episode in range(1000):
state = env.reset()

# Collect trajectory
states, actions, rewards, dones, values = [], [], [], [], []

for step in range(1000):
action, value = agent.get_action(state)
next_state, reward, done, _ = env.step(action)

states.append(state)
actions.append(action)
rewards.append(reward)
dones.append(done)
values.append(value)

state = next_state

if done:
break

# Update agent
agent.update(states, actions, rewards, dones, values)

# Log progress
if episode % 10 == 0:
avg_reward = np.mean(rewards)
print(f"Episode {episode}: Average Reward: {avg_reward:.2f}")

# Save checkpoint
torch.save(agent.actor.state_dict(), f"humanoid_actor_episode_{episode}.pth")
torch.save(agent.critic.state_dict(), f"humanoid_critic_episode_{episode}.pth")

if __name__ == "__main__":
train_humanoid()

Next Chapter: In Chapter 6, we'll explore robot vision systems using RealSense cameras and OpenCV for perception.

Humanoid Robotics AI Tutor

Hello! I'm your Humanoid Robotics AI tutor.

Ask me anything about ROS 2, NVIDIA Isaac, VLA, or any Physical AI topics!