Chapter 5: High-Fidelity Simulation with NVIDIA Isaac Sim

5.1 Introduction to Omniverse & Isaac Sim

What is NVIDIA Isaac Sim?

Isaac Sim is NVIDIA's robotics simulation platform built on Omniverse, providing photorealistic rendering, physics simulation, and AI-powered tools for robotics development. It bridges the gap between simulation and reality with unprecedented realism.

Key Features

Photorealistic Rendering: RTX-powered ray tracing for realistic visuals
Advanced Physics: PhysX-based simulation with accurate material properties
Synthetic Data Generation: AI-ready training data at scale
Cloud-Native: Distributed simulation across multiple GPUs
ROS 2 Integration: Seamless connection with robot software

System Requirements

Minimum Requirements:

NVIDIA RTX GPU (RTX 3060 or better)
16GB RAM
Intel i7 or AMD Ryzen 7 CPU
100GB SSD storage

Recommended Setup:

NVIDIA RTX 4090 or A100
32GB+ RAM
Intel i9 or AMD Ryzen 9 CPU
500GB NVMe SSD

Installation

# Download Isaac Sim from NVIDIA Developer portal
# Extract and run the installer

# Set up environment
export ISAAC_SIM_PATH=/path/to/isaac-sim
export PYTHONPATH=$ISAAC_SIM_PATH/kit/python:$PYTHONPATH
export LD_LIBRARY_PATH=$ISAAC_SIM_PATH/kit/lib:$LD_LIBRARY_PATH

# Launch Isaac Sim
cd $ISAAC_SIM_PATH
./python.sh standalone_examples/api/omni.isaac.sim/main.py

5.2 Photorealistic Environments & Synthetic Data

Creating Photorealistic Scenes

# isaac_sim_environment.py
import carb
import omni.kit.commands
from omni.isaac.core import World
from omni.isaac.core.utils.nucleus import get_assets_root_path
from omni.isaac.core.utils.stage import add_reference_to_stage, get_current_stage
from pxr import Usd, UsdGeom, Gf
import numpy as np

class PhotorealisticEnvironment:
    def __init__(self):
        self.world = World()
        self.assets_root = get_assets_root_path()
        self.stage = get_current_stage()
        
    def create_indoor_environment(self):
        """Create a photorealistic indoor environment"""
        
        # Load kitchen environment
        kitchen_usd = f"{self.assets_root}/Isaac/Environments/Simple_Warehouse/simple_warehouse.usd"
        add_reference_to_stage(kitchen_usd, "/simple_warehouse")
        
        # Configure lighting for realism
        self.setup_realistic_lighting()
        
        # Add materials with PBR properties
        self.add_realistic_materials()
        
        # Configure camera with real-world parameters
        self.setup_realistic_cameras()
        
    def setup_realistic_lighting(self):
        """Setup physically-based lighting"""
        
        # Dome light for global illumination
        from omni.isaac.core.utils.prims import define_prim
        from pxr import UsdLux
        
        # HDRI dome light
        dome_light = define_prim("/DomeLight", "DomeLight")
        dome_light.CreateAttribute("texture:file", Sdf.ValueTypeNames.Asset).Set(
            f"{self.assets_root}/Isaac/Environments/Indoor/studio_small_03_4k.hdr"
        )
        
        # Area lights for local illumination
        area_light1 = define_prim("/AreaLight1", "RectLight")
        area_light1.GetAttribute("intensity").Set(5000)
        area_light1.GetAttribute("width").Set(2.0)
        area_light1.GetAttribute("height").Set(2.0)
        area_light1.GetAttribute("color").Set(Gf.Vec3f(1.0, 0.95, 0.8))
        
        # Point lights for accent
        point_light1 = define_prim("/PointLight1", "PointLight")
        point_light1.GetAttribute("intensity").Set(10000)
        point_light1.GetAttribute("position").Set(Gf.Vec3f(2.0, 2.0, 3.0))
        
    def add_realistic_materials(self):
        """Add physically-based rendering materials"""
        
        from omni.isaac.core.utils.materials import OmniPBRSurface
        
        # Wood material for floor
        wood_material = OmniPBRSurface(
            prim_path="/Looks/WoodMaterial",
            props={
                "diffuse_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_diffuse.png",
                "normal_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_normal.png",
                "roughness_texture": f"{self.assets_root}/Isaac/Environments/Indoor/wood_floor_roughness.png",
                "metallic": 0.0,
                "roughness": 0.8
            }
        )
        
        # Metal material for robot parts
        metal_material = OmniPBRSurface(
            prim_path="/Looks/MetalMaterial",
            props={
                "diffuse_color": (0.7, 0.7, 0.7),
                "metallic": 1.0,
                "roughness": 0.3,
                "normal_texture": f"{self.assets_root}/Isaac/Environments/Indoor/metal_normal.png"
            }
        )
        
        # Apply materials to objects
        self.apply_material_to_prim("/simple_warehouse/floor", wood_material)
        self.apply_material_to_prim("/humanoid", metal_material)
        
    def setup_realistic_cameras(self):
        """Setup cameras with real-world parameters"""
        
        from omni.isaac.core.sensors import Camera
        
        # Main RGB camera with DSLR parameters
        rgb_camera = Camera(
            prim_path="/humanoid/head_camera",
            position=(0.1, 0, 0.5),
            frequency=30,
            resolution=(1920, 1080),
            data_types=["rgb", "depth", "semantic_segmentation"],
            clipping_range=(0.1, 100.0)
        )
        
        # Configure camera properties
        rgb_camera.set_aperture(f_stop=2.8)
        rgb_camera.set_focal_length(35)  # 35mm equivalent
        rgb_camera.set_shutter_speed(1/125)
        rgb_camera.set_iso(400)
        
        # Add camera noise for realism
        rgb_camera.add_noise_model(
            noise_type="gaussian",
            mean=0.0,
            std_dev=0.01
        )
        
        return rgb_camera
    
    def apply_material_to_prim(self, prim_path, material):
        """Apply material to a primitive"""
        prim = self.stage.GetPrimAtPath(prim_path)
        if prim:
            material.apply(prim)

# Initialize environment
env = PhotorealisticEnvironment()
env.create_indoor_environment()

Synthetic Data Generation

# synthetic_data_generator.py
import omni.kit.commands
from omni.isaac.core import World
from omni.isaac.core.utils.nucleus import get_assets_root_path
from omni.isaac.synthetic_utils import SyntheticDataHelper
import numpy as np
import cv2
import os

class SyntheticDataGenerator:
    def __init__(self, output_dir="./synthetic_data"):
        self.world = World()
        self.output_dir = output_dir
        self.sd_helper = SyntheticDataHelper()
        
        # Create output directories
        os.makedirs(f"{output_dir}/rgb", exist_ok=True)
        os.makedirs(f"{output_dir}/depth", exist_ok=True)
        os.makedirs(f"{output_dir}/semantic", exist_ok=True)
        os.makedirs(f"{output_dir}/annotations", exist_ok=True)
        
    def generate_training_data(self, num_samples=1000):
        """Generate synthetic training data for AI models"""
        
        for i in range(num_samples):
            # Randomize environment
            self.randomize_environment()
            
            # Randomize robot pose
            self.randomize_robot_pose()
            
            # Randomize lighting
            self.randomize_lighting()
            
            # Capture data
            rgb_data = self.capture_rgb()
            depth_data = self.capture_depth()
            semantic_data = self.capture_semantic()
            
            # Generate annotations
            annotations = self.generate_annotations()
            
            # Save data
            self.save_sample(i, rgb_data, depth_data, semantic_data, annotations)
            
            if i % 100 == 0:
                print(f"Generated {i} samples...")
    
    def randomize_environment(self):
        """Randomize environment for domain randomization"""
        
        # Random object positions
        objects = ["table", "chair", "box", "bottle"]
        for obj in objects:
            x = np.random.uniform(-2, 2)
            y = np.random.uniform(-2, 2)
            z = 0.0
            orientation = np.random.uniform(0, 2*np.pi)
            
            # Move object
            obj_prim = self.stage.GetPrimAtPath(f"/{obj}")
            if obj_prim:
                omni.kit.commands.execute(
                    "TransformPrimCommand",
                    path=obj_prim.GetPath(),
                    new_transform=Gf.Transform(
                        Gf.Rotation(Gf.Vec3d(0, 0, 1), orientation),
                        Gf.Vec3d(x, y, z)
                    )
                )
        
        # Random material properties
        self.randomize_materials()
    
    def randomize_robot_pose(self):
        """Randomize humanoid robot pose"""
        
        # Random joint positions
        joint_names = ["left_hip", "left_knee", "right_hip", "right_knee",
                      "left_shoulder", "right_shoulder", "neck"]
        
        for joint in joint_names:
            angle = np.random.uniform(-np.pi/4, np.pi/4)
            # Apply joint angle
            self.set_joint_angle(joint, angle)
    
    def randomize_lighting(self):
        """Randomize lighting conditions"""
        
        # Random dome light intensity
        dome_light = self.stage.GetPrimAtPath("/DomeLight")
        if dome_light:
            intensity = np.random.uniform(1000, 5000)
            dome_light.GetAttribute("intensity").Set(intensity)
        
        # Random area light positions and colors
        for i in range(3):
            light_path = f"/AreaLight{i+1}"
            light = self.stage.GetPrimAtPath(light_path)
            if light:
                # Random position
                x = np.random.uniform(-3, 3)
                y = np.random.uniform(-3, 3)
                z = np.random.uniform(2, 4)
                light.GetAttribute("position").Set(Gf.Vec3f(x, y, z))
                
                # Random color temperature
                temp = np.random.uniform(3000, 7000)  # Kelvin
                color = self.kelvin_to_rgb(temp)
                light.GetAttribute("color").Set(Gf.Vec3f(*color))
    
    def kelvin_to_rgb(self, kelvin):
        """Convert color temperature to RGB"""
        # Simplified color temperature conversion
        if kelvin <= 6600:
            r = 255
            g = max(0, min(255, -155.25 + 0.445 * kelvin / 100 - 99.2))
            b = max(0, min(255, -254.76 + 0.827 * kelvin / 100 + 104.96))
        else:
            r = max(0, min(255, 351.98 - 0.114 * kelvin / 100))
            g = max(0, min(255, 325.45 - 0.079 * kelvin / 100))
            b = 255
        
        return (r/255, g/255, b/255)
    
    def capture_rgb(self):
        """Capture RGB image with realistic camera effects"""
        
        # Get camera data
        rgb_data = self.sd_helper.get_rgb_data("/humanoid/head_camera")
        
        # Add realistic camera effects
        rgb_data = self.add_camera_effects(rgb_data)
        
        return rgb_data
    
    def add_camera_effects(self, image):
        """Add realistic camera effects like lens distortion, chromatic aberration"""
        
        # Lens distortion
        h, w = image.shape[:2]
        fx, fy = w/2, h/2
        cx, cy = w/2, h/2
        
        # Barrel distortion
        k1, k2 = -0.1, 0.01
        
        # Apply distortion
        distorted = np.zeros_like(image)
        for y in range(h):
            for x in range(w):
                # Normalize coordinates
                x_norm = (x - cx) / fx
                y_norm = (y - cy) / fy
                
                # Apply distortion
                r2 = x_norm**2 + y_norm**2
                distortion = 1 + k1 * r2 + k2 * r2**2
                
                x_dist = x_norm * distortion * fx + cx
                y_dist = y_norm * distortion * fy + cy
                
                # Bilinear interpolation
                if 0 <= x_dist < w-1 and 0 <= y_dist < h-1:
                    x1, y1 = int(x_dist), int(y_dist)
                    x2, y2 = min(x1+1, w-1), min(y1+1, h-1)
                    
                    dx, dy = x_dist - x1, y_dist - y1
                    
                    # Interpolate for each channel
                    for c in range(3):
                        val = (image[y1, x1, c] * (1-dx) * (1-dy) +
                               image[y1, x2, c] * dx * (1-dy) +
                               image[y2, x1, c] * (1-dx) * dy +
                               image[y2, x2, c] * dx * dy)
                        distorted[y, x, c] = val
        
        return distorted
    
    def capture_depth(self):
        """Capture depth data with realistic sensor noise"""
        
        depth_data = self.sd_helper.get_depth_data("/humanoid/head_camera")
        
        # Add sensor noise
        noise = np.random.normal(0, 0.01, depth_data.shape)
        depth_data = depth_data + noise
        
        # Clip to valid range
        depth_data = np.clip(depth_data, 0.1, 10.0)
        
        return depth_data
    
    def capture_semantic(self):
        """Capture semantic segmentation data"""
        
        semantic_data = self.sd_helper.get_semantic_data("/humanoid/head_camera")
        
        return semantic_data
    
    def generate_annotations(self):
        """Generate annotations for training"""
        
        annotations = {
            "objects": self.detect_objects(),
            "poses": self.get_robot_poses(),
            "scene_info": self.get_scene_info()
        }
        
        return annotations
    
    def save_sample(self, index, rgb, depth, semantic, annotations):
        """Save training sample"""
        
        # Save RGB image
        cv2.imwrite(f"{self.output_dir}/rgb/sample_{index:06d}.png", rgb)
        
        # Save depth (convert to 16-bit)
        depth_16bit = (depth * 1000).astype(np.uint16)
        cv2.imwrite(f"{self.output_dir}/depth/sample_{index:06d}.png", depth_16bit)
        
        # Save semantic
        cv2.imwrite(f"{self.output_dir}/semantic/sample_{index:06d}.png", semantic)
        
        # Save annotations as JSON
        import json
        with open(f"{self.output_dir}/annotations/sample_{index:06d}.json", "w") as f:
            json.dump(annotations, f, indent=2)

# Generate synthetic data
generator = SyntheticDataGenerator()
generator.generate_training_data(num_samples=1000)

5.3 Sim-to-Real Pipeline

Domain Randomization

# domain_randomization.py
import numpy as np
from omni.isaac.core import World
from omni.isaac.core.utils.prims import define_prim
from pxr import Usd, UsdGeom, Gf

class DomainRandomization:
    def __init__(self):
        self.world = World()
        self.randomization_params = {
            "lighting": {"intensity_range": (500, 5000), "color_temp_range": (3000, 7000)},
            "materials": {"roughness_range": (0.1, 0.9), "metallic_range": (0.0, 1.0)},
            "physics": {"gravity_range": (-10.5, -9.5), "friction_range": (0.5, 1.0)},
            "camera": {"noise_range": (0.001, 0.05), "distortion_range": (0.0, 0.2)}
        }
    
    def randomize_all(self):
        """Apply all domain randomization"""
        self.randomize_lighting()
        self.randomize_materials()
        self.randomize_physics()
        self.randomize_camera()
    
    def randomize_lighting(self):
        """Randomize lighting conditions"""
        
        # Random dome light
        dome_light = self.stage.GetPrimAtPath("/DomeLight")
        if dome_light:
            intensity = np.random.uniform(*self.randomization_params["lighting"]["intensity_range"])
            dome_light.GetAttribute("intensity").Set(intensity)
        
        # Random area lights
        for i in range(3):
            light_path = f"/AreaLight{i+1}"
            light = self.stage.GetPrimAtPath(light_path)
            if light:
                # Random intensity
                intensity = np.random.uniform(1000, 10000)
                light.GetAttribute("intensity").Set(intensity)
                
                # Random color
                temp = np.random.uniform(*self.randomization_params["lighting"]["color_temp_range"])
                color = self.kelvin_to_rgb(temp)
                light.GetAttribute("color").Set(Gf.Vec3f(*color))
    
    def randomize_materials(self):
        """Randomize material properties"""
        
        materials = ["/Looks/WoodMaterial", "/Looks/MetalMaterial", "/Looks/PlasticMaterial"]
        
        for mat_path in materials:
            material = self.stage.GetPrimAtPath(mat_path)
            if material:
                # Random roughness
                roughness = np.random.uniform(*self.randomization_params["materials"]["roughness_range"])
                material.GetAttribute("roughness").Set(roughness)
                
                # Random metallic
                metallic = np.random.uniform(*self.randomization_params["materials"]["metallic_range"])
                material.GetAttribute("metallic").Set(metallic)
    
    def randomize_physics(self):
        """Randomize physics parameters"""
        
        # Random gravity
        gravity = np.random.uniform(*self.randomization_params["physics"]["gravity_range"])
        self.world.set_gravity(gravity)
        
        # Random friction for ground plane
        ground = self.stage.GetPrimAtPath("/ground_plane")
        if ground:
            friction = np.random.uniform(*self.randomization_params["physics"]["friction_range"])
            # Apply friction to collision material
    
    def randomize_camera(self):
        """Randomize camera parameters"""
        
        camera = self.stage.GetPrimAtPath("/humanoid/head_camera")
        if camera:
            # Random noise
            noise = np.random.uniform(*self.randomization_params["camera"]["noise_range"])
            # Apply noise to camera sensor
            
            # Random distortion
            distortion = np.random.uniform(*self.randomization_params["camera"]["distortion_range"])
            # Apply lens distortion

Transfer Learning Pipeline

# sim_to_real_transfer.py
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np

class Sim2RealTransfer:
    def __init__(self, sim_model_path, real_dataset_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # Load simulation-trained model
        self.sim_model = self.load_model(sim_model_path)
        
        # Load real-world dataset
        self.real_dataset = self.load_real_dataset(real_dataset_path)
        
        # Create adaptation model
        self.adaptation_model = self.create_adaptation_model()
        
    def load_model(self, path):
        """Load pre-trained simulation model"""
        model = torch.load(path)
        model.eval()
        return model.to(self.device)
    
    def create_adaptation_model(self):
        """Create domain adaptation network"""
        
        class AdaptationNet(nn.Module):
            def __init__(self, feature_dim=512):
                super().__init__()
                self.feature_extractor = nn.Sequential(
                    nn.Conv2d(3, 64, 3, padding=1),
                    nn.ReLU(),
                    nn.Conv2d(64, 128, 3, padding=1),
                    nn.ReLU(),
                    nn.AdaptiveAvgPool2d((1, 1)),
                    nn.Flatten(),
                    nn.Linear(128, feature_dim)
                )
                
                self.domain_classifier = nn.Sequential(
                    nn.Linear(feature_dim, 256),
                    nn.ReLU(),
                    nn.Linear(256, 1),
                    nn.Sigmoid()
                )
                
                self.task_classifier = nn.Sequential(
                    nn.Linear(feature_dim, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10)  # 10 classes
                )
            
            def forward(self, x):
                features = self.feature_extractor(x)
                domain_pred = self.domain_classifier(features)
                task_pred = self.task_classifier(features)
                return domain_pred, task_pred
        
        return AdaptationNet().to(self.device)
    
    def train_adaptation(self, epochs=100):
        """Train domain adaptation"""
        
        optimizer = torch.optim.Adam(self.adaptation_model.parameters(), lr=0.001)
        
        for epoch in range(epochs):
            total_loss = 0
            domain_loss = 0
            task_loss = 0
            
            for batch in self.real_dataset:
                # Get simulation features
                sim_features = self.get_sim_features(batch)
                
                # Get real features
                real_features = self.adaptation_model.feature_extractor(batch)
                
                # Domain adversarial training
                domain_pred, task_pred = self.adaptation_model(batch)
                
                # Compute losses
                d_loss = self.domain_loss(domain_pred, real_features)
                t_loss = self.task_loss(task_pred, batch.labels)
                
                # Combined loss
                loss = d_loss + t_loss
                
                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                domain_loss += d_loss.item()
                task_loss += t_loss.item()
            
            print(f"Epoch {epoch}: Total Loss: {total_loss:.4f}, "
                  f"Domain Loss: {domain_loss:.4f}, Task Loss: {task_loss:.4f}")
    
    def evaluate_transfer(self, test_dataset):
        """Evaluate transfer performance"""
        
        correct = 0
        total = 0
        
        with torch.no_grad():
            for batch in test_dataset:
                _, task_pred = self.adaptation_model(batch)
                predicted = torch.argmax(task_pred, dim=1)
                total += batch.labels.size(0)
                correct += (predicted == batch.labels).sum().item()
        
        accuracy = 100 * correct / total
        print(f"Transfer Accuracy: {accuracy:.2f}%")
        return accuracy

5.4 Training AI Models in Simulation

Reinforcement Learning Environment

# rl_environment.py
import gym
from gym import spaces
import numpy as np
from omni.isaac.core import World
from omni.isaac.core.objects import DynamicCuboid
from omni.isaac.core.utils.nucleus import get_assets_root_path

class HumanoidRLEnvironment(gym.Env):
    def __init__(self):
        super().__init__()
        
        self.world = World()
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(12,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(48,), dtype=np.float32)
        
        # Initialize humanoid
        self.humanoid = self.create_humanoid()
        
        # Target position
        self.target_pos = np.array([2.0, 0.0, 0.0])
        
        # Episode parameters
        self.max_steps = 1000
        self.current_step = 0
        
    def create_humanoid(self):
        """Create humanoid robot in simulation"""
        
        from omni.isaac.core.robots import Robot
        
        # Load humanoid robot
        humanoid = Robot(
            prim_path="/humanoid",
            usd_path=f"{get_assets_root_path()}/Isaac/Robots/Humanoid/humanoid.usd",
            name="humanoid"
        )
        
        # Add to world
        self.world.add_robot(humanoid)
        
        return humanoid
    
    def reset(self):
        """Reset environment"""
        
        # Reset humanoid position
        self.humanoid.set_world_pose(position=[0, 0, 1.0])
        
        # Reset joint positions
        initial_joints = np.zeros(12)
        self.humanoid.set_joint_positions(initial_joints)
        
        # Random target position
        self.target_pos = np.array([
            np.random.uniform(-3, 3),
            np.random.uniform(-3, 3),
            0.0
        ])
        
        # Reset step counter
        self.current_step = 0
        
        return self.get_observation()
    
    def step(self, action):
        """Execute action in environment"""
        
        # Apply joint commands
        self.humanoid.set_joint_positions(action)
        
        # Step simulation
        self.world.step(render=True)
        
        # Get observation
        obs = self.get_observation()
        
        # Calculate reward
        reward = self.calculate_reward()
        
        # Check if done
        done = self.current_step >= self.max_steps or self.check_success()
        
        self.current_step += 1
        
        return obs, reward, done, {}
    
    def get_observation(self):
        """Get current observation"""
        
        # Get robot pose
        pose = self.humanoid.get_world_pose()
        
        # Get joint positions and velocities
        joint_pos = self.humanoid.get_joint_positions()
        joint_vel = self.humanoid.get_joint_velocities()
        
        # Get target relative position
        robot_pos = np.array(pose[0])
        target_rel = self.target_pos - robot_pos
        
        # Combine observations
        obs = np.concatenate([
            pose[0],  # position (3)
            pose[1],  # orientation (4)
            joint_pos,  # joint positions (12)
            joint_vel,  # joint velocities (12)
            target_rel,  # target relative position (3)
            [self.current_step / self.max_steps]  # progress (1)
        ])
        
        return obs.astype(np.float32)
    
    def calculate_reward(self):
        """Calculate reward based on current state"""
        
        # Distance to target
        robot_pos = np.array(self.humanoid.get_world_pose()[0])
        distance = np.linalg.norm(self.target_pos - robot_pos)
        
        # Reward components
        distance_reward = -distance * 0.1
        
        # Energy penalty
        joint_vel = self.humanoid.get_joint_velocities()
        energy_penalty = -np.sum(joint_vel**2) * 0.001
        
        # Upright bonus
        orientation = self.humanoid.get_world_pose()[1]
        upright_bonus = orientation[3]  # quaternion w component
        
        # Total reward
        reward = distance_reward + energy_penalty + upright_bonus * 0.1
        
        return reward
    
    def check_success(self):
        """Check if task is completed"""
        
        robot_pos = np.array(self.humanoid.get_world_pose()[0])
        distance = np.linalg.norm(self.target_pos - robot_pos)
        
        return distance < 0.5  # Success if within 0.5m of target

Training Loop

# train_humanoid_rl.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal
import numpy as np
from rl_environment import HumanoidRLEnvironment

class PPOAgent:
    def __init__(self, state_dim, action_dim):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # Actor network
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, action_dim),
            nn.Tanh()
        ).to(self.device)
        
        # Critic network
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            nn.Linear(256, 1)
        ).to(self.device)
        
        self.optimizer = optim.Adam(
            list(self.actor.parameters()) + list(self.critic.parameters()),
            lr=3e-4
        )
        
        # PPO parameters
        self.clip_ratio = 0.2
        self.gamma = 0.99
        self.lam = 0.95
        self.epochs = 10
        self.batch_size = 64
    
    def get_action(self, state):
        """Get action from policy"""
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        
        with torch.no_grad():
            action = self.actor(state)
            value = self.critic(state)
        
        # Add exploration noise
        action = action.cpu().numpy()[0]
        action += np.random.normal(0, 0.1, size=action.shape)
        action = np.clip(action, -1.0, 1.0)
        
        return action, value.item()
    
    def update(self, states, actions, rewards, dones, values):
        """Update policy using PPO"""
        
        # Calculate advantages
        advantages = self.calculate_advantages(rewards, values, dones)
        
        # Convert to tensors
        states = torch.FloatTensor(states).to(self.device)
        actions = torch.FloatTensor(actions).to(self.device)
        advantages = torch.FloatTensor(advantages).to(self.device)
        returns = torch.FloatTensor(rewards).to(self.device)
        
        # PPO update
        for _ in range(self.epochs):
            # Get current policy
            current_actions = self.actor(states)
            current_values = self.critic(states)
            
            # Calculate ratio
            ratio = torch.exp(current_actions - actions)
            
            # Clipped surrogate objective
            surr1 = ratio * advantages
            surr2 = torch.clamp(ratio, 1 - self.clip_ratio, 1 + self.clip_ratio) * advantages
            
            # Actor loss
            actor_loss = -torch.min(surr1, surr2).mean()
            
            # Critic loss
            critic_loss = nn.MSELoss()(current_values.squeeze(), returns)
            
            # Total loss
            loss = actor_loss + 0.5 * critic_loss
            
            # Update
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
    
    def calculate_advantages(self, rewards, values, dones):
        """Calculate GAE advantages"""
        advantages = []
        gae = 0
        
        for i in reversed(range(len(rewards))):
            if i == len(rewards) - 1:
                next_value = 0
            else:
                next_value = values[i + 1]
            
            delta = rewards[i] + self.gamma * next_value * (1 - dones[i]) - values[i]
            gae = delta + self.gamma * self.lam * (1 - dones[i]) * gae
            advantages.insert(0, gae)
        
        return advantages

def train_humanoid():
    """Train humanoid robot using RL"""
    
    env = HumanoidRLEnvironment()
    agent = PPOAgent(state_dim=48, action_dim=12)
    
    # Training loop
    for episode in range(1000):
        state = env.reset()
        
        # Collect trajectory
        states, actions, rewards, dones, values = [], [], [], [], []
        
        for step in range(1000):
            action, value = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            
            states.append(state)
            actions.append(action)
            rewards.append(reward)
            dones.append(done)
            values.append(value)
            
            state = next_state
            
            if done:
                break
        
        # Update agent
        agent.update(states, actions, rewards, dones, values)
        
        # Log progress
        if episode % 10 == 0:
            avg_reward = np.mean(rewards)
            print(f"Episode {episode}: Average Reward: {avg_reward:.2f}")
            
            # Save checkpoint
            torch.save(agent.actor.state_dict(), f"humanoid_actor_episode_{episode}.pth")
            torch.save(agent.critic.state_dict(), f"humanoid_critic_episode_{episode}.pth")

if __name__ == "__main__":
    train_humanoid()

Next Chapter: In Chapter 6, we'll explore robot vision systems using RealSense cameras and OpenCV for perception.

5.1 Introduction to Omniverse & Isaac Sim​

What is NVIDIA Isaac Sim?​

Key Features​

System Requirements​

Installation​

5.2 Photorealistic Environments & Synthetic Data​

Creating Photorealistic Scenes​

Synthetic Data Generation​

5.3 Sim-to-Real Pipeline​

Domain Randomization​

Transfer Learning Pipeline​

5.4 Training AI Models in Simulation​

Reinforcement Learning Environment​

Training Loop​