It is indeed possible to get the camera matrix from the /camera_info topic. The message also directly provides a projection matrix to map between positions in meters and pixel coordinates.
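
For reference, applying that matrix is a standard pinhole projection. Here is a minimal sketch (pos_cam is a placeholder for a point already expressed in the camera frame that the matrix expects; more on that frame below, and cam_info is a parsed camera_info message as in the full example further down):

import numpy as np

# 3x4 projection matrix, stored row-major in the message
P = np.array(cam_info.projection.p).reshape((3, 4))

# project a homogeneous point, then dehomogenize
u, v, w = P @ np.array([*pos_cam, 1.0])
pos_px = np.array([u / w, v / w])  # (x, y) in pixels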

However, there is a not-so-obvious gotcha: Gazebo doesn't follow the typical convention of defining the camera's coordinate system with the z-axis pointing forward. Instead, it follows the default robotics convention, so cameras have the x-axis pointing forward and the z-axis pointing up. The projection matrix, however, is given in a format that assumes the standard camera coordinate frame (z-axis pointing forward, y-axis pointing down). As a result, an additional transformation (a rotation) is needed to convert the position before applying the projection.
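
Conceptually, this extra rotation is just a relabeling of axes: assuming Gazebo's usual body frame (x forward, y left, z up), a point is re-expressed in the optical frame (x right, y down, z forward). A minimal sketch of that change of basis, for illustration only (the code below does the same thing with a homogeneous transform):

import numpy as np

# rows express the optical axes in terms of Gazebo's camera frame
R_optical = np.array([
    [0, -1,  0],   # optical x (right)   = -y (left)
    [0,  0, -1],   # optical y (down)    = -z (up)
    [1,  0,  0],   # optical z (forward) =  x (forward)
])

p_gazebo = np.array([2.0, 0.5, -0.3])  # point in Gazebo's camera frame
p_optical = R_optical @ p_gazebo       # -> array([-0.5, 0.3, 2.0])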

Here is the critical piece of code in Python to (hopefully) make things clearer:

def world2px_cam_info(pos_world):
    # camera info msg based projection

    # pos in world coords -> pos in camera coords
    # gazebo doesn't use conventional camera coordinates:
    # the camera points in the x-direction instead of the z-direction
    tf = transform(np.array((0, 0, 0, np.pi/2, np.pi/2, 0)))
    pos_cam_gazebo = homogenize(world2cam(pos_world))
    pos_cam = np.matmul(tf, pos_cam_gazebo)

    projection = np.array(cam_info.projection.p).reshape((3, 4))
    pos_px = cartesianize(np.matmul(projection, pos_cam))

    # image indexing uses y-x order instead of x-y, so flip
    pos_px = pos_px[::-1]

    return pos_px

I've added a full code example below (using gym-ignition and some helpers I wrote along the way). It produces the following image:

[Image: Projection — the rendered camera image with the projected points marked]

from ign_subscriber import IgnSubscriber
from parsers import camera_parser, camera_info_parser
from kinematics import transform, cartesianize, homogenize

from scenario import gazebo as scenario_gazebo

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from scipy.spatial.transform import Rotation as R
import time

gazebo = scenario_gazebo.GazeboSimulator(
    step_size=0.001,
    rtf=1.0,
    steps_per_run=1
)
assert gazebo.insert_world_from_sdf("./camera_test.sdf")
gazebo.initialize()

# Fix: available topics seem to be updated only at the end
# of a step. Running one paused step allows the subscribers
# to find the topic's address.
gazebo.run(paused=True)

# wait a second for all async processes to start up
time.sleep(1)

with IgnSubscriber("/camera", parser=camera_parser) as camera_topic, \
        IgnSubscriber("/camera_info", parser=camera_info_parser) as cam_info_topic:
    gazebo.run(paused=True)

    cam_info = cam_info_topic.recv()
    width = cam_info.width
    height = cam_info.height

    def world2cam(pos_world, *, model_name="camera"):
        camera = gazebo.get_world("camera_sensor").get_model(
            model_name).get_link("link")
        cam_pos_world = np.array(camera.position())
        # reorder quaternion from (w, x, y, z) to scipy's (x, y, z, w)
        cam_ori_world_quat = np.array(camera.orientation())[[1, 2, 3, 0]]
        cam_ori_world = R.from_quat(cam_ori_world_quat).as_euler("xyz")
        camera_frame_world = np.stack((cam_pos_world, cam_ori_world)).ravel()
        world_to_cam = transform(camera_frame_world)

        pos_world = homogenize(pos_world)
        pos_cam = np.matmul(world_to_cam, pos_world)
        return cartesianize(pos_cam)

    def world2px_cam_info(pos_world):
        # camera info msg based projection

        # pos in world coords -> pos in camera coords
        # gazebo doesn't use conventional camera coordinates:
        # the camera points in the x-direction instead of the z-direction
        tf = transform(np.array((0, 0, 0, np.pi/2, np.pi/2, 0)))
        pos_cam_gazebo = homogenize(world2cam(pos_world))
        pos_cam = np.matmul(tf, pos_cam_gazebo)

        projection = np.array(cam_info.projection.p).reshape((3, 4))
        pos_px = cartesianize(np.matmul(projection, pos_cam))

        # image indexing uses y-x order instead of x-y, so flip
        pos_px = pos_px[::-1]

        return pos_px

    # scene objects
    box = gazebo.get_world("camera_sensor").get_model("box")
    cone = gazebo.get_world("camera_sensor").get_model("Construction Cone")

    box_corner = np.array(box.base_position()) + np.array((.5, -.5, .5))

    img = camera_topic.recv().image
    fig, ax = plt.subplots(1)
    ax.imshow(img)
    ax.add_patch(Circle(world2px_cam_info(box_corner), radius=5))
    ax.add_patch(Circle(world2px_cam_info(cone.base_position()), radius=3))
    plt.show()

gazebo.close()

You can find the helpers in this repo: https://github.com/FirefoxMetzger/panda-ignition-sim

Edit: The .sdf was causing problems because it contained absolute paths for my local file system due to the traffic cone. I updated the .sdf, moved the code into a gist (with syntax highlighting), and added ropy, which takes care of the additional rotation and makes the code cleaner overall:

https://gist.github.com/FirefoxMetzger/dc9af8ae62fce176a3fe037d095988dc