Patate Lib  0.5
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
Screen Space Curvature using Cuda and Python

Introduction

This is an example that use Grenaille to compute Screen Space Curvature in python using Cuda.

Installation and usage

This example requires the following python packages:

  • pycuda
  • matplotlib

as well as a working cuda setup (tested with 5.0 and 7.0), and the lastest version of eigen dev branch which manage nvcc compiler.

To compile and run the example, call

cd build && make ssgls_py
cd examples/Grenaille/python && python ssgls.py -s 10 -p ./data/ssgls_sample_wc.png -n ./data/ssgls_sample_normal.png

The script will load the input pictures and compute the curvature for a screenspace neighborhood 10x10 pixels.

ssgls_input.png
Screen-Space Curvature typical input. Left: world coordinates. Right: remapped normal vectors

It will generate this picture

ssgls_result1.png
Screen-Space Curvature estimation

as well as the following trace on the standard output

Load CUDA kernel ........... DONE in 0.024 seconds.
Init Input data ............ DONE in 13.390 seconds.
Init queries ............... DONE in 0.720 seconds.
Set memory configuration ... DONE in 0.004 seconds.
Launch kernel .............. DONE in 1.505 seconds.
###########Configuration#############
Image size: 1902 x 911
NbQueries: 1732722
Scale: 10
################Results################
Max value: 25.3449363708
Min value: -31.8370857239
#######################################
Note
Don't worry about the important execution time, this example uses a naive and inefficient approach to represent queries with numpy, making the total process really slow. However, the real computation time in cuda is indicated by the last line of the timing trace. Note that we do not use any re-sampling technique, that is why this step is directly related to the scale parameter.

Cuda programming

Here are the technical details related to the cuda and python biding for screen-space curvature estimation.

Define fitting data structure

class MyPoint
{
public:
enum {Dim = 3};
typedef float Scalar;
typedef Eigen::Matrix<Scalar, Dim, 1> VectorType;
typedef Eigen::Matrix<Scalar, Dim, Dim> MatrixType;
typedef Eigen::Matrix<Scalar, 2, 1> ScreenVectorType;
MULTIARCH inline MyPoint( const VectorType& _pos = VectorType::Zero(),
const VectorType& _normal = VectorType::Zero(),
const ScreenVectorType& _spos = ScreenVectorType::Zero(),
const Scalar _dz = 0.f)
: m_pos(_pos), m_normal(_normal), m_spos(_spos), m_dz(_dz){}
MULTIARCH inline const VectorType& pos() const { return m_pos; }
MULTIARCH inline const VectorType& normal() const { return m_normal; }
MULTIARCH inline const ScreenVectorType& spos() const { return m_spos; }
MULTIARCH inline const float & dz() const { return m_dz; }
MULTIARCH inline VectorType& pos() { return m_pos; }
MULTIARCH inline VectorType& normal() { return m_normal; }
MULTIARCH inline ScreenVectorType& spos() { return m_spos; }
MULTIARCH inline float& dz() { return m_dz; }
private:
ScreenVectorType m_spos;
VectorType m_pos, m_normal;
float m_dz; // depth threshold
};

Define weighting functions

class ProjectWeightFunc: public Grenaille::DistWeightFunc<MyPoint, Grenaille::SmoothWeightKernel<Scalar> >
{
public:
typedef MyPoint::Scalar Scalar;
typedef MyPoint::VectorType VectorType;
/*
Default constructor (needed by Grenaille). Note that the screenspace
evaluation position is specified as parameter
*/
MULTIARCH inline ProjectWeightFunc( const Scalar& _t = 1.f,
const ScreenVectorType& _refPos = ScreenVectorType::Zero(),
const Scalar& _dz = 0.f)
: Grenaille::DistWeightFunc<MyPoint, Grenaille::SmoothWeightKernel<Scalar> >(_t), m_refPos(_refPos), m_dz(_dz) {}
MULTIARCH inline Scalar w(const VectorType& _q, const MyPoint& _attributes) const
{
Scalar d = (_attributes.spos()-m_refPos).norm();
const Scalar dz = _attributes.dz();
if (d > m_t || dz > m_dz)
return Scalar(0.);
return m_wk.f(d/m_t);
}
private:
ScreenVectorType m_refPos;
float m_dz;
};

Define fitting primitive

Kernel

extern "C" {
__global__ void doGLS_kernel(int* _params, //[w, h, scale]
float* _positions,
float* _normals,
float* _result)
{
uint x = (blockIdx.x * blockDim.x) + threadIdx.x;
uint y = (blockIdx.y * blockDim.y) + threadIdx.y;
const int &width = _params[0];
const int &height = _params[1];
if (x < width && y < height)
{
const int &scale = _params[2];
ScreenVectorType refPos;
refPos << x, y;
int dx, dy; // neighbor offset ids
int nx, ny; // neighbor ids
Gls gls;
gls.setWeightFunc(ProjectWeightFunc(scale, refPos));
gls.init( getVector(x,y,width,height,_positions) );
if (getVector(x,y,width,height,_normals).squaredNorm() == 0.f )
{
_result[getId(x,y,width,height,0,1)] = -1.0;
}
else
{
//_result[getId(x,y,width,height,0,1)] = getVector(x,y,width,height,_normals)(0);
VectorType p, n;
// collect neighborhood
VectorType one = VectorType::Zero();
for(dy = -scale; dy != scale; dy++)
{
for(dx = -scale; dx != scale; dx++)
{
nx = x+dx;
ny = y+dy;
// Check image boundaries
if (nx >= 0 && ny >= 0 && nx < width && ny < height)
{
n = getVector(nx,ny,width,height,_normals);
// add nei only when the _normal is properly defined
// need to use an explicit floating point comparison with pycuda
if (n.squaredNorm() != 0.f )
{
// RGB to XYZ remapping
n = 2.f * n - one;
n.normalize();
// GLS computation
gls.addNeighbor(MyPoint(getVector(nx,ny,width,height,_positions),
n,
ScreenVectorType(nx,ny)));
}
}
}
}
// closed form minimization
gls.finalize();
_result[getId(x,y,width,height,0,1)] = gls.kappa();
}
}
}
}

Memory access

We use numpy to format the input data, filled by dimension (in object space) and then by the screen-space coordinates:

__device__ int getId(const int _x,
const int _y,
const int _width,
const int _height,
const int _component,
const int _nbComponent)
{
return (_component) + _nbComponent*(_x + _y * _width);
}
__device__ VectorType getVector(const int _x,
const int _y,
const int _width,
const int _height,
const float* _buffer)
{
VectorType r;
r << Scalar(_buffer[getId(_x,_y,_width,_height,0,3)]),
Scalar(_buffer[getId(_x,_y,_width,_height,1,3)]),
Scalar(_buffer[getId(_x,_y,_width,_height,2,3)]);
return r;
}

Python script

We use numpy to format input data.

1 # This Source Code Form is subject to the terms of the Mozilla Public
2 # License, v. 2.0. If a copy of the MPL was not distributed with this
3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 
5 import pycuda.autoinit
6 import pycuda.driver as drv
7 import numpy
8 import Image
9 import time
10 import sys, getopt
11 
12 import matplotlib.pyplot as plt
13 import matplotlib.image as mpimg
14 
15 from pycuda.compiler import SourceModule
16 
17 
18 
19 
20 
21 start_time=0.0
22 
23 def startTimer( funcname, quiet ):
24  global start_time
25 
26  if not quiet:
27  sys.stdout.write(funcname)
28  sys.stdout.flush()
29  start_time = time.time()
30 
31 
32 def stopTimer(quiet):
33  elapsed_time = time.time() - start_time
34  if not quiet:
35  print 'DONE in {0:.3f} seconds.'.format(elapsed_time)
36 
37 
38 
39 
40 ################################################################################
41 # Load input ptx file
42 def loadKernel():
43  module = drv.module_from_file("ssgls.ptx")
44  glsKernel = module.get_function("doGLS_kernel")
45 
46  return module, glsKernel
47 
48 
49 
50 
51 
52 ################################################################################
53 # Format data for easy access in cuda kernel
54 def initInputData(imgNormPath, imgwcPath):
55  i_normals = Image.open (imgNormPath)
56  i_positions = Image.open (imgwcPath)
57 
58  # Todo: check dimensions for depth, normals and positions
59  (w,h) = i_positions.size
60  length = w*h;
61 
62  normals = numpy.array(i_normals.getdata()).reshape(3*length,1).astype(numpy.float32) / 255.0;
63  positions = numpy.array(i_positions.getdata()).reshape(3*length,1).astype(numpy.float32) / 255.0;
64 
65  positions = positions * 2.0 - 1.0;
66 
67  return w, h, normals, positions
68 
69 
70 
71 
72 
73 ################################################################################
74 # Generate queries: In this simple example we set one query per pixel
75 def initQueries(w,h):
76  nbQueries = w*h
77 
78  queries = numpy.zeros(2*nbQueries).astype(numpy.float32);
79  for j in range (0,h):
80  for i in range (0,w):
81  queries[2*(i + j*w)] = i
82  queries[2*(i + j*w)+1] = j
83  return nbQueries, queries
84 
85 
86 
87 
88 def main(argv):
89 
90 
91  scale = 10 # neighborhood size in pixels
92  imgNormPath = ''
93  imgwcPath = ''
94  quiet = False
95  outputPath = ''
96 
97 
98 
99  try:
100  opts, args = getopt.getopt(argv,"s:p:n:o:q",["scale=","positions=","normals=","output=","quiet"])
101  except getopt.GetoptError:
102  print 'ssgl.py -s <scale> -p <positions> -n <normals>'
103  sys.exit(2)
104  for opt, arg in opts:
105  if opt == '-h':
106  print 'ssgl.py -s <scale> -p <positions> -n <normals>'
107  sys.exit()
108  elif opt in ("-s", "--scale"):
109  scale = int(arg)
110  elif opt in ("-p", "--imgpath"):
111  imgwcPath = arg
112  elif opt in ("-n", "--normals"):
113  imgNormPath = arg
114  elif opt in ("-o", "--output"):
115  outputPath = arg
116  elif opt in ("-q", "--quiet"):
117  quiet = True
118 
119 
120 
121 
122 
123  startTimer("Load CUDA kernel ........... ", quiet)
124  module, glsKernel = loadKernel()
125  stopTimer(quiet)
126 
127  startTimer("Init Input data ............ ", quiet)
128  w, h, normals, positions = initInputData (imgNormPath, imgwcPath)
129  stopTimer(quiet)
130 
131  nbQueries = w*h
132 
133  startTimer("Init result array memory ... ", quiet)
134  result = numpy.zeros(nbQueries).astype(numpy.float32)
135  stopTimer(quiet)
136 
137 
138 
139  ################################################################################
140  # Launch kernel
141  blockSize = 32
142  gridWidth = w/blockSize
143  gridHeight = h/blockSize
144 
145  w = numpy.int32(w)
146  h = numpy.int32(h)
147  scale = numpy.int32(scale)
148 
149  startTimer("Launch kernel .............. ", quiet)
150  glsKernel(
151  drv.In(numpy.array([w, h, scale])),
152  drv.In(positions),
153  drv.In(normals),
154  drv.Out(result),
155  block = (blockSize, blockSize, 1),
156  grid = (gridWidth,gridHeight))
157 
158  stopTimer(quiet)
159 
160  ################################################################################
161  # Analyse results
162  whereAreNaNs = numpy.isnan(result);
163  result[whereAreNaNs] = 0;
164 
165  if not quiet:
166  print '\n###########Configuration#############'
167  print 'Image size: {} x {}'.format(w,h)
168  print 'NbQueries: {}'.format(nbQueries)
169  print 'Scale: {}'.format(scale)
170  print '################Results################'
171  print 'Max value: {}'.format(result.max())
172  print 'Min value: {}'.format(result.min())
173  print '#######################################'
174 
175  scaleFactor = max(abs(result.max()), abs(result.min())); #/ (2.0*scaleFactor) + 0.5)
176 
177  if outputPath == '':
178  plt.imshow(result.reshape(h,w), vmin=-scaleFactor, vmax=scaleFactor, cmap='seismic')
179  plt.show()
180  else:
181  plt.imsave(fname=outputPath, arr=result.reshape(h,w), vmin=-1.0, vmax=1.0, cmap='seismic')
182 
183 if __name__ == "__main__":
184  main(sys.argv[1:])
185