Ptex
PtexSeparableKernel.cpp
Go to the documentation of this file.
1 /*
2 PTEX SOFTWARE
3 Copyright 2014 Disney Enterprises, Inc. All rights reserved
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9  * Redistributions of source code must retain the above copyright
10  notice, this list of conditions and the following disclaimer.
11 
12  * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions and the following disclaimer in
14  the documentation and/or other materials provided with the
15  distribution.
16 
17  * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
18  Studios" or the names of its contributors may NOT be used to
19  endorse or promote products derived from this software without
20  specific prior written permission from Walt Disney Pictures.
21 
22 Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
23 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
24 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
25 FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
26 IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
31 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
34 */
35 #include "PtexPlatform.h"
36 #include "PtexUtils.h"
37 #include "PtexHalf.h"
38 #include "PtexSeparableKernel.h"
39 
41 
42 namespace {
43  // apply to 1..4 channels (unrolled channel loop) of packed data (nTxChan==nChan)
44  template<class T, int nChan>
45  void Apply(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int /*nTxChan*/)
46  {
47  float* rowResult = (float*) alloca(nChan*sizeof(float));
48  int rowlen = k.res.u() * nChan;
49  int datalen = k.uw * nChan;
50  int rowskip = rowlen - datalen;
51  float* kvp = k.kv;
52  T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nChan;
53  T* pEnd = p + k.vw * rowlen;
54  while (p != pEnd)
55  {
56  float* kup = k.ku;
57  T* pRowEnd = p + datalen;
58  // just mult and copy first element
59  PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
60  p += nChan;
61  // accumulate remaining elements
62  while (p != pRowEnd) {
63  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
64  PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
65  p += nChan;
66  }
67  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
68  PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
69  p += rowskip;
70  }
71  }
72 
73  // apply to 1..4 channels (unrolled channel loop) w/ pixel stride
74  template<class T, int nChan>
75  void ApplyS(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int nTxChan)
76  {
77  float* rowResult = (float*) alloca(nChan*sizeof(float));
78  int rowlen = k.res.u() * nTxChan;
79  int datalen = k.uw * nTxChan;
80  int rowskip = rowlen - datalen;
81  float* kvp = k.kv;
82  T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nTxChan;
83  T* pEnd = p + k.vw * rowlen;
84  while (p != pEnd)
85  {
86  float* kup = k.ku;
87  T* pRowEnd = p + datalen;
88  // just mult and copy first element
89  PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
90  p += nTxChan;
91  // accumulate remaining elements
92  while (p != pRowEnd) {
93  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
94  PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
95  p += nTxChan;
96  }
97  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
98  PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
99  p += rowskip;
100  }
101  }
102 
103  // apply to N channels (general case)
104  template<class T>
105  void ApplyN(PtexSeparableKernel& k, float* result, void* data, int nChan, int nTxChan)
106  {
107  float* rowResult = (float*) alloca(nChan*sizeof(float));
108  int rowlen = k.res.u() * nTxChan;
109  int datalen = k.uw * nTxChan;
110  int rowskip = rowlen - datalen;
111  float* kvp = k.kv;
112  T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nTxChan;
113  T* pEnd = p + k.vw * rowlen;
114  while (p != pEnd)
115  {
116  float* kup = k.ku;
117  T* pRowEnd = p + datalen;
118  // just mult and copy first element
119  PtexUtils::VecMultN<T>()(rowResult, p, nChan, *kup++);
120  p += nTxChan;
121  // accumulate remaining elements
122  while (p != pRowEnd) {
123  // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
124  PtexUtils::VecAccumN<T>()(rowResult, p, nChan, *kup++);
125  p += nTxChan;
126  }
127  // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
128  PtexUtils::VecAccumN<float>()(result, rowResult, nChan, *kvp++);
129  p += rowskip;
130  }
131  }
132 }
133 
134 
135 
138  // nChan == nTxChan
139  ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
140  Apply<uint8_t,1>, Apply<uint16_t,1>, Apply<PtexHalf,1>, Apply<float,1>,
141  Apply<uint8_t,2>, Apply<uint16_t,2>, Apply<PtexHalf,2>, Apply<float,2>,
142  Apply<uint8_t,3>, Apply<uint16_t,3>, Apply<PtexHalf,3>, Apply<float,3>,
143  Apply<uint8_t,4>, Apply<uint16_t,4>, Apply<PtexHalf,4>, Apply<float,4>,
144 
145  // nChan != nTxChan (need pixel stride)
146  ApplyN<uint8_t>, ApplyN<uint16_t>, ApplyN<PtexHalf>, ApplyN<float>,
147  ApplyS<uint8_t,1>, ApplyS<uint16_t,1>, ApplyS<PtexHalf,1>, ApplyS<float,1>,
148  ApplyS<uint8_t,2>, ApplyS<uint16_t,2>, ApplyS<PtexHalf,2>, ApplyS<float,2>,
149  ApplyS<uint8_t,3>, ApplyS<uint16_t,3>, ApplyS<PtexHalf,3>, ApplyS<float,3>,
150  ApplyS<uint8_t,4>, ApplyS<uint16_t,4>, ApplyS<PtexHalf,4>, ApplyS<float,4>,
151 };
152 
PtexUtils::VecMultN
Definition: PtexUtils.h:237
PtexSeparableKernel::v
int v
Definition: PtexSeparableKernel.h:51
PtexSeparableKernel::uw
int uw
Definition: PtexSeparableKernel.h:52
PtexSeparableKernel::applyFunctions
static ApplyFn applyFunctions[40]
Definition: PtexSeparableKernel.h:472
PTEX_NAMESPACE_END
#define PTEX_NAMESPACE_END
Definition: PtexVersion.h:62
PtexUtils.h
PtexUtils::VecAccum
Definition: PtexUtils.h:200
PtexSeparableKernel
Definition: PtexSeparableKernel.h:48
PtexUtils::VecMult
Definition: PtexUtils.h:223
PtexSeparableKernel::vw
int vw
Definition: PtexSeparableKernel.h:52
PTEX_NAMESPACE_BEGIN::ApplyN
void ApplyN(PtexSeparableKernel &k, float *result, void *data, int nChan, int nTxChan)
Definition: PtexSeparableKernel.cpp:105
PtexSeparableKernel::ku
float * ku
Definition: PtexSeparableKernel.h:53
PTEX_NAMESPACE_BEGIN::ApplyS
void ApplyS(PtexSeparableKernel &k, float *result, void *data, int, int nTxChan)
Definition: PtexSeparableKernel.cpp:75
PTEX_NAMESPACE_BEGIN::Apply
void Apply(PtexSeparableKernel &k, float *result, void *data, int, int)
Definition: PtexSeparableKernel.cpp:45
PTEX_NAMESPACE_BEGIN
Definition: PtexSeparableKernel.cpp:42
PtexUtils::VecAccumN
Definition: PtexUtils.h:214
PtexSeparableKernel.h
PtexSeparableKernel::res
Res res
Definition: PtexSeparableKernel.h:50
PtexSeparableKernel::u
int u
Definition: PtexSeparableKernel.h:51
PtexSeparableKernel::ApplyFn
void(* ApplyFn)(PtexSeparableKernel &k, float *dst, void *data, int nChan, int nTxChan)
Definition: PtexSeparableKernel.h:470
PtexSeparableKernel::kv
float * kv
Definition: PtexSeparableKernel.h:54
PtexPlatform.h
Platform-specific classes, functions, and includes.
PtexHalf.h
Half-precision floating-point type.