00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 #include <config.h>
00023 #endif
00024
00025 #if defined (HAVE_FFTW3)
00026
00027 #include <iostream>
00028 #include <vector>
00029
00030 #include "lo-error.h"
00031 #include "oct-fftw.h"
00032 #include "quit.h"
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052 class
00053 octave_fftw_planner
00054 {
00055 public:
00056
00057 octave_fftw_planner (void);
00058
00059 fftw_plan create_plan (int dir, const int rank, const dim_vector dims,
00060 int howmany, int stride, int dist,
00061 const Complex *in, Complex *out);
00062
00063 fftw_plan create_plan (const int rank, const dim_vector dims,
00064 int howmany, int stride, int dist,
00065 const double *in, Complex *out);
00066
00067 private:
00068
00069 int plan_flags;
00070
00071
00072
00073
00074 fftw_plan plan[2];
00075
00076
00077 int d[2];
00078
00079
00080 int s[2];
00081
00082
00083 int r[2];
00084
00085
00086 int h[2];
00087
00088
00089 dim_vector n[2];
00090
00091 bool simd_align[2];
00092 bool inplace[2];
00093
00094
00095 fftw_plan rplan;
00096
00097
00098 int rd;
00099
00100
00101 int rs;
00102
00103
00104 int rr;
00105
00106
00107 int rh;
00108
00109
00110 dim_vector rn;
00111
00112 bool rsimd_align;
00113 };
00114
00115 octave_fftw_planner::octave_fftw_planner (void)
00116 {
00117 plan_flags = FFTW_ESTIMATE;
00118
00119 plan[0] = plan[1] = 0;
00120 d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
00121 simd_align[0] = simd_align[1] = false;
00122 inplace[0] = inplace[1] = false;
00123 n[0] = n[1] = dim_vector ();
00124
00125 rplan = 0;
00126 rd = rs = rr = rh = 0;
00127 rsimd_align = false;
00128 rn = dim_vector ();
00129
00130
00131 fftw_import_system_wisdom ();
00132 }
00133
00134 #define CHECK_SIMD_ALIGNMENT(x) \
00135 ((reinterpret_cast<ptrdiff_t> (x)) & 0xF == 0)
00136
00137 fftw_plan
00138 octave_fftw_planner::create_plan (int dir, const int rank,
00139 const dim_vector dims, int howmany,
00140 int stride, int dist,
00141 const Complex *in, Complex *out)
00142 {
00143 int which = (dir == FFTW_FORWARD) ? 0 : 1;
00144 fftw_plan *cur_plan_p = &plan[which];
00145 bool create_new_plan = false;
00146 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00147 bool ioinplace = (in == out);
00148
00149
00150
00151
00152
00153 if (plan[which] == 0 || d[which] != dist || s[which] != stride
00154 || r[which] != rank || h[which] != howmany
00155 || ioinplace != inplace[which]
00156 || ((ioalign != simd_align[which]) ? !ioalign : false))
00157 create_new_plan = true;
00158 else
00159 {
00160
00161
00162 for (int i = 0; i < rank; i++)
00163 if (dims(i) != n[which](i))
00164 {
00165 create_new_plan = true;
00166 break;
00167 }
00168 }
00169
00170 if (create_new_plan)
00171 {
00172 d[which] = dist;
00173 s[which] = stride;
00174 r[which] = rank;
00175 h[which] = howmany;
00176 simd_align[which] = ioalign;
00177 inplace[which] = ioinplace;
00178 n[which] = dims;
00179
00180 if (ioalign)
00181 plan_flags &= ~FFTW_UNALIGNED;
00182 else
00183 plan_flags |= FFTW_UNALIGNED;
00184
00185 if (*cur_plan_p)
00186 fftw_destroy_plan (*cur_plan_p);
00187
00188
00189
00190 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00191
00192 for (int i = 0, j = rank-1; i < rank; i++, j--)
00193 tmp[i] = dims(j);
00194
00195 *cur_plan_p =
00196 fftw_plan_many_dft (rank, tmp, howmany,
00197 reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
00198 0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00199 0, stride, dist, dir, plan_flags);
00200
00201 if (*cur_plan_p == 0)
00202 (*current_liboctave_error_handler) ("Error creating fftw plan");
00203 }
00204
00205 return *cur_plan_p;
00206 }
00207
00208 fftw_plan
00209 octave_fftw_planner::create_plan (const int rank, const dim_vector dims,
00210 int howmany, int stride, int dist,
00211 const double *in, Complex *out)
00212 {
00213 fftw_plan *cur_plan_p = &rplan;
00214 bool create_new_plan = false;
00215 bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00216
00217
00218
00219
00220
00221 if (rplan == 0 || rd != dist || rs != stride || rr != rank
00222 || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
00223 create_new_plan = true;
00224 else
00225 {
00226
00227
00228 for (int i = 0; i < rank; i++)
00229 if (dims(i) != rn(i))
00230 {
00231 create_new_plan = true;
00232 break;
00233 }
00234 }
00235
00236 if (create_new_plan)
00237 {
00238 rd = dist;
00239 rs = stride;
00240 rr = rank;
00241 rh = howmany;
00242 rsimd_align = ioalign;
00243 rn = dims;
00244
00245 if (ioalign)
00246 plan_flags &= ~FFTW_UNALIGNED;
00247 else
00248 plan_flags |= FFTW_UNALIGNED;
00249
00250 if (*cur_plan_p)
00251 fftw_destroy_plan (*cur_plan_p);
00252
00253
00254
00255 OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00256
00257 for (int i = 0, j = rank-1; i < rank; i++, j--)
00258 tmp[i] = dims(j);
00259
00260 *cur_plan_p =
00261 fftw_plan_many_dft_r2c (rank, tmp, howmany,
00262 (const_cast<double *> (in)),
00263 0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00264 0, stride, dist, plan_flags);
00265
00266 if (*cur_plan_p == 0)
00267 (*current_liboctave_error_handler) ("Error creating fftw plan");
00268 }
00269
00270 return *cur_plan_p;
00271 }
00272
00273 static octave_fftw_planner fftw_planner;
00274
00275 static inline void
00276 convert_packcomplex_1d (Complex *out, size_t nr, size_t nc,
00277 int stride, int dist)
00278 {
00279 OCTAVE_QUIT;
00280
00281
00282
00283 for (size_t i = 0; i < nr; i++)
00284 for (size_t j = nc/2+1; j < nc; j++)
00285 out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]);
00286
00287 OCTAVE_QUIT;
00288 }
00289
00290 static inline void
00291 convert_packcomplex_Nd (Complex *out, const dim_vector &dv)
00292 {
00293 size_t nc = dv(0);
00294 size_t nr = dv(1);
00295 size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1);
00296 size_t nrp = nr * np;
00297 Complex *ptr1, *ptr2;
00298
00299 OCTAVE_QUIT;
00300
00301
00302
00303 for (size_t i = 0; i < nrp; i++)
00304 {
00305 ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
00306 ptr2 = out + i * nc;
00307 for (size_t j = 0; j < nc/2+1; j++)
00308 *ptr2++ = *ptr1++;
00309 }
00310
00311 OCTAVE_QUIT;
00312
00313
00314
00315 for (size_t i = 0; i < np; i++)
00316 {
00317 for (size_t j = 1; j < nr; j++)
00318 for (size_t k = nc/2+1; k < nc; k++)
00319 out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]);
00320
00321 for (size_t j = nc/2+1; j < nc; j++)
00322 out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]);
00323 }
00324
00325 OCTAVE_QUIT;
00326
00327
00328
00329 size_t jstart = dv(0) * dv(1);
00330 size_t kstep = dv(0);
00331 size_t nel = dv.numel ();
00332
00333 for (int inner = 2; inner < dv.length (); inner++)
00334 {
00335 size_t jmax = jstart * dv(inner);
00336 for (size_t i = 0; i < nel; i+=jmax)
00337 for (size_t j = jstart, jj = jmax-jstart; j < jj;
00338 j+=jstart, jj-=jstart)
00339 for (size_t k = 0; k < jstart; k+= kstep)
00340 for (size_t l = nc/2+1; l < nc; l++)
00341 {
00342 Complex tmp = out[i+ j + k + l];
00343 out[i + j + k + l] = out[i + jj + k + l];
00344 out[i + jj + k + l] = tmp;
00345 }
00346 jstart = jmax;
00347 }
00348
00349 OCTAVE_QUIT;
00350 }
00351
00352 int
00353 octave_fftw::fft (const double *in, Complex *out, size_t npts,
00354 size_t nsamples, int stride, int dist)
00355 {
00356 dist = (dist < 0 ? npts : dist);
00357
00358 dim_vector dv (npts);
00359 fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist,
00360 in, out);
00361
00362 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00363 reinterpret_cast<fftw_complex *> (out));
00364
00365
00366
00367 convert_packcomplex_1d (out, nsamples, npts, stride, dist);
00368
00369 return 0;
00370 }
00371
00372 int
00373 octave_fftw::fft (const Complex *in, Complex *out, size_t npts,
00374 size_t nsamples, int stride, int dist)
00375 {
00376 dist = (dist < 0 ? npts : dist);
00377
00378 dim_vector dv (npts);
00379 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples,
00380 stride, dist, in, out);
00381
00382 fftw_execute_dft (plan,
00383 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00384 reinterpret_cast<fftw_complex *> (out));
00385
00386 return 0;
00387 }
00388
00389 int
00390 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts,
00391 size_t nsamples, int stride, int dist)
00392 {
00393 dist = (dist < 0 ? npts : dist);
00394
00395 dim_vector dv (npts);
00396 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples,
00397 stride, dist, in, out);
00398
00399 fftw_execute_dft (plan,
00400 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00401 reinterpret_cast<fftw_complex *> (out));
00402
00403 const Complex scale = npts;
00404 for (size_t j = 0; j < nsamples; j++)
00405 for (size_t i = 0; i < npts; i++)
00406 out[i*stride + j*dist] /= scale;
00407
00408 return 0;
00409 }
00410
00411 int
00412 octave_fftw::fftNd (const double *in, Complex *out, const int rank,
00413 const dim_vector &dv)
00414 {
00415 int dist = 1;
00416 for (int i = 0; i < rank; i++)
00417 dist *= dv(i);
00418
00419
00420
00421
00422 int offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2);
00423
00424 fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist,
00425 in, out + offset);
00426
00427 fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00428 reinterpret_cast<fftw_complex *> (out+ offset));
00429
00430
00431
00432 convert_packcomplex_Nd (out, dv);
00433
00434 return 0;
00435 }
00436
00437 int
00438 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank,
00439 const dim_vector &dv)
00440 {
00441 int dist = 1;
00442 for (int i = 0; i < rank; i++)
00443 dist *= dv(i);
00444
00445 fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1,
00446 dist, in, out);
00447
00448 fftw_execute_dft (plan,
00449 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00450 reinterpret_cast<fftw_complex *> (out));
00451
00452 return 0;
00453 }
00454
00455 int
00456 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank,
00457 const dim_vector &dv)
00458 {
00459 int dist = 1;
00460 for (int i = 0; i < rank; i++)
00461 dist *= dv(i);
00462
00463 fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1,
00464 dist, in, out);
00465
00466 fftw_execute_dft (plan,
00467 reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00468 reinterpret_cast<fftw_complex *> (out));
00469
00470 const size_t npts = dv.numel ();
00471 const Complex scale = npts;
00472 for (size_t i = 0; i < npts; i++)
00473 out[i] /= scale;
00474
00475 return 0;
00476 }
00477
00478 #endif
00479
00480
00481
00482
00483
00484
00485