メインページ   クラス階層   構成   ファイル一覧   構成メンバ   ファイルメンバ  

oct-fftw.cc

解説を見る。
00001 /*
00002 
00003 This file is part of Octave.
00004 
00005 Octave is free software; you can redistribute it and/or modify it
00006 under the terms of the GNU General Public License as published by the
00007 Free Software Foundation; either version 2, or (at your option) any
00008 later version.
00009 
00010 Octave is distributed in the hope that it will be useful, but WITHOUT
00011 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00013 for more details.
00014 
00015 You should have received a copy of the GNU General Public License
00016 along with Octave; see the file COPYING.  If not, write to the Free
00017 Software Foundation, 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00018 
00019 */
00020 
00021 #ifdef HAVE_CONFIG_H
00022 #include <config.h>
00023 #endif
00024 
00025 #if defined (HAVE_FFTW3)
00026 
00027 #include <iostream>
00028 #include <vector>
00029 
00030 #include "lo-error.h"
00031 #include "oct-fftw.h"
00032 #include "quit.h"
00033 
00034 // Helper class to create and cache fftw plans for both 1d and
00035 // 2d. This implementation uses FFTW_ESTIMATE to create the plans,
00036 // which in theory is suboptimal, but provides quite reasonable
00037 // performance.
00038 
00039 // Also note that if FFTW_ESTIMATE is not used the planner in FFTW3
00040 // destroys the input and output arrays. So with the form of the
00041 // current code we definitely want FFTW_ESTIMATE!! However, we use any
00042 // wsidom that is available, either in a FFTW3 system wide file or as
00043 // supplied by the user.
00044 
00045 // XXX FIXME XXX -- if we can ensure 16 byte alignment in Array<T>
00046 // (<T> *data) the FFTW3 can use SIMD instructions for further
00047 // acceleration.
00048 
00049 // Note that it is profitable to store the FFTW3 plans, for small
00050 // ffts.
00051 
00052 class
00053 octave_fftw_planner
00054 {
00055 public:
00056 
00057   octave_fftw_planner (void);
00058 
00059   fftw_plan create_plan (int dir, const int rank, const dim_vector dims, 
00060                          int howmany, int stride, int dist, 
00061                          const Complex *in, Complex *out);
00062 
00063   fftw_plan create_plan (const int rank, const dim_vector dims, 
00064                          int howmany, int stride, int dist, 
00065                          const double *in, Complex *out);
00066 
00067 private:
00068 
00069   int plan_flags;
00070 
00071   // XXX FIXME XXX -- perhaps this should be split into two classes?
00072 
00073   // Plan for fft and ifft of complex values
00074   fftw_plan plan[2];
00075 
00076   // dist
00077   int d[2];
00078 
00079   // stride
00080   int s[2];
00081 
00082   // rank
00083   int r[2];
00084 
00085   // howmany
00086   int h[2];
00087 
00088   // dims
00089   dim_vector n[2];
00090 
00091   bool simd_align[2];
00092   bool inplace[2];
00093 
00094   // Plan for fft of real values
00095   fftw_plan rplan;
00096 
00097   // dist
00098   int rd;
00099 
00100   // stride
00101   int rs;
00102 
00103   // rank
00104   int rr;
00105 
00106   // howmany
00107   int rh;
00108 
00109   // dims
00110   dim_vector rn;
00111 
00112   bool rsimd_align;
00113 };
00114 
00115 octave_fftw_planner::octave_fftw_planner (void)
00116 {
00117   plan_flags = FFTW_ESTIMATE;
00118 
00119   plan[0] = plan[1] = 0;
00120   d[0] = d[1] = s[0] = s[1] = r[0] = r[1] = h[0] = h[1] = 0;
00121   simd_align[0] = simd_align[1] = false;
00122   inplace[0] = inplace[1] = false;
00123   n[0] = n[1] = dim_vector ();
00124 
00125   rplan = 0;
00126   rd = rs = rr = rh = 0;
00127   rsimd_align = false;
00128   rn = dim_vector ();
00129   
00130   // If we have a system wide wisdom file, import it.
00131   fftw_import_system_wisdom ();
00132 }
00133 
00134 #define CHECK_SIMD_ALIGNMENT(x) \
00135   ((reinterpret_cast<ptrdiff_t> (x)) & 0xF == 0)
00136 
00137 fftw_plan
00138 octave_fftw_planner::create_plan (int dir, const int rank,
00139                                   const dim_vector dims, int howmany,
00140                                   int stride, int dist, 
00141                                   const Complex *in, Complex *out)
00142 {
00143   int which = (dir == FFTW_FORWARD) ? 0 : 1;
00144   fftw_plan *cur_plan_p = &plan[which];
00145   bool create_new_plan = false;
00146   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00147   bool ioinplace = (in == out);
00148 
00149   // Don't create a new plan if we have a non SIMD plan already but
00150   // can do SIMD.  This prevents endlessly recreating plans if we
00151   // change the alignment.
00152 
00153   if (plan[which] == 0 || d[which] != dist || s[which] != stride
00154       || r[which] != rank || h[which] != howmany 
00155       || ioinplace != inplace[which]
00156       || ((ioalign != simd_align[which]) ? !ioalign : false))
00157     create_new_plan = true;
00158   else
00159     {
00160       // We still might not have the same shape of array.
00161 
00162       for (int i = 0; i < rank; i++)
00163         if (dims(i) != n[which](i))
00164           {
00165             create_new_plan = true;
00166             break;
00167           }
00168     }
00169 
00170   if (create_new_plan)
00171     {
00172       d[which] = dist;
00173       s[which] = stride;
00174       r[which] = rank;
00175       h[which] = howmany;
00176       simd_align[which] = ioalign;
00177       inplace[which] = ioinplace;
00178       n[which] = dims;
00179 
00180       if (ioalign)
00181         plan_flags &= ~FFTW_UNALIGNED;
00182       else
00183         plan_flags |= FFTW_UNALIGNED;
00184 
00185       if (*cur_plan_p)
00186         fftw_destroy_plan (*cur_plan_p);
00187 
00188       // Note reversal of dimensions for column major storage in FFTW.
00189 
00190       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00191 
00192       for (int i = 0, j = rank-1; i < rank; i++, j--)
00193         tmp[i] = dims(j);
00194 
00195       *cur_plan_p =
00196         fftw_plan_many_dft (rank, tmp, howmany,
00197               reinterpret_cast<fftw_complex *> (const_cast<Complex *> (in)),
00198               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00199               0, stride, dist, dir, plan_flags);
00200 
00201       if (*cur_plan_p == 0)
00202         (*current_liboctave_error_handler) ("Error creating fftw plan");
00203     }
00204 
00205   return *cur_plan_p;
00206 }
00207  
00208 fftw_plan
00209 octave_fftw_planner::create_plan (const int rank, const dim_vector dims, 
00210                                   int howmany, int stride, int dist, 
00211                                   const double *in, Complex *out)
00212 {
00213   fftw_plan *cur_plan_p = &rplan;
00214   bool create_new_plan = false;
00215   bool ioalign = CHECK_SIMD_ALIGNMENT (in) && CHECK_SIMD_ALIGNMENT (out);
00216 
00217   // Don't create a new plan if we have a non SIMD plan already but
00218   // can do SIMD.  This prevents endlessly recreating plans if we
00219   // change the alignment.
00220 
00221   if (rplan == 0 || rd != dist || rs != stride || rr != rank
00222       || rh != howmany || ((ioalign != rsimd_align) ? !ioalign : false))
00223     create_new_plan = true;
00224   else
00225     {
00226       // We still might not have the same shape of array.
00227 
00228       for (int i = 0; i < rank; i++)
00229         if (dims(i) != rn(i))
00230           {
00231             create_new_plan = true;
00232             break;
00233           }
00234     }
00235 
00236   if (create_new_plan)
00237     {
00238       rd = dist;
00239       rs = stride;
00240       rr = rank;
00241       rh = howmany;
00242       rsimd_align = ioalign;
00243       rn = dims;
00244 
00245       if (ioalign)
00246         plan_flags &= ~FFTW_UNALIGNED;
00247       else
00248         plan_flags |= FFTW_UNALIGNED;
00249 
00250       if (*cur_plan_p)
00251         fftw_destroy_plan (*cur_plan_p);
00252 
00253       // Note reversal of dimensions for column major storage in FFTW.
00254 
00255       OCTAVE_LOCAL_BUFFER (int, tmp, rank);
00256 
00257       for (int i = 0, j = rank-1; i < rank; i++, j--)
00258         tmp[i] = dims(j);
00259 
00260       *cur_plan_p =
00261         fftw_plan_many_dft_r2c (rank, tmp, howmany,
00262               (const_cast<double *> (in)),
00263               0, stride, dist, reinterpret_cast<fftw_complex *> (out),
00264               0, stride, dist, plan_flags);
00265 
00266       if (*cur_plan_p == 0)
00267         (*current_liboctave_error_handler) ("Error creating fftw plan");
00268     }
00269 
00270   return *cur_plan_p;
00271 }
00272 
00273 static octave_fftw_planner fftw_planner;
00274 
00275 static inline void
00276 convert_packcomplex_1d (Complex *out, size_t nr, size_t nc,
00277                         int stride, int dist)
00278 {
00279   OCTAVE_QUIT;
00280 
00281   // Fill in the missing data.
00282 
00283   for (size_t i = 0; i < nr; i++)
00284     for (size_t j = nc/2+1; j < nc; j++)
00285       out[j*stride + i*dist] = conj(out[(nc - j)*stride + i*dist]);
00286 
00287   OCTAVE_QUIT;
00288 }
00289 
00290 static inline void
00291 convert_packcomplex_Nd (Complex *out, const dim_vector &dv)
00292 {
00293   size_t nc = dv(0);
00294   size_t nr = dv(1);
00295   size_t np = (dv.length () > 2 ? dv.numel () / nc / nr : 1);
00296   size_t nrp = nr * np;
00297   Complex *ptr1, *ptr2;
00298 
00299   OCTAVE_QUIT;
00300 
00301   // Create space for the missing elements.
00302 
00303   for (size_t i = 0; i < nrp; i++)
00304     {
00305       ptr1 = out + i * (nc/2 + 1) + nrp*((nc-1)/2);
00306       ptr2 = out + i * nc;
00307       for (size_t j = 0; j < nc/2+1; j++)
00308         *ptr2++ = *ptr1++;
00309     }
00310 
00311   OCTAVE_QUIT;
00312 
00313   // Fill in the missing data for the rank = 2 case directly for speed.
00314 
00315   for (size_t i = 0; i < np; i++)
00316     {
00317       for (size_t j = 1; j < nr; j++)
00318         for (size_t k = nc/2+1; k < nc; k++)
00319           out[k + (j + i*nr)*nc] = conj(out[nc - k + ((i+1)*nr - j)*nc]);
00320 
00321       for (size_t j = nc/2+1; j < nc; j++)
00322         out[j + i*nr*nc] = conj(out[(i*nr+1)*nc - j]);
00323     }
00324 
00325   OCTAVE_QUIT;
00326 
00327   // Now do the permutations needed for rank > 2 cases.
00328 
00329   size_t jstart = dv(0) * dv(1);
00330   size_t kstep = dv(0);
00331   size_t nel = dv.numel ();
00332 
00333   for (int inner = 2; inner < dv.length (); inner++) 
00334     {
00335       size_t jmax = jstart * dv(inner);
00336       for (size_t i = 0; i < nel; i+=jmax)
00337         for (size_t j = jstart, jj = jmax-jstart; j < jj; 
00338              j+=jstart, jj-=jstart)
00339           for (size_t k = 0; k < jstart; k+= kstep)
00340             for (size_t l = nc/2+1; l < nc; l++)
00341               {
00342                 Complex tmp = out[i+ j + k + l];
00343                 out[i + j + k + l] =  out[i + jj + k + l];
00344                 out[i + jj + k + l] = tmp;
00345               }
00346       jstart = jmax;
00347     }
00348 
00349   OCTAVE_QUIT;
00350 }
00351 
00352 int
00353 octave_fftw::fft (const double *in, Complex *out, size_t npts, 
00354                   size_t nsamples, int stride, int dist)
00355 {
00356   dist = (dist < 0 ? npts : dist);
00357 
00358   dim_vector dv (npts);
00359   fftw_plan plan = fftw_planner.create_plan (1, dv, nsamples, stride, dist,
00360                                              in, out);
00361 
00362   fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00363                         reinterpret_cast<fftw_complex *> (out));
00364 
00365   // Need to create other half of the transform.
00366 
00367   convert_packcomplex_1d (out, nsamples, npts, stride, dist);
00368 
00369   return 0;
00370 }
00371 
00372 int
00373 octave_fftw::fft (const Complex *in, Complex *out, size_t npts, 
00374                   size_t nsamples, int stride, int dist)
00375 {
00376   dist = (dist < 0 ? npts : dist);
00377 
00378   dim_vector dv (npts);
00379   fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, 1, dv, nsamples,
00380                                              stride, dist, in, out);
00381 
00382   fftw_execute_dft (plan, 
00383         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00384         reinterpret_cast<fftw_complex *> (out));
00385 
00386   return 0;
00387 }
00388 
00389 int
00390 octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, 
00391                    size_t nsamples, int stride, int dist)
00392 {
00393   dist = (dist < 0 ? npts : dist);
00394 
00395   dim_vector dv (npts);
00396   fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, 1, dv, nsamples,
00397                                              stride, dist, in, out);
00398 
00399   fftw_execute_dft (plan, 
00400         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00401         reinterpret_cast<fftw_complex *> (out));
00402 
00403   const Complex scale = npts;
00404   for (size_t j = 0; j < nsamples; j++)
00405     for (size_t i = 0; i < npts; i++)
00406       out[i*stride + j*dist] /= scale;
00407 
00408   return 0;
00409 }
00410 
00411 int
00412 octave_fftw::fftNd (const double *in, Complex *out, const int rank, 
00413                     const dim_vector &dv)
00414 {
00415   int dist = 1;
00416   for (int i = 0; i < rank; i++)
00417     dist *= dv(i);
00418 
00419   // Fool with the position of the start of the output matrix, so that
00420   // creating other half of the matrix won't cause cache problems.
00421 
00422   int offset = (dv.numel () / dv(0)) * ((dv(0) - 1) / 2); 
00423   
00424   fftw_plan plan = fftw_planner.create_plan (rank, dv, 1, 1, dist,
00425                                              in, out + offset);
00426 
00427   fftw_execute_dft_r2c (plan, (const_cast<double *>(in)),
00428                         reinterpret_cast<fftw_complex *> (out+ offset));
00429 
00430   // Need to create other half of the transform.
00431 
00432   convert_packcomplex_Nd (out, dv);
00433 
00434   return 0;
00435 }
00436 
00437 int
00438 octave_fftw::fftNd (const Complex *in, Complex *out, const int rank, 
00439                     const dim_vector &dv)
00440 {
00441   int dist = 1;
00442   for (int i = 0; i < rank; i++)
00443     dist *= dv(i);
00444 
00445   fftw_plan plan = fftw_planner.create_plan (FFTW_FORWARD, rank, dv, 1, 1,
00446                                              dist, in, out);
00447 
00448   fftw_execute_dft (plan, 
00449         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00450         reinterpret_cast<fftw_complex *> (out));
00451 
00452   return 0;
00453 }
00454 
00455 int
00456 octave_fftw::ifftNd (const Complex *in, Complex *out, const int rank, 
00457                      const dim_vector &dv)
00458 {
00459   int dist = 1;
00460   for (int i = 0; i < rank; i++)
00461     dist *= dv(i);
00462 
00463   fftw_plan plan = fftw_planner.create_plan (FFTW_BACKWARD, rank, dv, 1, 1,
00464                                              dist, in, out);
00465 
00466   fftw_execute_dft (plan, 
00467         reinterpret_cast<fftw_complex *> (const_cast<Complex *>(in)),
00468         reinterpret_cast<fftw_complex *> (out));
00469 
00470   const size_t npts = dv.numel ();
00471   const Complex scale = npts;
00472   for (size_t i = 0; i < npts; i++)
00473     out[i] /= scale;
00474 
00475   return 0;
00476 }
00477 
00478 #endif
00479 
00480 /*
00481 ;;; Local Variables: ***
00482 ;;; mode: C++ ***
00483 ;;; End: ***
00484 */
00485 

Wed Dec 29 11:52:07 2004に生成されました。 doxygen1.2.18
SEO [PR] 爆速!無料ブログ 無料ホームページ開設 無料ライブ放送