Skip to content

Mathematica, 2.5 hours. Matlab, 47 seconds. Not impressed, WRI, not impressed.

The code is more or less identical (Mathematica version is necessarily more verbose, because, well, the syntax is more verbose. Unless you are drawing a clock applet.)

By the way, since people or than me might be looking at this code, here’s a brief synopsis of what it does: we are trying to optimize the transmission function of a certain planar structure over two parameters.  The cost function we chose to use in this optimization is \int | F^{-1}\{T(k)\} | /\mathrm{max}[|F^{-1}\{T(k)\}|]dx, where T(k) depends on the optimization parameters.  So the code simply iterates through a 2D matrix of parameters computing the cost function integral and recording the result.

Mathematica: Matlab:
(* Fourier transform function definitions: *)

FftShift1D[x_] :=
 Module[{n = Ceiling[Length[x]/2]},
  Join[Take[x, n - Length[x]],
   Take[x, n]]]; (* operates just like matlab fftshift function *)
(* wrapper over the fft function.  Return values are in a replacement \
list, so usage is as follows:
{a,b,c,d}={transform,kpoints,klim,smpRateK} \
/.PerformDft1[foo,xlim,samprate]
*)
PerformDft1[func_, xLim_, smpRateX_] :=
 Module[{kLim, step, kstep, Nw, data, dataTransform, xvec, kvec},
  step = 1/smpRateX;
  Nw = 2*xLim/step; kLim = smpRateX; kstep = 1/(2*xLim);
  xvec = Table[x, {x, -xLim, xLim - step, step}];
  kvec = Table[k, {k, -kLim/2, kLim/2 - kstep, kstep}];
  data = FftShift1D[func /@ xvec];
  dataTransform =
   smpRateX^-1*Fourier[data, FourierParameters -> {1, -1}];
  {transform -> dataTransform, kpoints -> kvec, klim -> kLim,
   smpRateK -> 1/kstep}
  ]

TESlabTransmissionNice[kx_, mu11_, d_] :=
 Module[{m0 = 1, mu1, kz0, kz1, epar = 1, mperp = 1, eps = 10^-20},
  m0 = m0 + I*eps; mu1 = mu11 + I*eps; epar = epar + I*eps;
  kz0 = Sqrt[m0 - (kx^2) ]; kz1 = Sqrt[mu1 (epar - kx^2/mperp)];
  1 / (Cos[d kz1] -
     I/2 ((mu1 kz0)/(m0 kz1) + (m0 kz1)/(mu1 kz0)) Sin[d kz1])
  ]

MagneticSlabTransmission[kx_, mupp_, d_] :=
  Module[{mu1 = -1 + I*mupp, h = 0.5},
   Exp[-2 Pi 2 h Sqrt[kx^2 - 1]] TESlabTransmissionNice[kx, mu1,
     2 Pi d]];

CostFunctionIntegrand[fn_, print_: False] :=
 Module[{xLim = 100, xSmpRate = 100, xform, kvec, srk},
  {xform, kvec, srk} = {transform, kpoints, smpRateK} /.
    PerformDft1[fn, xLim, xSmpRate];
  xform = FftShift1D@xform;
  If[print,
   Print[ListPlot[Re[Transpose[{kvec, xform}]],
     PlotRange -> {{-1, 1}, All}]];
   Print["k sampling rate: ", srk];
   ];
  Transpose[{kvec, Abs[xform]/Max[Abs[xform]]}]
  ]

ComputeCostFunction[fn_] := Module[{integrand, min, max},
  integrand = CostFunctionIntegrand[fn];
  min = Min[integrand[[All, 1]]];
  max = Max[integrand[[All, 1]]];
  Integrate[
   Interpolation[integrand, InterpolationOrder -> 2][x], {x, min, max}]
  ]

ComputeCostAndOutput[d_, mpp_,
  fname_] := PutAppend[{d, mpp,
    ComputeCostFunction[MagneticSlabTransmission[#, mpp, d] &]},fname];
(* can be read in using perl -pe 's/{(.+)}$/$1/' to strip the braces,
(nb: here + should really be *, just can't easily put it within mma
comment); then: Import["param_optim_numerics\\par_opt1.dat", "CSV"] *)

Map[((ComputeCostAndOutput[#1, #2, "costfn1.dat"] &) @@ #) &,
 Table[{d, mpp}, {d, 0.05, 1, 0.05}, {mpp, 0.05, 1, 0.05}], {2}]
%% ComputeCostAndOutput.m
%
outfile = fopen('par_opt1_matlab.dat','w');
for d=0.05:0.05:1
    for mpp=0.05:0.05:1
        integrand = CostFunctionIntegrand(...
            @(x)(MagneticSlabTransmission(x,mpp,d)));
        costfn=trapz(integrand.kvec,integrand.int);
        fprintf(outfile,'%f,%f,%f\n',d,mpp,costfn);
    end
end
fclose(outfile);

% CostFunctionIntegrand.m
function out=CostFunctionIntegrand(fh)
    xLim=100; xSmpRate=100;
    dftStruct = PerformDft1(fh,xLim,xSmpRate);
    xform = fftshift(dftStruct.transform);
    out.kvec = dftStruct.kpoints;
    out.int = abs(xform)./max(abs(xform));
end

% MagneticSlabTransmission.m
function out = MagneticSlabTransmission(kx,mu1,d,h)
    out=exp(-2*pi*2*h*sqrt(kx.^2-1)).*...
           TESlabTransmissionNice(kx,mu1,2*pi*d);
end

% TESlabTransmissionNice.m
function out = TESlabTransmissionNice(kx,mu11,d)
    m0=1; epar=1; mperp=1;
    m0 = m0 + i*eps; mu1 = mu11 + i*eps;
    epar = epar + i*eps; %#ok
    kz0 = sqrt(m0 - (kx.^2) );
    kz1 = sqrt(mu1.*(epar - kx.^2/mperp));
    out=(cos(d.*kz1)+(sqrt(-1)*(-1/2)).*(kz0.^(-1).*...
    kz1.*m0.*mu1.^(-1)+ ...
   kz0.*kz1.^(-1).*m0.^(-1).*mu1).*sin(d.*kz1)).^(-1);
end

% PerformDft1.m
function out = PerformDft1(fh,xLim,smpRateX)
    step = 1/smpRateX;
    Nw = 2*xLim/step; kLim = smpRateX;
    kstep = 1/(2*xLim);
    xvec = linspace(-xLim,xLim-step,Nw);
    kvec = linspace(-kLim/2,kLim/2-kstep,Nw);
    data = fftshift(fh(xvec));
    dataTransform = smpRateX^-1*fft(data);
    out.transform = dataTransform;
    out.kpoints = kvec;
    out.klim = kLim;
    out.smpRateK = 1/kstep;
end

{ 6 } Comments

  1. JonyEpsilon | July 6, 2009 at 4:54 am | Permalink

    FWIW, I had a crack at speeding up the Mathematica code. You can more than double the speed by adding periods to the numeric constants, which forces Mathematica into doing approximate rather than exact numerics. You can get about another factor of two by replacing the core functions in the integrand with pure functions and removing the modules. (Mathematica is almost always fastest with pure functions as it then doesn’t have to invoke the pattern matcher on the arguments. Modules involve some significant symbolic name-mangling and are pretty slow.)

    It’s still nowhere near 47 seconds, but I thought you might be interested anyway!

    Jony

  2. dnquark | July 6, 2009 at 2:35 pm | Permalink

    Interesting. I figured there’d be room for some optimization, and I’m really glad somebody looked into it. Doesn’t make me feel much better about Mathematica, though. If I don’t use Modules[] when developing code, my namespace is littered with globals, which eventually blows up in my face. And wrapping every possible numeric constant in N[] (or using decimals) gets tiring after a while.

  3. Daniel Lichtblau | July 6, 2009 at 4:22 pm | Permalink

    The bottlenecks are:

    (1) Use of exact inputs that will slow some functions (trigs, Sqrt[], and so on).

    (2) Repeated omputation of a function that is mapped over a list. Much faster to have the function take a list argument and compute a list result. This is in part because we can now work with the vector as a packed array, and do not need to break it up element-by-element.

    Here is the faster code. It runs in about 80 seconds on my machine.

    FftShift1D[x_] := Module[{n = Ceiling[Length[x]/2]},
    RotateRight[x, n]]

    PerformDft1[func_, xLim_, smpRateX_] :=
    Module[{kLim = smpRateX, step = 1/smpRateX,
    kstep = 1/(2*xLim), data, dataTransform, xvec, kvec},
    xvec = Range[-xLim, xLim - step, step];
    kvec = Range[-kLim/2, kLim/2 - kstep, kstep];
    data = FftShift1D[func[N[xvec]]];
    dataTransform = Fourier[data, FourierParameters -> {1, -1}]/smpRateX;
    {dataTransform, kvec, 1/kstep}]

    TESlabTransmissionNice[kx_, mu11_, d_] :=
    Module[{m0, mu1, kz0, kz1, epar, mperp = 1., eps = 10.^(-20)},
    m0 = 1. + I*eps;
    mu1 = mu11 + I*eps;
    epar = 1. + I*eps;
    kz0 = Sqrt[m0 - kx^2];
    kz1 = Sqrt[mu1*(epar - kx^2/mperp)];
    1/(Cos[d*kz1] -
    I/2.*((mu1*kz0)/(m0*kz1) + (m0*kz1)/(mu1*kz0))*Sin[d*kz1])]

    MagneticSlabTransmission[kx_, mupp_, d_] :=
    Module[{mu1 = -1. + I*mupp, h = 0.5},
    Exp[-4*Pi*h*Sqrt[kx^2 - 1.]]*
    TESlabTransmissionNice[kx, mu1, 2.*Pi*d]];

    CostFunctionIntegrand[fn_, print_: False] :=
    Module[{xLim = 100, xSmpRate = 100, xform, kvec,
    srk}, {xform, kvec, srk} = PerformDft1[fn, N[xLim], N[xSmpRate]];
    xform = FftShift1D@xform;
    If[print,
    Print[ListPlot[Re[Transpose[{kvec, xform}]],
    PlotRange -> {{-1, 1}, All}]];
    Print["k sampling rate: ", srk];];
    Transpose[{N[kvec], Abs[xform]/Max[Abs[xform]]}]]

    ComputeCostFunction[fn_] :=
    Module[{integrand, min, max}, integrand = CostFunctionIntegrand[fn];
    min = Min[integrand[[All, 1]]];
    max = Max[integrand[[All, 1]]];
    Integrate[Interpolation[integrand,InterpolationOrder->2][x],{x,
    min,max}]
    ]

    ComputeCostAndOutput[d_, mpp_, fname_] :=
    PutAppend[{d, mpp,
    ComputeCostFunction[MagneticSlabTransmission[#, mpp, d] &]},
    fname];

    Timing[Map[((ComputeCostAndOutput[#1, #2,
    "/tmp/costfn1.dat"] &) @@ #) &,
    Table[{d, mpp}, {d, 0.05, 1, 0.05}, {mpp, 0.05, 1, 0.05}], {2}];]

    We can furthermore replace Integrate[...] by

    NIntegrate[
    Interpolation[integrand, InterpolationOrder -> 2][x], {x, min,
    max}, PrecisionGoal -> 2, AccuracyGoal -> 2, MaxRecursion -> 6,
    MaxPoints -> Ceiling[Length[integrand[[All, 1]]]]/2,
    Method -> {"Trapezoidal", "SymbolicProcessing" -> None}]

    This variant runs in under a minute on my machine (3 Ghz, Linux, version 7.0.1 Mathematica).

    In[8]:= Timing[
    Map[((ComputeCostAndOutput[#1, #2, "/tmp/costfn1.dat"] &) @@ #) &,
    Table[{d, mpp}, {d, 0.05, 1, 0.05}, {mpp, 0.05, 1, 0.05}], {2}];]

    Out[8]= {50.1584, Null}

    Locating the bottlenecks was in part knowing to look for (1) above, coupled with use of Print[] statements to isolate (2).

    Daniel Lichtblau
    Wolfram Research

  4. dnquark | July 6, 2009 at 5:24 pm | Permalink

    My timing, on Solaris 1.5 GHz server, same version of Mathematica:
    posted code, 8 mins 7 seconds;
    replace Integrate with NIntegrate: 5m35s

    Matlab? 48 seconds. still.
    Now, I have to disqualify the NIntegrate results, because for several values of parameters, they seem to be numerically different from the Integrate[] or from matlab’s trapz by about 5%. (Matlab agrees with the Integrate[] approach to within 0.0003%)

  5. green tea | August 8, 2009 at 12:38 am | Permalink

    what about this code?

    I translated MATLAB code to Mathematica code directly.

    A bottleneck is in MagneticSlabTransmission.

    If xLim=54, underflow occurs in MagneticSlabTransmission,

    and Mathematica will use arbitrary precision arithmetic, not FPU.

    trapz[x_, y_] := (x[[2 ;;]] – x[[;; -2]]).(y[[2 ;;]] + y[[;; -2]])/2;

    fftshift[x_] := RotateRight[x, Quotient[Length[x], 2]];

    PerformDft1[fh_, xLim_, smpRateX_] :=
    Module[{step, Nw, kLim, kstep, xvec, kvec, data, dataTransform},
    step = smpRateX^-1;
    Nw = 2*xLim*smpRateX;
    kLim = smpRateX;
    kstep = (2*xLim)^-1;
    xvec = Range[-xLim, xLim - step, step];
    kvec = Range[-kLim/2, kLim/2 - kstep, kstep];
    data = fftshift[fh[xvec]];
    dataTransform = step*Fourier[data, FourierParameters -> {1, -1}];
    {dataTransform, kvec}
    ];

    TESlabTransmissionNice[kx_, mu11_, d_] :=
    Module[{eps, m0, epar, mperp, mu1, kz0, kz1, kz2},
    eps = $MachineEpsilon;
    m0 = 1. + eps*I;
    epar = 1. + eps*I;
    mperp = 1.;
    mu1 = mu11 + eps*I;
    kz0 = Sqrt[m0 - kx^2];
    kz1 = Sqrt[mu1*(epar - kx^2/mperp)];
    kz2 = kz1*m0/(kz0*mu1);
    (Cos[d*kz1] – (kz2 + kz2^(-1))*Sin[d*kz1]*I/2)^(-1)
    ];

    MagneticSlabTransmission[kx_, mu1_, d_] :=
    With[{h = 0.5},
    Exp[-4*Pi*h*Sqrt[kx^2 - 1 + 0.*I]]*
    TESlabTransmissionNice[kx, mu1, 2*Pi*d](*
    unpacking occurs . bottleneck *)
    ];

    CostFunctionIntegrand[fh_] :=
    Module[{xLim, xSmpRate, dftStruct, absxform},
    xLim = 100.;
    xSmpRate = 100.;
    dftStruct = PerformDft1[fh, xLim, xSmpRate];
    absxform = Abs[fftshift[dftStruct[[1]]]];
    {dftStruct[[2]], absxform/Max[absxform]}
    ];

    Timing[
    outfile = OpenWrite["costfn1.dat"];
    Do[
    Write[outfile, {d, mpp,
    trapz @@
    CostFunctionIntegrand[MagneticSlabTransmission[#, mpp, d] &]}],
    {d, 0.05, 1, 0.05}, {mpp, 0.05, 1, 0.05}];
    Close[outfile];
    ]

  6. green tea | August 8, 2009 at 11:57 am | Permalink

    I found a solution to remove the bottleneck.

    SetSystemOptions["CatchMachineUnderflow" -> False];

    prepend this to my previous code.

{ 1 } Trackback

  1. [...] that the product of his life’s work, basically, blows, the good folks at Wolfram actually went and optimized my code, making it run an order of magnitude faster (although still an order of magnitude slower than [...]

Post a Comment

Your email is never published nor shared. Required fields are marked *