I have had partial success with the following:
CODE
//----------------------------------------------------------------------------
procedure FFT_DIT_Complex;
//----------------------------------------------------------------------------
var
BlockBitLen : Integer;
//--------------------------------------------------------------------------
Procedure RadixFour_DIT;
//--------------------------------------------------------------------------
var
MainBlockSize : Integer;
SubBlockSize : Integer;
I, J, N, K, L, M : Integer;
X1, X2, X3 : tComplex;
Y0, Y1, Y2, Y3 : tComplex;
Begin
blockbitlen:=blockbitlen+2;
ShiftBits:=MaxFFTBitLength-BlockBitLen;
MainBlockSize:=powersoftwoint[blockbitlen];
SubBlockSize:=mainblocksize shr 2;
I:=0;
repeat
J:=I;
for N:=0 to SubBlockSize - 1 do
begin
K:=J+SubBlockSize;
L:=K+SubBlockSize;
M:=L+SubBlockSize;
X1:=ComplexMul(FFT_Array.Complex[K],FD.A[2*N shl ShiftBits]);
X2:=ComplexMul(FFT_Array.Complex[L],FD.A[1*N shl ShiftBits]);
X3:=ComplexMul(FFT_Array.Complex[M],FD.A[3*N shl ShiftBits]);
Y0:=ComplexAdd(FFT_Array.Complex[J],X1);
Y1:=ComplexAdd(X2,X3);
Y2:=ComplexSub(FFT_Array.Complex[J],X1);
Y3:=ComplexMulJ(ComplexSub(X2,X3));
FFT_Array.Complex[J]:=ComplexAdd(Y0,Y1);
FFT_Array.Complex[K]:=ComplexSub(Y2,Y3);
FFT_Array.Complex[L]:=ComplexSub(Y0,Y1);
FFT_Array.Complex[M]:=ComplexAdd(Y2,Y3);
J:=J+1;
end;
I:=I+MainBlockSize;
until (I=NumSamples);
//--------------------------------------------------------------------------
End;
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
Procedure RadixTwo_DIT;
//--------------------------------------------------------------------------
var
MainBlockSize : Integer;
SubBlockSize : Integer;
I, J, N, K : Integer;
W, WB : tComplex;
Begin
blockbitlen:=blockbitlen+1;
ShiftBits:=MaxFFTBitLength-BlockBitLen;
MainBlockSIze:=powersoftwoint[blockbitlen];
SubBlockSize:=MainBlockSize shr 1;
I:=0;
repeat
J:=I;
for N:=0 to SubBlockSize - 1 do
begin
K:=J + SubBlockSize;
W:=FD.A[N shl ShiftBits];
WB.Re:=W.Re*FFT_Array.Complex[K].Re-W.Im*FFT_Array.Complex[K].Im;
WB.Im:=W.Re*FFT_Array.Complex[K].Im+W.Im*FFT_Array.Complex[K].Re;
FFT_Array.Complex[K].Re:=FFT_Array.Complex[J].Re-WB.Re;
FFT_Array.Complex[K].Im:=FFT_Array.Complex[J].Im-WB.Im;
FFT_Array.Complex[J].Re:=FFT_Array.Complex[J].Re+WB.Re;
FFT_Array.Complex[J].Im:=FFT_Array.Complex[J].Im+WB.Im;
J:=J+1;
end;
I:=I+MainBlockSize;
until (I=NumSamples);
//--------------------------------------------------------------------------
End;
//--------------------------------------------------------------------------
//----------------------------------------------------------------------------
begin
//----------------------------------------------------------------------------
blockbitlen:=0;
Shuffle_in_place_DComplex;
if RadixFourCore_Is_On then
While numberofbitsneeded-blockbitlen>1 do
RadixFour_DIT;
While BlockBitLen<numberofbitsneeded do
RadixTwo_DIT;
//----------------------------------------------------------------------------
end;
//----------------------------------------------------------------------------
procedure FFT_DIT_Complex;
//----------------------------------------------------------------------------
var
BlockBitLen : Integer;
//--------------------------------------------------------------------------
Procedure RadixFour_DIT;
//--------------------------------------------------------------------------
var
MainBlockSize : Integer;
SubBlockSize : Integer;
I, J, N, K, L, M : Integer;
X1, X2, X3 : tComplex;
Y0, Y1, Y2, Y3 : tComplex;
Begin
blockbitlen:=blockbitlen+2;
ShiftBits:=MaxFFTBitLength-BlockBitLen;
MainBlockSize:=powersoftwoint[blockbitlen];
SubBlockSize:=mainblocksize shr 2;
I:=0;
repeat
J:=I;
for N:=0 to SubBlockSize - 1 do
begin
K:=J+SubBlockSize;
L:=K+SubBlockSize;
M:=L+SubBlockSize;
X1:=ComplexMul(FFT_Array.Complex[K],FD.A[2*N shl ShiftBits]);
X2:=ComplexMul(FFT_Array.Complex[L],FD.A[1*N shl ShiftBits]);
X3:=ComplexMul(FFT_Array.Complex[M],FD.A[3*N shl ShiftBits]);
Y0:=ComplexAdd(FFT_Array.Complex[J],X1);
Y1:=ComplexAdd(X2,X3);
Y2:=ComplexSub(FFT_Array.Complex[J],X1);
Y3:=ComplexMulJ(ComplexSub(X2,X3));
FFT_Array.Complex[J]:=ComplexAdd(Y0,Y1);
FFT_Array.Complex[K]:=ComplexSub(Y2,Y3);
FFT_Array.Complex[L]:=ComplexSub(Y0,Y1);
FFT_Array.Complex[M]:=ComplexAdd(Y2,Y3);
J:=J+1;
end;
I:=I+MainBlockSize;
until (I=NumSamples);
//--------------------------------------------------------------------------
End;
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
Procedure RadixTwo_DIT;
//--------------------------------------------------------------------------
var
MainBlockSize : Integer;
SubBlockSize : Integer;
I, J, N, K : Integer;
W, WB : tComplex;
Begin
blockbitlen:=blockbitlen+1;
ShiftBits:=MaxFFTBitLength-BlockBitLen;
MainBlockSIze:=powersoftwoint[blockbitlen];
SubBlockSize:=MainBlockSize shr 1;
I:=0;
repeat
J:=I;
for N:=0 to SubBlockSize - 1 do
begin
K:=J + SubBlockSize;
W:=FD.A[N shl ShiftBits];
WB.Re:=W.Re*FFT_Array.Complex[K].Re-W.Im*FFT_Array.Complex[K].Im;
WB.Im:=W.Re*FFT_Array.Complex[K].Im+W.Im*FFT_Array.Complex[K].Re;
FFT_Array.Complex[K].Re:=FFT_Array.Complex[J].Re-WB.Re;
FFT_Array.Complex[K].Im:=FFT_Array.Complex[J].Im-WB.Im;
FFT_Array.Complex[J].Re:=FFT_Array.Complex[J].Re+WB.Re;
FFT_Array.Complex[J].Im:=FFT_Array.Complex[J].Im+WB.Im;
J:=J+1;
end;
I:=I+MainBlockSize;
until (I=NumSamples);
//--------------------------------------------------------------------------
End;
//--------------------------------------------------------------------------
//----------------------------------------------------------------------------
begin
//----------------------------------------------------------------------------
blockbitlen:=0;
Shuffle_in_place_DComplex;
if RadixFourCore_Is_On then
While numberofbitsneeded-blockbitlen>1 do
RadixFour_DIT;
While BlockBitLen<numberofbitsneeded do
RadixTwo_DIT;
//----------------------------------------------------------------------------
end;
//----------------------------------------------------------------------------
Which yields the following results:
CODE
-2.42562; -16.38966 0.00000j; -16.38966 0.00000j; Log2(diff)= 0.00
-13.96404; 11.53842 0.00000j; 11.53842 0.00000j; Log2(diff)= 0.00
-2.42562; -24.52994 0.00000j; -24.52994 0.00000j; Log2(diff)= 0.00
-13.96404; 0.84735 9.09674j; 0.84735 9.09674j; Log2(diff)=-50.68
-3.27297; 13.13275 0.00000j; 13.13275 0.00000j; Log2(diff)= 0.00
-4.86730; 0.84735 -9.09674j; 0.84735 -9.09674j; Log2(diff)=-50.68
-2.42562; -5.97460 0.00000j; -5.97460 0.00000j; Log2(diff)= 0.00
-13.96404; -29.29319 24.71371j; -29.29319 24.71371j; Log2(diff)= 0.00
-3.27297; 6.86850 -8.76796j; 6.86850 -8.76796j; Log2(diff)= 0.00
-4.86730; 8.83605 14.60415j; 8.83605 14.60415j; Log2(diff)= 0.00
7.80295; 13.74693 0.00000j; 13.74693 0.00000j; Log2(diff)= 0.00
13.41764; 8.83605 -14.60415j; 8.83605 -14.60415j; Log2(diff)= 0.00
1.78181; 6.86850 8.76796j; 6.86850 8.76796j; Log2(diff)= 0.00
-4.44706; -29.29319 -24.71371j; -29.29319 -24.71371j; Log2(diff)= 0.00
-2.42562; 19.50760 0.00000j; 19.50760 0.00000j; Log2(diff)= 0.00
-13.96404; -34.39083 16.06212j; -34.39083 16.06212j; Log2(diff)= 0.00
-3.27297; -52.56269 7.44535j; -52.56269 7.44535j; Log2(diff)= 0.00
-4.86730; 35.34690 6.30417j; 35.34690 6.30417j; Log2(diff)=-47.00
7.80295; 14.33710 13.72925j; 14.33710 13.72925j; Log2(diff)= 0.00
13.41764; -16.86829 -6.73178j; 35.34690 -6.73178j; Log2(diff)= 5.71
1.78181; 30.92091 41.49151j; -52.56269 41.49151j; Log2(diff)= 6.38
-4.44706; -23.87325 1.78571j; -34.39083 1.78571j; Log2(diff)= 3.39
7.52075; 35.86277 0.00000j; 35.86277 0.00000j; Log2(diff)= 0.00
-14.92152; -23.87325 -1.78571j; -23.87325 -1.78571j; Log2(diff)=-49.00
15.12153; 30.92091 -41.49151j; 30.92091 -41.49151j; Log2(diff)= 0.00
12.36228; -16.86829 6.73178j; -16.86829 6.73178j; Log2(diff)=-48.00
8.11306; 14.33710 -13.72925j; 14.33710 -13.72925j; Log2(diff)= 0.00
4.51450; 35.34690 -6.30417j; -16.86829 -6.30417j; Log2(diff)= 5.71
-6.95632; -52.56269 -7.44535j; 30.92091 -7.44535j; Log2(diff)= 6.38
-0.27208; -34.39083 -16.06212j; -23.87325 -16.06212j; Log2(diff)= 3.39
-2.42562; -9.17144 0.00000j; -9.17144 0.00000j; Log2(diff)= 0.00
-13.96404; -81.23429 -40.22784j; -81.23429 -40.22784j; Log2(diff)=-45.84
-3.27297; -47.66871 -44.37872j; -47.66871 -44.37872j; Log2(diff)=-47.00
-4.86730; 5.98283 -6.37269j; 5.98283 -6.37269j; Log2(diff)=-46.84
7.80295; -63.70874 -15.91796j; -63.70874 -15.91796j; Log2(diff)= 0.00
13.41764; -10.40201 61.84935j; 34.16780 20.26514j; Log2(diff)= 5.93
1.78181; 29.04245 -43.87025j; -5.71989 10.04250j; Log2(diff)= 6.00
-4.44706; 28.60409 -4.39741j; -27.36520 37.83903j; Log2(diff)= 6.13
7.52075; 33.42780 -4.91438j; 33.42780 -4.91438j; Log2(diff)= 0.00
-14.92152; 31.67370 38.69012j; 31.67370 38.69012j; Log2(diff)= 0.00
15.12153; -70.95944 -34.15967j; -70.95944 -34.15967j; Log2(diff)= 0.00
12.36228; 26.95767 23.26952j; 26.95767 23.26952j; Log2(diff)=-46.84
8.11306; -13.99692 55.98842j; -13.99692 55.98842j; Log2(diff)= 0.00
4.51450; 44.14109 -66.02782j; 68.94118 -38.24214j; Log2(diff)= 5.22
-6.95632; -19.11018 -7.97389j; -60.59198 -30.30528j; Log2(diff)= 5.56
-0.27208; 46.37619 15.63380j; 51.70771 7.23245j; Log2(diff)= 3.31
-13.93803; 53.30045 0.00000j; 53.30045 0.00000j; Log2(diff)= 0.00
15.73928; 46.37619 -15.63380j; 46.37619 -15.63380j; Log2(diff)=-47.42
12.71942; -19.11018 7.97389j; -19.11018 7.97389j; Log2(diff)=-47.00
-9.40078; 44.14109 66.02782j; 44.14109 66.02782j; Log2(diff)=-47.00
12.18357; -13.99692 -55.98842j; -13.99692 -55.98842j; Log2(diff)= 0.00
6.45604; 26.95767 -23.26952j; 15.95612 18.31469j; Log2(diff)= 5.43
-5.91695; -70.95944 34.15967j; -61.05900 -19.75307j; Log2(diff)= 5.78
9.10612; 31.67370 -38.69012j; -7.49290 -80.92655j; Log2(diff)= 5.85
3.28957; 33.42780 4.91438j; 33.42780 4.91438j; Log2(diff)= 0.00
-9.30553; 28.60409 4.39741j; 28.60409 4.39741j; Log2(diff)=-48.00
-9.43221; 29.04245 43.87025j; 29.04245 43.87025j; Log2(diff)= 0.00
-5.42741; -10.40201 -61.84935j; -10.40201 -61.84935j; Log2(diff)=-46.96
5.19990; -63.70874 15.91796j; -63.70874 15.91796j; Log2(diff)= 0.00
-15.09716; 5.98283 6.37269j; -52.38551 -21.41300j; Log2(diff)= 6.01
-9.72595; -47.66871 44.37872j; 18.67499 66.71011j; Log2(diff)= 6.13
-15.12893; -81.23429 40.22784j; 8.57009 48.62919j; Log2(diff)= 6.49
-13.96404; 11.53842 0.00000j; 11.53842 0.00000j; Log2(diff)= 0.00
-2.42562; -24.52994 0.00000j; -24.52994 0.00000j; Log2(diff)= 0.00
-13.96404; 0.84735 9.09674j; 0.84735 9.09674j; Log2(diff)=-50.68
-3.27297; 13.13275 0.00000j; 13.13275 0.00000j; Log2(diff)= 0.00
-4.86730; 0.84735 -9.09674j; 0.84735 -9.09674j; Log2(diff)=-50.68
-2.42562; -5.97460 0.00000j; -5.97460 0.00000j; Log2(diff)= 0.00
-13.96404; -29.29319 24.71371j; -29.29319 24.71371j; Log2(diff)= 0.00
-3.27297; 6.86850 -8.76796j; 6.86850 -8.76796j; Log2(diff)= 0.00
-4.86730; 8.83605 14.60415j; 8.83605 14.60415j; Log2(diff)= 0.00
7.80295; 13.74693 0.00000j; 13.74693 0.00000j; Log2(diff)= 0.00
13.41764; 8.83605 -14.60415j; 8.83605 -14.60415j; Log2(diff)= 0.00
1.78181; 6.86850 8.76796j; 6.86850 8.76796j; Log2(diff)= 0.00
-4.44706; -29.29319 -24.71371j; -29.29319 -24.71371j; Log2(diff)= 0.00
-2.42562; 19.50760 0.00000j; 19.50760 0.00000j; Log2(diff)= 0.00
-13.96404; -34.39083 16.06212j; -34.39083 16.06212j; Log2(diff)= 0.00
-3.27297; -52.56269 7.44535j; -52.56269 7.44535j; Log2(diff)= 0.00
-4.86730; 35.34690 6.30417j; 35.34690 6.30417j; Log2(diff)=-47.00
7.80295; 14.33710 13.72925j; 14.33710 13.72925j; Log2(diff)= 0.00
13.41764; -16.86829 -6.73178j; 35.34690 -6.73178j; Log2(diff)= 5.71
1.78181; 30.92091 41.49151j; -52.56269 41.49151j; Log2(diff)= 6.38
-4.44706; -23.87325 1.78571j; -34.39083 1.78571j; Log2(diff)= 3.39
7.52075; 35.86277 0.00000j; 35.86277 0.00000j; Log2(diff)= 0.00
-14.92152; -23.87325 -1.78571j; -23.87325 -1.78571j; Log2(diff)=-49.00
15.12153; 30.92091 -41.49151j; 30.92091 -41.49151j; Log2(diff)= 0.00
12.36228; -16.86829 6.73178j; -16.86829 6.73178j; Log2(diff)=-48.00
8.11306; 14.33710 -13.72925j; 14.33710 -13.72925j; Log2(diff)= 0.00
4.51450; 35.34690 -6.30417j; -16.86829 -6.30417j; Log2(diff)= 5.71
-6.95632; -52.56269 -7.44535j; 30.92091 -7.44535j; Log2(diff)= 6.38
-0.27208; -34.39083 -16.06212j; -23.87325 -16.06212j; Log2(diff)= 3.39
-2.42562; -9.17144 0.00000j; -9.17144 0.00000j; Log2(diff)= 0.00
-13.96404; -81.23429 -40.22784j; -81.23429 -40.22784j; Log2(diff)=-45.84
-3.27297; -47.66871 -44.37872j; -47.66871 -44.37872j; Log2(diff)=-47.00
-4.86730; 5.98283 -6.37269j; 5.98283 -6.37269j; Log2(diff)=-46.84
7.80295; -63.70874 -15.91796j; -63.70874 -15.91796j; Log2(diff)= 0.00
13.41764; -10.40201 61.84935j; 34.16780 20.26514j; Log2(diff)= 5.93
1.78181; 29.04245 -43.87025j; -5.71989 10.04250j; Log2(diff)= 6.00
-4.44706; 28.60409 -4.39741j; -27.36520 37.83903j; Log2(diff)= 6.13
7.52075; 33.42780 -4.91438j; 33.42780 -4.91438j; Log2(diff)= 0.00
-14.92152; 31.67370 38.69012j; 31.67370 38.69012j; Log2(diff)= 0.00
15.12153; -70.95944 -34.15967j; -70.95944 -34.15967j; Log2(diff)= 0.00
12.36228; 26.95767 23.26952j; 26.95767 23.26952j; Log2(diff)=-46.84
8.11306; -13.99692 55.98842j; -13.99692 55.98842j; Log2(diff)= 0.00
4.51450; 44.14109 -66.02782j; 68.94118 -38.24214j; Log2(diff)= 5.22
-6.95632; -19.11018 -7.97389j; -60.59198 -30.30528j; Log2(diff)= 5.56
-0.27208; 46.37619 15.63380j; 51.70771 7.23245j; Log2(diff)= 3.31
-13.93803; 53.30045 0.00000j; 53.30045 0.00000j; Log2(diff)= 0.00
15.73928; 46.37619 -15.63380j; 46.37619 -15.63380j; Log2(diff)=-47.42
12.71942; -19.11018 7.97389j; -19.11018 7.97389j; Log2(diff)=-47.00
-9.40078; 44.14109 66.02782j; 44.14109 66.02782j; Log2(diff)=-47.00
12.18357; -13.99692 -55.98842j; -13.99692 -55.98842j; Log2(diff)= 0.00
6.45604; 26.95767 -23.26952j; 15.95612 18.31469j; Log2(diff)= 5.43
-5.91695; -70.95944 34.15967j; -61.05900 -19.75307j; Log2(diff)= 5.78
9.10612; 31.67370 -38.69012j; -7.49290 -80.92655j; Log2(diff)= 5.85
3.28957; 33.42780 4.91438j; 33.42780 4.91438j; Log2(diff)= 0.00
-9.30553; 28.60409 4.39741j; 28.60409 4.39741j; Log2(diff)=-48.00
-9.43221; 29.04245 43.87025j; 29.04245 43.87025j; Log2(diff)= 0.00
-5.42741; -10.40201 -61.84935j; -10.40201 -61.84935j; Log2(diff)=-46.96
5.19990; -63.70874 15.91796j; -63.70874 15.91796j; Log2(diff)= 0.00
-15.09716; 5.98283 6.37269j; -52.38551 -21.41300j; Log2(diff)= 6.01
-9.72595; -47.66871 44.37872j; 18.67499 66.71011j; Log2(diff)= 6.13
-15.12893; -81.23429 40.22784j; 8.57009 48.62919j; Log2(diff)= 6.49
Now, from these I can see that lengths 2, 4 and 8 are working perfectly - these use 1 x 2; 1 x 4 and 1 x 4 + 1 x 2 radix FFT steps respectively. The problem that I seem to have is that using more than one radix 4 step has issues (i.e. not the first step).
I have been googling variants of "Radix 4 FFT DIT DIF twiddle" for a few days now to no avail - if anyone can enlighten me, I would be very grateful.
[edit] Cancel that - the algorithm works fine - it was the ComplexMulJ function that was wrong - it hadn't taken into account that j x j = -1. Problem solved in vanilla Delphi - now all that remains is to implement in assembler for speed. [/edit]