Compare two voices - good tool?
Community Forums/General Help/Compare two voices - good tool?
| ||
| Hi all, I have a little project on the side going on, and for that I would have to compare two voice audio sound files, to find out if they are really different voices, or if there is a match. I've been traversing the web for a while now, but I couldn't find anything useful (other than expensive forensic investigation tools that cost a fortune). Does anyone know of a free or low-cost tool to do this with? Any advise or pointers are appreciated. |
| ||
| Sounds like you'll have to extract the phoneme samples, sample them in the frequency domain with dft, and do a comparison there. Try googling for 'Discreet Fourier transform' and 'dynamic time warp'. I don't know of a method of extracting and normalizing phoneme samples from spoken audio automatically, you'll probably have to do it by hand. |
| ||
| I think you have your work cut out there. Using an average comparison on 2 ("3 dimensional") DFT patterns in a quiet environment should be easy enough, but any noise can interfere massively. (by 3d i mean several dft "snapshots" over a time period.) Here is a public DFT/FFT algorythm i used in a recent project for tone / note recognition.
public class DFT {
public static final int RECTANGULAR = 0;
public static final int HANN = 1;
public static final int HAMMING = 2;
public static final int BLACKMANN = 3;
public static final double[] forwardMagnitude(double[] input) {
int N = input.length;
double[] mag = new double[N];
double[] c = new double[N];
double[] s = new double[N];
double twoPi = 2*Math.PI;
for(int i=0; i<N; i++) {
for(int j=0; j<N; j++) {
c[i] += input[j]*Math.cos(i*j*twoPi/N);
s[i] -= input[j]*Math.sin(i*j*twoPi/N);
}
c[i]/=N;
s[i]/=N;
mag[i]=Math.sqrt(c[i]*c[i]+s[i]*s[i]);
}
return mag;
}
public static final double[] window(double[] input, int type) {
int N = input.length;
double[] windowed = new double[N];
switch(type) {
case RECTANGULAR:
return input;
case HANN:
for(int n=0; n<N; n++) {
windowed[n] = 0.5*(1-Math.cos(2*Math.PI*n/(N-1))) * input[n];
}
break;
case HAMMING:
for (int n = 0; n < input.length; n++) {
windowed[n] = (0.53836-0.46164*Math.cos(Tools.TWO_PI*n/(N-1))) * input[n];
}
case BLACKMANN:
for(int n=0; n<N; n++) {
windowed[n] = (0.42-0.5*Math.cos(2*Math.PI*n/(N-1))+0.08*Math.cos(4*Math.PI*n/(N-1)) ) * input[n];
}
break;
}
return windowed;
}
}
public class Complex {
private double real;
private double imag;
public Complex(double real, double imag) {
this.real = real;
this.imag = imag;
}
public String toString() {
if(imag<0) {
return new String(real+" - i"+Math.abs(imag));
} else {
return new String(real+" + i"+imag);
}
}
public static final Complex multiply(Complex c1, Complex c2) {
double re = c1.real*c2.real - c1.imag*c2.imag;
double im = c1.real*c2.imag + c1.imag*c2.real;
return new Complex(re, im);
}
public static Complex scale(Complex c, double x) {
return new Complex(c.real*x, c.imag*x);
}
public static final Complex add(Complex c1, Complex c2) {
double re = c1.real + c2.real;
double im = c1.imag + c2.imag;
return new Complex(re, im);
}
public static final Complex substract(Complex c1, Complex c2) {
double re = c1.real - c2.real;
double im = c1.imag - c2.imag;
return new Complex(re, im);
}
public static Complex conjugate(Complex c) {
return new Complex(c.real, -c.imag);
}
public static double abs(Complex c) {
return Math.sqrt(c.real*c.real+c.imag*c.imag);
}
public static double[] abs(Complex[] c) {
int N = c.length;
double[] mag = new double[N];
for(int i=0; i<N; i++) {
mag[i] = Math.sqrt(c[i].real*c[i].real+c[i].imag*c[i].imag);
}
return mag;
}
public static final Complex nthRootOfUnity(int n, int N) {
double re = Math.cos(2*Math.PI*n/N);
double im = Math.sin(2*Math.PI*n/N);
return new Complex(re, im);
}
}
/*
* http://www.cs.princeton.edu/introcs/97data/FFT.java.html
* Should be optimized. w_n may be looked up from a table etc.
*
* Java DSP book
*/
public class FFT {
// private static int length;
private static double[] r_data = null;
private static double[] i_data = null;
private static boolean forward = true;
/*
private void computeRootArray(int N) {
Complex[] w = new Complex[N/2];
for(int i=0; i<N/2; i++) {
w[i] = Complex.nthRootOfUnity(i, N);
}
}
*/
public static Complex[] forward(Complex[] x) {
int N = x.length;
if( N == 1 ) {
return new Complex[] { x[0] };
} else {
Complex[] even = new Complex[N/2];
Complex[] odd = new Complex[N/2];
for(int i=0; i<N/2; i++) {
even[i]=x[2*i];
odd[i]=x[2*i+1];
}
Complex[] left = forward(even);
Complex[] right = forward(odd);
Complex[] c = new Complex[N];
for(int n=0; n<N/2; n++) {
double nth = -2*n*Math.PI/N;
Complex wn = new Complex(Math.cos(nth), Math.sin(nth));
c[n] = Complex.add(left[n], Complex.multiply(wn, right[n]));
c[n+N/2] = Complex.substract(left[n], Complex.multiply(wn, right[n]));
}
return c;
}
}
public static int bitReverse(int index) {
// if length = 8 index goes from 0 to 7
// to write 8 we need log2(8)+1=3+1=4 bits.
// 8 = 1000, 7 = 111
// so to write 7 we need log2(8)=3 bits.
int numBits = (int)Tools.log2(8);
int ret = 0;
for (int i = 0; i < numBits; i++) {
ret = (ret<<1) + (index&1);
index = index>>1;
}
return ret;
}
public static double[] magnitudeSpectrum(double[] realPart) {
// length = realPart.length;
// int localN;
//
// int numBits = (int)Tools.log2(length);
//
// for(int m = 1; m <= numBits; m++) {
// // localN = 2^m;
// localN = 1<<m;
// }
double[] imaginaryPart = new double[realPart.length];
for (int i = 0; i < imaginaryPart.length; i++) {
imaginaryPart[i] = 0;
}
forwardFFT(realPart, imaginaryPart);
for (int i = 0; i < realPart.length; i++) {
realPart[i] = Math.sqrt( r_data[i]*r_data[i] + i_data[i]*i_data[i] );
}
return realPart;
}
// swap Zi with Zj
private static void swapInt(int i, int j) {
double tempr;
int ti;
int tj;
ti = i - 1;
tj = j - 1;
tempr = r_data[tj];
r_data[tj] = r_data[ti];
r_data[ti] = tempr;
tempr = i_data[tj];
i_data[tj] = i_data[ti];
i_data[ti] = tempr;
}
private static void bitReverse2() {
// System.out.println("fft: bit reversal");
/* bit reversal */
int n = r_data.length;
int j = 1;
int k;
for (int i = 1; i < n; i++) {
if (i < j) swapInt(i, j);
k = n / 2;
while (k >= 1 && k < j) {
j = j - k;
k = k / 2;
}
j = j + k;
}
}
public static void forwardFFT(double in_r[], double in_i[]) {
int id;
int localN;
double wtemp, Wjk_r, Wjk_i, Wj_r, Wj_i;
double theta, tempr, tempi;
// int ti, tj;
int numBits = (int)Tools.log2(in_r.length);
if (forward) {
//centering(in_r);
}
// Truncate input data to a power of two
int length = 1 << numBits; // length = 2**nu
int n = length;
int nby2;
// Copy passed references to variables to be used within
// fft routines & utilities
r_data = in_r;
i_data = in_i;
bitReverse2();
for (int m = 1; m <= numBits; m++) {
// localN = 2^m;
localN = 1 << m;
nby2 = localN / 2;
Wjk_r = 1;
Wjk_i = 0;
theta = Math.PI / nby2;
// for recursive comptutation of sine and cosine
Wj_r = Math.cos(theta);
Wj_i = -Math.sin(theta);
if (forward == false) {
Wj_i = -Wj_i;
}
for (int j = 0; j < nby2; j++) {
// This is the FFT innermost loop
// Any optimizations that can be made here will yield
// great rewards.
for (int k = j; k < n; k += localN) {
id = k + nby2;
tempr = Wjk_r * r_data[id] - Wjk_i * i_data[id];
tempi = Wjk_r * i_data[id] + Wjk_i * r_data[id];
// Zid = Zi -C
r_data[id] = r_data[k] - tempr;
i_data[id] = i_data[k] - tempi;
r_data[k] += tempr;
i_data[k] += tempi;
}
// (eq 6.23) and (eq 6.24)
wtemp = Wjk_r;
Wjk_r = Wj_r * Wjk_r - Wj_i * Wjk_i;
Wjk_i = Wj_r * Wjk_i + Wj_i * wtemp;
}
}
// normalize output of fft.
// if (forward)
if(false)
for (int i = 0; i < r_data.length; i++) {
r_data[i] = r_data[i] / (double) n;
i_data[i] = i_data[i] / (double) n;
}
in_r = r_data;
in_r = i_data;
}
public static Complex[] inverse(Complex[] c) {
int N = c.length;
Complex[] x = new Complex[N];
for(int i=0; i<N; i++) {
x[i] = Complex.conjugate(c[i]);
}
x = forward(x);
for(int i=0; i<N; i++) {
x[i] = Complex.conjugate(x[i]);
x[i] = Complex.scale(x[i], 1.0/N);
}
return x;
}
}
public class Tools {
public static final double TWO_PI = 2*Math.PI;
public static final double LOG_OF_2_BASE_10 = 1/Math.log10(2);
public static double log2(double x) {
return Math.log10(x)/Math.log10(2.0);
}
public static final double[] lowpass(double[] signal, int nPoints) {
int N = signal.length;
double[] ret = new double[N];
for(int i=0; i<nPoints/2; i++) {
ret[i] = signal[i];
}
for(int i=nPoints/2; i<N-nPoints/2; i++) {
for(int j=0; j<nPoints; j++) {
ret[i]=0;
ret[i]+=signal[i-nPoints/2+j];
ret[i]/=nPoints;
}
}
for(int i=N-nPoints/2; i<N; i++) {
ret[i]=signal[i];
}
return ret;
}
public static final double[] addArrays(double[] x, double[] y) {
double[] sum = new double[x.length];
for(int i=0; i<x.length; i++) {
sum[i] = x[i] + y[i];
}
return sum;
}
public static final Complex[] addArrays(Complex[] x, Complex[] y) {
Complex[] sum = new Complex[x.length];
for(int i=0; i<x.length; i++) {
sum[i] = Complex.add(x[i], y[i]);
}
return sum;
}
public static final Complex[] substractArrays(Complex[] x, Complex[] y) {
Complex[] sum = new Complex[x.length];
for(int i=0; i<x.length; i++) {
sum[i] = Complex.substract(x[i], y[i]);
}
return sum;
}
public static final double[] dotProduct(double[] x, double[] y) {
double[] sum = new double[x.length];
for(int i=0; i<x.length; i++) {
sum[i] = x[i] * y[i];
}
return sum;
}
public static final Complex[] dotProduct(Complex[] x, Complex[] y) {
Complex[] sum = new Complex[x.length];
for(int i=0; i<x.length; i++) {
sum[i] = Complex.multiply(x[i], y[i]);
}
return sum;
}
public static Complex[] makeComplex(double[] x) {
int N = x.length;
Complex[] c = new Complex[N];
for(int i=0; i<N; i++) {
c[i] = new Complex(x[i],0);
}
return c;
}
public static void printArray(double[] arr) {
for (double d : arr) {
System.out.format("%.4f ", d);
}
System.out.println();
}
}
Last edited 2012 |
| ||
| Hey D4NM4N, that's some pretty impressive piece of code you provided me. I'll put it into Java along with the files I need to test, and let you know how it worked out for me :) |
| ||
| Not mine it is some pd code i found. You need to split the classes into 4 sep. class files However I found it very useful in my recog lib. What I did was have 2 threads, one fetching the data from the hardware and putting the sample in a "BlockingQueue". Then had the second one unpacking it from said queue and doing the fourrier transform on it. PS: Have a look for "Android Open source DTMF recogniser" this project also uses the above maths lib and shows a similar technique to what i describe with the dual task block queue. I would give you the full lib i made, but it belongs to the company and i would get a whipping :/ Last edited 2012 |
| ||
| This (http://www.csee.umbc.edu/~squire/download/fft1024.c) is the FFT i use. just drop it in and go. |