from __future__ import division
from numpy import *; seterr(all="ignore")
from numpy import linalg
from numpy import random
from matplotlib.pyplot import *
%matplotlib notebook
import wish
from audio.filters import FIR, AR
import audio.frames
import audio.index
import audio.io
from audio.lp import lp
from audio.quantizers import Quantizer
# Sampling frequency in Hz, shared by the analysis and playback steps below.
df = 16000

# --- FIR filter demo ---------------------------------------------------------
# Filter with the single coefficient 1.0, fed scalars and then a sequence.
fir = FIR([1.0])
print(fir(2.0))
print(fir(1.0))
print(fir([0.0, 7.0, -3.0]))

# Filter with coefficients [0.0, 1.0], fed the same inputs (named `delay`
# by the author; presumably a one-sample delay -- confirm against FIR).
delay = FIR([0.0, 1.0])
fir = delay
print(fir(2.0))
print(fir(1.0))
print(fir([0.0, 7.0, -3.0]))
print(fir.a)

# Disabled alternative input: record a few seconds instead of using the corpus.
#data = audio.io.record(3.0, df=df)[0]
#audio.io.play(data, df=df)

# --- Corpus browsing ---------------------------------------------------------
aes = audio.index.search("ae", type=audio.index.Phone)
print(aes)
data = aes[0].audio

you = audio.index.search("you", type=audio.index.Word)[0]
print(you)

ixs = audio.index.search("ix")
data = ixs[0].audio
plot(data)
axis("tight")

# Walk the first utterance: one line per word, its phones indented below it.
sentence = audio.index.search(type=audio.index.Utterance)[0]
for word in sentence:
    print(word)
    for phone in word:
        # Same output as the Python 2 statement `print 4*" ", phone`.
        print("%s %s" % (4*" ", phone))
data = sentence.audio
print(data)
# Play at the shared sampling frequency rather than a repeated literal.
audio.io.play(data, df=df)

# Select the utterance analysed by the rest of the script.
utterances = audio.index.search(type=audio.index.Utterance)
n = 10
data = utterances[n].audio
audio.io.play(data, df=df)
class STP(Quantizer):
    """Short-Term Predictor.

    `encode` fits linear-prediction coefficients to a block of samples with
    `lp` and returns them together with the output of the FIR filter whose
    coefficients are `[1, -a]`; `decode` loads the coefficients into the AR
    filter and runs the error signal through it.
    """

    def __init__(self, order=16, method="autocorrelation"):
        """Store the settings and create both filters with zeroed taps.

        order  -- number of prediction coefficients.
        method -- forwarded to `lp` as its `method` argument.
        """
        self.order = order
        self.method = method
        # Placeholder coefficients; `encode` / `decode` overwrite them in place.
        self.fir = FIR(a=hstack((1.0, zeros(order))))
        self.ar = AR(a=zeros(order))

    def encode(self, data):
        """Return `(coefficients, error)` for `data`.

        Raises ValueError when the covariance method is selected and `data`
        does not hold more than `order` samples.
        """
        if self.method == "covariance":
            if len(data) <= self.order:
                raise ValueError("not enough data samples")
        coefficients = lp(data, order=self.order, method=self.method)
        # Update the existing coefficient array instead of rebinding it.
        self.fir.a[:] = hstack((1.0, -coefficients))
        return (coefficients, self.fir(data))

    def decode(self, data):
        """Run the error of a `(coefficients, error)` pair through the AR filter."""
        coefficients, residual = data
        self.ar.a[:] = coefficients
        return self.ar(residual)
def stp_error(data, T=0.02, order=16, method="autocorrelation"):
    """Return the short-term prediction error of `data`, frame by frame.

    data   -- sequence of samples; the module-level `df` is used as its
              sampling frequency.
    T      -- frame duration in seconds.
    order  -- prediction order handed to `STP`.
    method -- estimation method handed to `STP`.

    The result has the same number of samples as `data`.
    """
    total = len(data)
    frame_size = int(T * df)  # samples in T seconds at frequency df
    chunks = audio.frames.split(data, frame_size, pad=True)
    predictor = STP(order=order, method=method)
    residual = zeros(frame_size * len(chunks))
    start = 0
    # A single predictor instance handles the frames in order.
    for chunk in chunks:
        stop = start + frame_size
        residual[start:stop] = predictor.encode(chunk)[1]
        start = stop
    # The buffer holds whole frames; cut it back to the input length.
    return residual[:total]
# --- Short-term prediction: residual, SNR and listening test -----------------
figure()
n = len(data)
t = r_[0:n] / df  # time axis in seconds
plot(t, data, "k", alpha=0.1)
axis("tight")

# Compute the residual once; it is reused for the plot, the SNR and playback.
error = stp_error(data)
plot(t, error, "r")

# Ratio of mean signal power to mean residual power, reported in dB.
SNR2 = mean(data*data)/mean(error*error)
# Same output as the Python 2 statement `print "SNR", ..., "dB"`.
print("SNR %s dB" % (10*log10(SNR2),))

audio.io.play(data, df=df)
audio.io.play(error, df=df)
# Play the residual again, scaled so that its peak magnitude is 1.
M = amax(abs(error))
audio.io.play(error/M, df=df)
def ltp_parameters(history, frame,
                   offset_min=1, offset_max=None,
                   gain_min=0.0, gain_max=inf,
                   SNR_min=1.0, SNR_max=inf,
                   returns="offset, gain"):
    """Find the lag and gain that best predict `frame` from earlier samples.

    For every lag `i` in `0..len(history)`, the window of `len(frame)`
    samples ending `i` samples before the end of `history + frame` is
    compared with `frame`:

    - `nxcorrs[i]` is the normalized cross-correlation of frame and window,
    - `gains[i]` is the least-squares gain `nxcorr * |frame| / |window|`,
    - `SNRs[i]` is `1 / sqrt(1 - nxcorr**2)`, i.e. the frame norm divided by
      the norm of the residual left by that gain (linear scale),
    - `valids[i]` tells whether the lag, gain and SNR all lie within the
      requested bounds.

    The selected `offset` is the valid lag with the largest SNR.

    Parameters:
    history, frame         -- 1-d arrays: past samples and the frame to predict.
    offset_min, offset_max -- inclusive lag bounds; None disables a bound.
    gain_min, gain_max     -- inclusive gain bounds.
    SNR_min, SNR_max       -- inclusive SNR bounds.
    returns                -- comma-separated names handed to `wish.grant`.

    Returns whatever `wish.grant(returns)` produces; callers in this file
    unpack it as the values of the named variables of this function.

    Raises ValueError when no lag satisfies all the constraints.

    NOTE(review): `wish.grant` appears to look the requested names up among
    this function's local variables, so the local names below are part of
    the interface -- do not rename them or move the code into helpers.
    """
    p = len(history)
    data = r_[history, frame]  # full data
    m = len(frame)
    n = len(data)
    nxcorrs = zeros(p+1)
    gains = zeros(p+1)
    SNRs = zeros(p+1)
    valids = zeros(p+1, dtype=bool)
    frame_norm = linalg.norm(frame)
    normed_frame = frame / frame_norm
    for i in range(p + 1):
        windowed_data = data[n-i-m:n-i]
        windowed_data_norm = linalg.norm(windowed_data)
        normed_windowed_data = windowed_data / windowed_data_norm
        nxcorr = nxcorrs[i] = dot(normed_frame, normed_windowed_data)
        # Rounding can push |nxcorr| marginally above 1 for a (near-)perfect
        # match; clamp the radicand at 0 so that such a match gets an
        # infinite SNR instead of NaN (which would mark it as invalid).
        # `maximum` still propagates NaN coming from a zero-norm window.
        SNR = SNRs[i] = 1.0 / sqrt(maximum(0.0, 1.0 - nxcorr*nxcorr))
        gain = gains[i] = nxcorr / windowed_data_norm * frame_norm
        valid = True
        if offset_min is not None:
            valid = valid and (offset_min <= i)
        if offset_max is not None:
            valid = valid and (i <= offset_max)
        # Comparisons with NaN are False, so degenerate windows are rejected.
        valid = valid and (gain_min <= gain <= gain_max)
        valid = valid and (SNR_min <= SNR <= SNR_max)
        valids[i] = valid
    # Rank the lags by SNR, with the invalid ones pushed below every valid one.
    criteria = SNRs.copy()
    criteria[logical_not(valids)] = -inf
    offset = argmax(criteria)
    if not valids[offset]:  # everything is invalid!
        raise ValueError("no valid set of parameters")
    else:
        # Rebind the per-lag scalars to the values of the selected lag.
        gain = gains[offset]
        nxcorr = nxcorrs[offset]
        SNR = SNRs[offset]
    return wish.grant(returns)
# --- Long-term prediction on a synthetic signal ------------------------------
# Four periods of a sine plus uniform noise, with a linearly growing envelope.
N = 100
data_ = (0.7 * sin(r_[0:N]/N * 2*pi*4) + 0.10 * random.uniform(-1,1,N)) * (1.0 + 2.0*r_[0:1:1.0/N])
# The last 25 samples are the frame to predict, the rest is the history.
history, frame = data_[:-25], data_[-25:]
# Disabled experiment; if re-enabled it must stay after `history` is defined.
#history[::7] = 1.0
m = len(history)
n = len(frame)
offset_min = 5
offset, gain, nxcorrs, SNRs, valids = ltp_parameters(
    history, frame, offset_min=offset_min, SNR_min=1.0,
    returns="offset, gain, nxcorrs, SNRs, valids")
# Same output as the Python 2 statement `print "offset:", offset, "gain:", gain`.
print("offset: %s gain: %s" % (offset, gain))

figure()
plot(data_, "k", alpha=0.5, label="data")
plot(arange(0,n)+m, frame, "b", label="reference")
# The frame divided by the gain, drawn `offset` samples earlier, should
# overlay the part of the history it was matched with.
plot(arange(m-offset, m-offset+n), frame/gain, "r", label="matched")
axis("tight")
legend(loc=0)

# SNR of every candidate lag, with the admissible lags marked.
figure()
m = arange(len(SNRs))
plot(m[offset_min:],SNRs[offset_min:], "r", alpha=0.25, label="SNR")
n = arange(len(SNRs))
plot(n[valids],SNRs[valids], "bx", linewidth=1.0,label="valid")
xlabel("offset")
ylabel("SNR (linear scale)")
axis("tight")
legend(loc=0)
class LTP(Quantizer):
    """Long-term predictor using a single lag and gain per frame.

    `encode` looks for the best `(offset, gain)` pair with `ltp_parameters`
    and returns it with the output of the FIR filter `1 - gain * z^-offset`;
    `decode` loads the same pair into the AR filter and runs the error
    through it.
    """

    def __init__(self, order, **options):
        """Create the filters and an all-zero history of `order` samples.

        order   -- length of the history, hence the largest usable lag.
        options -- keyword arguments forwarded to `ltp_parameters`.
        """
        self.fir = FIR(a=r_[1.0, zeros(order)])
        self.history = zeros(order)
        self.ar = AR(a=zeros(order))
        self.order = order
        self.options = options

    def encode(self, frame):
        """Return `((offset, gain), error)` for `frame`.

        When `ltp_parameters` finds no admissible parameters the frame is
        encoded with `(0, 0.0)`, i.e. with no long-term prediction at all.
        """
        a = zeros_like(self.fir.a)
        a[0] = 1.0
        try:
            offset, gain = ltp_parameters(self.history, frame, **self.options)
        except ValueError:
            offset, gain = 0, 0.0
        else:
            a[offset] = - gain
        self.fir.a[:] = a
        error = self.fir(frame)
        # Keep exactly the most recent `len(self.history)` samples. Slicing
        # the concatenation (rather than the old history alone) also holds
        # when the frame is longer than the history, which would otherwise
        # grow past `order` and allow lags outside the filter.
        self.history = r_[self.history, frame][len(frame):]
        return (offset, gain), error

    def decode(self, data):
        """Rebuild a frame from an `((offset, gain), error)` pair."""
        (offset, gain), error = data
        a = zeros_like(self.ar.a)
        # The AR coefficient at index k is used for lag k + 1 here. Offset 0
        # is the "no prediction" code emitted by `encode`: leave every
        # coefficient at zero instead of writing to index -1.
        if offset > 0:
            a[offset-1] = gain
        self.ar.a[:] = a
        return self.ar(error)
# Frequency band (Hz) that the long-term predictor lags should cover.
f_min, f_max = 50.0, 400.0

# A lag of df/f samples spans one period at frequency f: the lowest
# frequency sets the history length, the highest one the smallest lag.
order_ltp, offset_min = int(df/f_min), int(df/f_max)
print(order_ltp)
print(offset_min)
def ltp_error(data, T=0.005, order=order_ltp, **options):
    """Return the long-term prediction error of `data` and its parameters.

    data    -- sequence of samples; the module-level `df` is used as its
               sampling frequency.
    T       -- frame duration in seconds.
    order   -- history length handed to `LTP`.
    options -- extra keyword arguments handed to `LTP`.

    Returns `(error, offset, gain)`: three arrays with as many samples as
    `data`, where `offset` and `gain` repeat the per-frame parameters over
    every sample of the corresponding frame.
    """
    total = len(data)
    frame_size = int(T * df)  # samples in T seconds at frequency df
    chunks = audio.frames.split(data, frame_size, pad=True)
    predictor = LTP(order=order, **options)
    residual = zeros(frame_size * len(chunks))
    lags = zeros_like(residual)
    weights = zeros_like(residual)
    for index, chunk in enumerate(chunks):
        (lag, weight), chunk_residual = predictor.encode(chunk)
        span = slice(index * frame_size, (index + 1) * frame_size)
        residual[span] = chunk_residual
        # One (lag, gain) pair per frame, spread over the frame's samples.
        lags[span] = lag
        weights[span] = weight
    # The buffers hold whole frames; cut them back to the input length.
    return residual[:total], lags[:total], weights[:total]
# --- Short-term then long-term prediction on the selected utterance ----------
stp_error_ = stp_error(data)
ltp_error_, offset, gain = ltp_error(stp_error_, offset_min=offset_min, SNR_min=1.1)

# Time axis in seconds, shared by the three figures.
n = len(data)
t = r_[0:n] / df

# Signal and both residuals.
figure()
plot(t, data, "k", alpha=0.1, label="audio")
axis("tight")
plot(t, stp_error_, "r", label="STP error")
plot(t, ltp_error_, "g", label="LTP error")
legend(loc=0)

# Frequency matching the selected lag, plus its 2nd and 3rd multiples.
figure()
plot(t, df / offset)
for harmonic, opacity in ((2, 0.5), (3, 0.25)):
    plot(t, harmonic*df / offset, "k.", alpha=opacity, ms=0.25)
axis([t[0], t[-1], 0.0, 400.0])
ylabel("Frequency (Hz)")
xlabel("Time (s)")

# Long-term prediction gain over time.
figure()
plot(t, gain)
axis("tight")

# Listen to the signal, then to each residual scaled to unit peak magnitude.
audio.io.play(data, df=df)
for residual in (stp_error_, ltp_error_):
    A = amax(abs(residual))
    audio.io.play(residual/A, df=df)