Vocal tract [WIP]
Ditty implementation of a Kelly-Lochbaum vocal tract, à la Pink Trombone (dood.al/pinktrombone/).
todo:
- use the same tongue positioning as pink trombone
- nasal tract
- phonemes
- (maybe) make a similar enough sounding formant synthesizer
Log in to post a comment.
// Tempo for the whole ditty.
// actual bad apple is a bit faster (138bpm), but this should help make it a bit easier to work with
ditty.bpm = 120;
// manual controls
// These four values are read by the synth's process() and passed straight to
// Tract.shape(forward, up, nose, mouth); shape() does not use `nose` yet.
// NOTE(review): the trailing `min=…, max=…, step=…` comments look like slider
// metadata consumed by the host - confirm before editing or moving them.
input.forward = 0.5; // min=0, max=1, step=0.01
input.up = 0.5; // min=0, max=1, step=0.01
input.mouth = 0.5; // min=0, max=1, step=0.01
input.nose = 0.5; // min=0, max=1, step=0.01
// === vocal tract ===
/**
 * Sawtooth oscillator with a softened wrap-around edge.
 *
 * @param {number} p - oscillator phase in cycles; only the fractional part is used
 * @param {number} formant - edge sharpness (sign is ignored); larger values
 *   confine the softening to a narrower region around the wrap point
 * @returns {number} sample value; 0 at the wrap point, a plain 2x-1 ramp elsewhere
 */
function softsaw(p, formant) {
  // fractional phase (integer part removed by 32-bit truncation)
  const frac = p - (p | 0);
  const sharpness = Math.abs(formant);
  // naive saw, -1..1 over one cycle
  const ramp = 2 * frac - 1;
  // correction term: zero while |ramp| <= sharpness / (sharpness + 1),
  // then rising linearly to 1 at the wrap point
  const edge = Math.max(0, Math.abs((sharpness + 1) * ramp) - sharpness);
  // subtract the squared correction, signed by which half of the cycle we are in
  return ramp - edge * edge * Math.sign(frac - 0.5);
}
// air
const SPEED_OF_SOUND = 343; // roughly 343 m/s at 20 deg C
// vocal tract
// The synth converts each distance below to a sample count with
// sampleRate * distance / SPEED_OF_SOUND, so the values are in metres.
const TRACT_LENGTH = 0.17; // 0.17 m = 17 cm. NOTE(review): comment used to say 22 cm - confirm intended value
const NOSE_LENGTH = 0.14; // 14 cm
const NOSE_START = 0.17; // 0.17 m as written. NOTE(review): comment used to say 3 cm (0.03 m) - confirm intended value
const LIP_START = 0.15; // 15 cm from the glottis end of the tract
const TONGUE_START = 0.02; // 2 cm from the glottis end of the tract
// reflections
const GLOT_REFLECT = 0.75; // factor applied to the left-going wave at section 0
const LIP_REFLECT = -0.85; // factor applied to the right-going wave at the last section
/**
 * Kelly-Lochbaum style waveguide model of a single vocal tract.
 *
 * The tract is a chain of sections, each with a cross-section area. Every
 * sample a right-going wave (glottis -> lips) and a left-going wave
 * (lips -> glottis) are scattered at the junctions between neighbouring
 * sections according to their areas, then moved one section along.
 *
 * Buffers for a nasal tract are allocated but not simulated yet.
 */
class Tract {
  /**
   * All arguments are positions/lengths in samples and are truncated to integers.
   *
   * @param {number} len - number of sections in the oral tract
   * @param {number} nose - number of sections in the nasal tract
   * @param {number} junction - section index where the nasal tract branches off
   * @param {number} lip - first section belonging to the lips
   * @param {number} tongue - first section shaped by the tongue
   */
  constructor(len, nose, junction, lip, tongue) {
    const tractLen = len | 0;
    const noseLen = nose | 0;

    // oral tract wave state: tl/tr hold the current left/right-going waves,
    // twl/twr are scratch buffers for the next step (typed arrays start zeroed)
    this.tl = new Float32Array(tractLen);
    this.tr = new Float32Array(tractLen);
    this.twl = new Float32Array(tractLen);
    this.twr = new Float32Array(tractLen);
    // cross-section areas and the reflection coefficients derived from them
    this.tarea = new Float32Array(tractLen).fill(1);
    this.trefl = new Float32Array(tractLen);

    // junction state
    this.junction = junction | 0;

    // nasal tract state, sized by the nose length rather than the tract length
    this.nl = new Float32Array(noseLen);
    this.nr = new Float32Array(noseLen);
    this.nwl = new Float32Array(noseLen);
    this.nwr = new Float32Array(noseLen);
    this.narea = new Float32Array(noseLen).fill(1);
    this.nrefl = new Float32Array(noseLen);

    // articulation
    this.lip = lip | 0;
    this.tongue = tongue | 0;
  }

  /**
   * Fill `refl[i]` (i >= 1) with the reflection coefficient of the junction
   * between sections i-1 and i: (A[i-1] - A[i]) / (A[i-1] + A[i]).
   * A junction whose areas sum to (almost) zero is treated as fully reflective.
   *
   * @param {Float32Array} area - section areas
   * @param {Float32Array} refl - output coefficients, same length as `area`
   */
  static computeReflections(area, refl) {
    for (let i = 1; i < refl.length; i++) {
      const sum = area[i - 1] + area[i];
      refl[i] = Math.abs(sum) > 1e-6 ? (area[i - 1] - area[i]) / sum : 1;
    }
  }

  /** Recalculate the reflection coefficients of both tracts from their areas. */
  updateReflection() {
    Tract.computeReflections(this.tarea, this.trefl);
    Tract.computeReflections(this.narea, this.nrefl);
    // nose junction: not implemented yet
  }

  /**
   * Advance the oral tract by one sample.
   *
   * @param {number} v - excitation added to the right-going wave at the glottis
   */
  propagate(v) {
    const { tl, tr, twl, twr, trefl } = this;
    const last = tl.length - 1;
    // per-step loss applied to every travelling wave
    const damping = 0.999;

    // boundary conditions: glottal reflection plus excitation, and lip reflection
    twr[0] = tl[0] * GLOT_REFLECT + v;
    twl[last] = tr[last] * LIP_REFLECT;

    // scattering at every interior junction
    for (let i = 1; i <= last; i++) {
      const w = trefl[i] * (tr[i - 1] + tl[i]);
      twr[i] = tr[i - 1] - w;
      twl[i - 1] = tl[i] + w;
    }

    // commit the new state
    for (let i = 0; i <= last; i++) {
      tl[i] = twl[i] * damping;
      tr[i] = twr[i] * damping;
    }
    // nose: not implemented yet
  }

  /**
   * Set the cross-section areas of the oral tract.
   *
   * Sections before `tongue` get a fixed area, sections from `tongue` up to
   * `lip` follow a cosine bump, and sections from `lip` onwards follow a
   * parabola that narrows the opening in the middle of the lip region.
   *
   * @param {number} forward - centre of the cosine bump as a fraction of the tract length
   * @param {number} up - tongue height; larger values deepen the cosine bump
   * @param {number} nose - currently unused (the nasal tract has a static shape)
   * @param {number} mouth - lip constriction; its square scales the parabola
   */
  shape(forward, up, nose, mouth) {
    const area = this.tarea;
    const count = area.length;

    // values that do not depend on the section index
    const d = 2 + up / 1.5;
    const swing = 1.5 - d + 1.7;
    const angleScale = 1.1 * Math.PI;
    const tongueSpan = this.lip - this.tongue;
    const lipSpan = count - this.lip;
    const lipDepth = mouth * mouth * 4;
    const defaultArea = 0.6 * 0.6;

    for (let i = 0; i < count; i++) {
      if (i >= this.lip) {
        // where in the lip we are, 0..1
        const p = (i - this.lip) / lipSpan;
        const diameter = 1.5 - lipDepth * p * (1 - p);
        area[i] = diameter * diameter;
      } else if (i >= this.tongue) {
        // where in the mouth we are, relative to the tongue position
        const t = angleScale * (forward * count - i) / tongueSpan;
        const diameter = 1.5 - swing * Math.cos(t);
        area[i] = diameter * diameter;
      } else {
        area[i] = defaultArea;
      }
    }
  }

  /**
   * Run one sample through the tract.
   *
   * @param {number} v - glottal excitation for this sample
   * @returns {number[]} `[lips, nose]`: the right-going wave at the last
   *   section, and 0 for the nose (not simulated yet)
   */
  tick(v) {
    this.updateReflection();
    this.propagate(v);
    return [this.tr[this.tr.length - 1], 0];
  }
}
/**
 * Voice synth: a soft sawtooth "glottis" driving a waveguide vocal tract.
 *
 * Options read by the synth:
 *   - `voice`: indexable sequence of mouth shapes to move through during the note
 *   - `duration`: note length in ticks, used to pace the walk through `voice`
 */
const voc = synth.def(class {
  constructor(options) {
    // kept so selectShape() can be called without arguments
    this.options = options;
    // formant phase: fractional index into options.voice
    this.select = 0;
    // exciter phase, in cycles
    this.phase = 0;
    // distance in metres -> delay-line length in samples (Tract truncates it)
    const toSamples = (metres) => ditty.sampleRate * metres / SPEED_OF_SOUND;
    // vocal tract
    this.tract = new Tract(
      toSamples(TRACT_LENGTH),
      toSamples(NOSE_LENGTH),
      toSamples(NOSE_START),
      toSamples(LIP_START),
      toSamples(TONGUE_START)
    );
  }

  /**
   * Pick the two neighbouring entries of `options.voice` around the current
   * formant phase, plus the blend factor between them.
   *
   * @param {object} [options] - note options; defaults to the ones given to the constructor
   * @returns {{fa: *, fb: *, fblend: number}} current entry, next entry (or the
   *   current one when there is no next), and the fractional position between them
   */
  selectShape(options = this.options) {
    // `a - b|0` parses as `(a - b)|0`, so truncate first and subtract afterwards
    const index = this.select | 0;
    // how much to blend
    const fblend = this.select - index;
    // first index
    const fa = options.voice[index];
    // second index
    const fb = options.voice[index + 1] || fa;
    return { fa, fb, fblend };
  }

  /**
   * Produce one output sample (presumably called by the host once per sample).
   *
   * @param {number} note - MIDI note number
   * @param {object} env - envelope; `env.value` scales the glottal source
   * @param {number} tick - unused here
   * @param {object} options - note options (`voice`, `duration`)
   * @returns {number} lip output of the tract, scaled by 0.5
   */
  process(note, env, tick, options) {
    // phase bookkeeping
    const dt = midi_to_hz(note) * ditty.dt;
    this.phase += dt;
    this.phase -= this.phase | 0;

    // formant bookkeeping: move from the first to the last voice entry over the
    // note's duration; a note without a voice simply does not advance
    const steps = options.voice ? options.voice.length - 1 : 0;
    const fdt = steps * ditty.dt / tick_to_second(options.duration);
    this.select += fdt;

    // update diameters
    // TODO
    // tick tracts
    // see https://dood.al/pinktrombone/
    // TODO
    // turbulence?
    // TODO

    // glottal air wave
    // technically not entirely correct like this, but is a lot easier to do
    const glottal = env.value * softsaw(this.phase, 4);
    this.tract.shape(input.forward, input.up, input.nose, input.mouth);
    const [mouth, nose] = this.tract.tick(glottal);

    // logging
    debug.log("Frequency", midi_to_hz(note));
    debug.probe("Glottal flow", glottal, 1, 4 / midi_to_hz(note));
    debug.probe("Lips out", mouth, 2, 6 / midi_to_hz(note));
    debug.probe("Nose out", nose, 2, 6 / midi_to_hz(note));

    // out!
    return mouth * 0.5;
  }
}, { attack: 0.05, release: 0.05 });
// === melody ===
/**
 * Sing a pattern of notes, similar to patmerge but with a vocal per note.
 *
 * Consecutive identical notes in `pat` are merged into one held note whose
 * duration is the sum of their steps. Each merged note then plays for its
 * duration; notes that are not greater than 0 are rests (no play, only sleep).
 *
 * @param {{play: Function}} instr - instrument to play the notes on
 * @param {Iterable<number>} pat - note numbers, one per step
 * @param {number} time - length of one step
 * @param {Array} [vocals] - one entry per merged note (rests included in the
 *   count), passed to the instrument as the `voice` option
 */
function sing(instr, pat, time, vocals = []) {
  // merge runs of the same note into { note, duration } pairs
  const held = [];
  for (const note of pat) {
    const previous = held[held.length - 1];
    if (previous !== undefined && previous.note === note) previous.duration += time;
    else held.push({ note, duration: time });
  }

  for (const [i, { note, duration }] of held.entries()) {
    if (note > 0) instr.play(note, { duration, voice: vocals[i] });
    sleep(duration);
  }
}
// vocals!
/**
 * Sing the first verse of Bad Apple (64 beats in total).
 *
 * The verse is built from five recurring melodic phrases; each line pairs one
 * phrase with its syllables. The romanization is slightly shuffled around so
 * that the syllables fit the notes.
 *
 * @param vc - instrument handed through to sing()
 */
function vocalsVerse(vc) {
  // melodic phrases, one note per step
  const rise = [ds4, f4, fs4, gs4, as4, as4, ds5, cs5];
  const fall = [as4, as4, ds4, ds4, as4, gs4, fs4, f4];
  const turn = [ds4, f4, fs4, gs4, as4, as4, gs4, fs4];
  const wind = [f4, ds4, f4, fs4, f4, ds4, d4, f4];
  const cadence = [f4, fs4, gs4, as4];

  // [phrase, step length in beats, syllables]
  const lines = [
    // nagareteku toki
    [rise, 1 / 2, ["na", "ga", "re", "te", "ku", "to", "ki"]],
    // no na ka de demo
    [fall, 1 / 2, ["no", "na", "ka", "de", "de", "mo"]],
    // kedarusa ga hora
    [turn, 1 / 2, ["ke", "da", "ru", "sa", "ga", "ho", "ra"]],
    // guruguru mawa ate
    [wind, 1 / 2, ["gu", "ru", "gu", "ru", "ma", "wa", "a", "te"]],
    // watashi ka ra hana
    [rise, 1 / 2, ["wa", "ta", "shi", "ka", "ra", "ha", "na"]],
    // re ru kokoro mo
    [fall, 1 / 2, ["re", "ru", "ko", "ko", "ro", "mo"]],
    // mi en ai wa so o
    [turn, 1 / 2, ["mi", "en", "a", "i", "wa", "so", "o"]],
    // shi ra na i?
    [cadence, 1 / 1, ["shi", "ra", "na", "i"]],
    // jibu un ka ra -- u go
    [rise, 1 / 2, ["ji", "bu", "un", "ka", "ra", "u", "go"]],
    // ku ko to mo naku
    [fall, 1 / 2, ["ku", "ko", "to", "mo", "na", "ku"]],
    // toki no suki ma ni
    [turn, 1 / 2, ["to", "ki", "no", "su", "ki", "ma", "ni"]],
    // nagasare tsuzukete
    [wind, 1 / 2, ["na", "ga", "sa", "re", "tsu", "zu", "ke", "te"]],
    // Shirana i wa mawa
    [rise, 1 / 2, ["shi", "ra", "na", "i", "wa", "ma", "wa"]],
    // ri no koto nado
    [fall, 1 / 2, ["ri", "no", "ko", "to", "na", "do"]],
    // watashi wa watashi
    [turn, 1 / 2, ["wa", "ta", "shi", "wa", "wa", "ta", "shi"]],
    // sore dake
    [cadence, 1 / 1, ["so", "re", "da", "ke"]],
  ];

  for (const [melody, step, syllables] of lines) {
    // hand every call its own copy of the phrase
    sing(vc, [...melody], step, syllables);
  }
}
// Main loop, registered under the name 'Vocals'.
// Each pass sings a slow four-note articulation demo (6 beats per note)
// and then the Bad Apple verse, both on the `voc` synth.
loop(() => {
// voice articulation demo
sing(voc, [ds4, f4, gs4, as4], 6, ["sus", "amo", "gus", "sus"]);
// sing bad apple
vocalsVerse(voc);
}, { name: 'Vocals' });