/**
* This module contains code for converting to and from the Textgrid
* datastructure. Textgrid files are typically stored as plain text.
* This library does not do actual file IO but instead converts
* to and from loaded strings to instances of Textgrid.
*
* @author Tim Mahrt
* @since March 25, 2015
* @module textgrid_io
*/
import iconvlite from 'iconv-lite';
import {
Textgrid, IntervalTier, PointTier, copyTier,
POINT_TIER, INTERVAL_TIER, MIN_INTERVAL_LENGTH
} from './textgrid';
import {
cropTextgrid
} from './textgrid_modifiers';
/**
 * Splits a string on a separator a limited number of times; whatever is left
 * after the last split is returned, unsplit, as the final piece (similar to
 * Python's str.split with a maxsplit argument).
 * Adapted from
 * http://stackoverflow.com/questions/6131195/javascript-splitting-string-from-the-first-comma
 * @param {string} str - the string to split
 * @param {string} separator - the separator to split on
 * @param {number} max - the maximum number of pieces to return; when falsy,
 *  no splitting is done and the whole string is the only piece
 * @return {Array} the pieces, in their original order
 * @ignore
 */
function extendedSplit (str, separator, max) {
  const pieces = [];
  // 'max' pieces means at most 'max - 1' cuts
  const cutLimit = max ? max - 1 : 0;
  let cursor = 0;

  while (pieces.length < cutLimit) {
    const hit = str.indexOf(separator, cursor);
    if (hit === -1) break;

    pieces.push(str.slice(cursor, hit));
    cursor = hit + separator.length;
  }

  // The remainder is kept whole, even if it still contains the separator
  pieces.push(str.slice(cursor));
  return pieces;
}
/**
 * Finds the start index of every occurrence of a substring, including
 * occurrences that overlap each other.
 * @param {string} sourceStr - the string to search in
 * @param {string} subStr - the substring to look for
 * @return {Array} the start indices in ascending order; empty when subStr
 *  does not occur or is the empty string
 * @ignore
 */
function findAllSubstrings (sourceStr, subStr) {
  const indexList = [];

  // indexOf never returns -1 for an empty needle, so the search below would
  // never terminate; there is nothing meaningful to locate anyway
  if (subStr === '') return indexList;

  let index = sourceStr.indexOf(subStr);
  while (index !== -1) {
    indexList.push(index);
    // Resume one character later so overlapping matches are also reported
    index = sourceStr.indexOf(subStr, index + 1);
  }
  return indexList;
}
/**
 * Reads one row of a textgrid: the text between the next occurrence of
 * searchStr (at or after index) and the end of that line. Surrounding
 * whitespace and one pair of enclosing double quotes are stripped.
 *
 * Only the text up to the first '\n' is read, so a value that spans several
 * lines is cut at its first line break.
 * @param {string} dataStr - the text to read from
 * @param {string} searchStr - the text that precedes the value (e.g. 'xmin = ');
 *  use '' to read the whole line that starts at index
 * @param {number} index - where to start looking; a negative value means the
 *  data was already exhausted by an earlier call
 * @return {Array} [word, nextIndex]; nextIndex is the index just past the
 *  row's line break, or -1 when searchStr was not found, when index was
 *  negative, or when the row was not terminated by a line break (in that last
 *  case word still holds the row's content)
 * @ignore
 */
function fetchRow (dataStr, searchStr, index) {
  // indexOf treats a negative offset as 0, which would silently restart the
  // scan from the beginning of the data; keep the 'exhausted' marker sticky
  if (index < 0) return ['', -1];

  const matchIndex = dataStr.indexOf(searchStr, index);
  if (matchIndex === -1) return ['', -1];

  const startIndex = matchIndex + searchStr.length;
  const newlineIndex = dataStr.indexOf('\n', startIndex);
  const hasLineBreak = newlineIndex !== -1;

  // substring(start, -1) swaps its arguments and would return the text that
  // comes BEFORE the row, so an unterminated row is read to the end instead
  const rowEnd = hasLineBreak ? newlineIndex : dataStr.length;

  let word = dataStr.substring(startIndex, rowEnd).trim();
  if (word.length >= 2 && word[0] === '"' && word[word.length - 1] === '"') {
    word = word.substring(1, word.length - 1);
  }
  word = word.trim();

  // Step past the line break, unless there was none
  return [word, hasLineBreak ? newlineIndex + 1 : -1];
}
/**
 * Parses a textgrid written in the long ("normal") text format, where each
 * value sits on its own 'key = value' row and every tier is introduced by an
 * 'item [n]:' row.
 *
 * Tiers without entries (e.g. 'points: size = 0') yield a tier with an empty
 * entry list.
 * @param {string} data - the file contents, using '\n' line breaks
 * @return {Textgrid}
 * @ignore
 */
function parseNormalTextgrid (data) {
  // Reads the value on the first row of 'chunk' that contains 'key'
  const readField = (chunk, key) => chunk.split(key, 2)[1].split('\n', 1)[0].trim();

  // The text before the first 'item [' is the file header
  const tierList = data.split('item [');
  const textgridHeader = tierList.shift();
  tierList.shift(); // Removing the document root empty item

  const textgrid = new Textgrid();
  textgrid.minTimestamp = parseFloat(readField(textgridHeader, 'xmin = '));
  textgrid.maxTimestamp = parseFloat(readField(textgridHeader, 'xmax = '));

  // Process each tier individually
  for (let i = 0; i < tierList.length; i++) {
    const tierTxt = tierList[i];
    const isInterval = tierTxt.indexOf('class = "IntervalTier"') > -1;

    // The tier's meta-information precedes 'intervals:' / 'points:';
    // the entries follow it
    const [header, tierData] = extendedSplit(tierTxt, isInterval ? 'intervals:' : 'points:', 2);

    let tierName = readField(header, 'name = ');
    tierName = tierName.slice(1, tierName.length - 1); // remove quotes
    const tierStart = parseFloat(readField(header, 'xmin = '));
    const tierEnd = parseFloat(readField(header, 'xmax = '));

    // Get the tier entry list
    const entryList = [];
    // Where the next entry is searched from; -1 once a row ran off the end
    // of the data (searching from -1 would restart at the beginning)
    let cursor = 0;
    let tier = null;
    if (isInterval) {
      // Check for the key explicitly: a missing key marks the end of the
      // tier, including a tier that has no entries at all
      while (cursor !== -1 && tierData.indexOf('xmin = ', cursor) !== -1) {
        const [timeStart, timeStartI] = fetchRow(tierData, 'xmin = ', cursor);
        if (timeStartI <= cursor) break;
        const [timeEnd, timeEndI] = fetchRow(tierData, 'xmax = ', timeStartI);
        const [label, labelEndI] = fetchRow(tierData, 'text =', timeEndI);
        entryList.push([parseFloat(timeStart), parseFloat(timeEnd), label.trim()]);
        cursor = labelEndI;
      }
      tier = new IntervalTier(tierName, entryList, tierStart, tierEnd);
    } else {
      while (cursor !== -1 && tierData.indexOf('number = ', cursor) !== -1) {
        const [timePoint, timePointI] = fetchRow(tierData, 'number = ', cursor);
        if (timePointI <= cursor) break;
        const [label, labelEndI] = fetchRow(tierData, 'mark =', timePointI);
        entryList.push([parseFloat(timePoint), label.trim()]);
        cursor = labelEndI;
      }
      tier = new PointTier(tierName, entryList, tierStart, tierEnd);
    }
    textgrid.addTier(tier);
  }
  return textgrid;
}
/**
 * Parses a textgrid written in the short text format, where values are
 * identified only by their position: each row holds a single bare value.
 *
 * The tiers are located by searching for the '"IntervalTier"' and
 * '"TextTier"' markers. A textgrid without any tier is parsed into a
 * Textgrid that only carries the header's min and max timestamps.
 * @param {string} data - the file contents, using '\n' line breaks
 * @return {Textgrid}
 * @ignore
 */
function parseShortTextgrid (data) {
  // Locate the start of every tier; the flag records if it is an interval tier
  const indexList = [];
  const intervalIndicies = findAllSubstrings(data, '"IntervalTier"');
  for (let i = 0; i < intervalIndicies.length; i++) {
    indexList.push([intervalIndicies[i], true]);
  }
  const pointIndicies = findAllSubstrings(data, '"TextTier"');
  for (let i = 0; i < pointIndicies.length; i++) {
    indexList.push([pointIndicies[i], false]);
  }
  indexList.push([data.length, null]); // The 'end' of the file
  indexList.sort((x, y) => x[0] - y[0]);

  // Each tier runs from its own marker up to the next marker (or the end)
  const tupleList = [];
  for (let i = 0; i < indexList.length - 1; i++) {
    tupleList.push([indexList[i][0], indexList[i + 1][0], indexList[i][1]]);
  }

  // Set the textgrid's min and max times. The header is everything before
  // the first marker; indexList always holds the end-of-file marker, so this
  // also works when the textgrid has no tiers
  const header = data.slice(0, indexList[0][0]);
  const headerList = header.split('\n');
  const tgMin = parseFloat(headerList[3]);
  const tgMax = parseFloat(headerList[4]);

  // Add the textgrid tiers
  const textgrid = new Textgrid();
  textgrid.minTimestamp = tgMin;
  textgrid.maxTimestamp = tgMax;
  for (let i = 0; i < tupleList.length; i++) {
    const [blockStartI, blockEndI, isInterval] = tupleList[i];
    const tierData = data.slice(blockStartI, blockEndI);

    // Tier meta-information: type row, name, start time, end time, entry count
    const metaStartI = fetchRow(tierData, '', 0)[1];
    const [tierName, tierNameEndI] = fetchRow(tierData, '', metaStartI);
    const [tierStartTxt, tierStartTimeI] = fetchRow(tierData, '', tierNameEndI);
    const [tierEndTxt, tierEndTimeI] = fetchRow(tierData, '', tierStartTimeI);
    const tierStartTime = parseFloat(tierStartTxt);
    const tierEndTime = parseFloat(tierEndTxt);

    // Tier entry data. rowI is where the next entry starts; -1 means the
    // data is exhausted (reading from -1 would restart at the beginning)
    let rowI = fetchRow(tierData, '', tierEndTimeI)[1];
    let tier = null;
    const entryList = [];
    if (isInterval === true) {
      while (rowI !== -1) {
        const [startTime, endTimeI] = fetchRow(tierData, '', rowI);
        if (endTimeI === -1) break;
        const [endTime, labelI] = fetchRow(tierData, '', endTimeI);
        if (labelI === -1) break; // truncated entry: there is no label row
        const [label, nextRowI] = fetchRow(tierData, '', labelI);
        // Times are converted here, as the long-form parser does
        entryList.push([parseFloat(startTime), parseFloat(endTime), label.trim()]);
        rowI = nextRowI;
      }
      tier = new IntervalTier(tierName, entryList, tierStartTime, tierEndTime);
    } else {
      while (rowI !== -1) {
        const [timePoint, labelI] = fetchRow(tierData, '', rowI);
        if (labelI === -1) break;
        const [label, nextRowI] = fetchRow(tierData, '', labelI);
        entryList.push([parseFloat(timePoint), label.trim()]);
        rowI = nextRowI;
      }
      tier = new PointTier(tierName, entryList, tierStartTime, tierEndTime);
    }
    textgrid.addTier(tier);
  }
  return textgrid;
}
/**
 * Fills in the space between intervals with blank intervals.
 * This is necessary to do when saving to create a well-formed textgrid.
 *
 * The given tier is not modified; a copy holding the padded entry list is
 * returned.
 * @param {IntervalTier} tier - entries are expected to be ordered by time
 * @param {string} [blankLabel=''] - the label given to the inserted intervals
 * @param {number} [startTime=null] - where the tier should begin; if null, tier.minTimestamp is used
 * @param {number} [endTime=null] - where the tier should end; if null, tier.maxTimestamp is used
 * @return {IntervalTier} a copy of the tier without gaps between startTime and endTime
 * @throws {Error} if the first entry starts before startTime or the last entry ends after endTime
 * @ignore
 */
function fillInBlanks (tier, blankLabel = '', startTime = null, endTime = null) {
  if (startTime === null) startTime = tier.minTimestamp;
  if (endTime === null) endTime = tier.maxTimestamp;

  // Work on a copy so the caller's entry list is never touched
  const entryList = tier.entryList.slice();

  // Special case: empty tier -- one blank interval covers the whole span
  if (entryList.length === 0) entryList.push([startTime, endTime, blankLabel]);

  // Create a new entry list, inserting a blank wherever two neighbours
  // do not touch
  const newEntryList = [entryList[0]];
  let prevEnd = parseFloat(entryList[0][1]);
  for (let i = 1; i < entryList.length; i++) {
    const newStart = parseFloat(entryList[i][0]);
    const newEnd = parseFloat(entryList[i][1]);
    if (prevEnd < newStart) newEntryList.push([prevEnd, newStart, blankLabel]);
    newEntryList.push(entryList[i]);
    prevEnd = newEnd;
  }

  // Special case: If there is a gap at the start of the file
  const firstStart = parseFloat(newEntryList[0][0]);
  if (firstStart < parseFloat(startTime)) {
    throw new Error('Tier data is before the tier start time.');
  }
  if (firstStart > parseFloat(startTime)) {
    newEntryList.unshift([startTime, newEntryList[0][0], blankLabel]);
  }

  // Special case: If there is a gap at the end of the file
  if (endTime !== null) {
    const lastEntry = newEntryList[newEntryList.length - 1];
    if (parseFloat(lastEntry[1]) > parseFloat(endTime)) {
      throw new Error('Tier data is after the tier end time.');
    }
    if (parseFloat(lastEntry[1]) < parseFloat(endTime)) {
      newEntryList.push([lastEntry[1], endTime, blankLabel]);
    }
  }
  return copyTier(tier, { entryList: newEntryList });
}
/**
 * Writes a tier as short-form text: the tier type, name, min and max
 * timestamps and the number of entries, followed by every value of every
 * entry, each on a line of its own. Labels are wrapped in double quotes.
 * @param {Object} tier - the tier to print; its tierType decides which
 *  position of an entry holds the label
 * @return {string} the text, ending with a line break
 * @ignore
 */
function tierToText (tier) {
  // Points are [time, label]; intervals are [start, end, label]
  let labelPosition;
  if (tier.tierType === POINT_TIER) {
    labelPosition = 1;
  } else if (tier.tierType === INTERVAL_TIER) {
    labelPosition = 2;
  }

  const lines = [
    `"${tier.tierType}"`,
    `"${tier.name}"`,
    `${tier.minTimestamp}`,
    `${tier.maxTimestamp}`,
    `${tier.entryList.length}`
  ];

  for (const entry of tier.entryList) {
    const fields = entry.map(value => `${value}`);
    fields[labelPosition] = `"${fields[labelPosition]}"`;
    lines.push(fields.join('\n'));
  }

  return lines.join('\n') + '\n';
}
/**
 * Remove intervals that are very tiny
 * Doing many small manipulations on intervals can lead to the creation
 * of ultrashort intervals (e.g. 1*10^-15 seconds long). This function
 * removes such intervals.
 *
 * The time covered by a removed interval is given to the interval kept just
 * before it, so no gap is left behind. If the removed interval(s) come first,
 * the first kept interval is stretched back to minTimestamp instead.
 * @param {IntervalTier} tier - the tier to clean; it is not modified
 * @param {number} minLength - intervals shorter than this are removed
 * @param {number} minTimestamp - where the first kept interval must start
 * @return {IntervalTier} a copy of the tier with the cleaned entry list
 * @ignore
 */
function removeUltrashortIntervals (tier, minLength, minTimestamp) {
  // First, remove tiny intervals
  const newEntryList = [];
  for (let i = 0; i < tier.entryList.length; i++) {
    const [start, stop, label] = tier.entryList[i];
    if (stop - start < minLength) {
      // Correct ultra-short entries: the previous kept interval absorbs them
      if (newEntryList.length > 0) {
        const lastI = newEntryList.length - 1;
        const [prevStart, , prevLabel] = newEntryList[lastI];
        newEntryList[lastI] = [prevStart, stop, prevLabel];
      }
    } else if (newEntryList.length === 0 && start !== minTimestamp) {
      // Special case: the first entry in oldEntryList was ultra-short
      newEntryList.push([minTimestamp, stop, label]);
    } else { // Normal case
      newEntryList.push([start, stop, label]);
    }
  }

  // Next, shift near equivalent tiny boundaries so that neighbours touch
  for (let j = 0; j < newEntryList.length - 1; j++) {
    const nextStart = newEntryList[j + 1][0];
    const diff = Math.abs(newEntryList[j][1] - nextStart);
    if (diff > 0 && diff < minLength) {
      newEntryList[j] = [newEntryList[j][0], nextStart, newEntryList[j][2]];
    }
  }
  return copyTier(tier, { entryList: newEntryList });
}
/**
 * Formats a textgrid instance for saving to a .csv file
 * @param {Textgrid} tg
 * @param {string} pivotTierName - One row in the output is listed for each entry in this tier.
 *  The corresponding entry in each tier will be provided on the same row
 *  along with the start and end time of the entry from the pivot tier.
 * @param {Array} [tierNameList=null] - the list of tier names to save. If null, save all tiers.
 * @param {boolean} [includeHeader=true] - if true, the first row holds the tier names
 *  followed by 'Start Time' and 'End Time'
 * @return {string} the csv text. Rows are separated by '\n'. A field that contains a comma,
 *  a double quote or a line break is wrapped in double quotes (with inner quotes doubled) so
 *  that it stays a single column.
 */
function serializeTextgridToCsv (tg, pivotTierName, tierNameList = null, includeHeader = true) {
  if (!tierNameList) tierNameList = tg.tierNameList;

  // Quotes a value only when it would otherwise break the column layout
  const toCsvField = (value) => {
    if (value === undefined || value === null) return '';
    const text = `${value}`;
    if (!/[",\r\n]/.test(text)) return text;
    return `"${text.replace(/"/g, '""')}"`;
  };

  const table = [];
  if (includeHeader === true) {
    table.push(tierNameList.concat(['Start Time', 'End Time']));
  }

  const tier = tg.tierDict[pivotTierName];
  for (let i = 0; i < tier.entryList.length; i++) {
    const start = tier.entryList[i][0];
    const stop = tier.entryList[i][1];
    const subTG = cropTextgrid(tg, start, stop, 'truncated', false);

    const row = tierNameList.map(tierName => {
      if (!subTG.tierNameList.includes(tierName)) return '';
      const subEntryList = subTG.tierDict[tierName].entryList;
      if (subEntryList.length === 0) return '';
      // The label is the last value of an entry for both intervals
      // ([start, end, label]) and points ([time, label])
      const firstEntry = subEntryList[0];
      return firstEntry[firstEntry.length - 1];
    });
    row.push(start, stop);
    table.push(row);
  }

  return table.map(row => row.map(toCsvField).join(',')).join('\n');
}
/**
 * Formats a textgrid instance for saving to a .TextGrid file.
 *
 * The textgrid is first passed through prepTgForSaving, which works on the
 * given instance itself (its interval tiers are replaced by gap-free copies).
 * @param {Textgrid} tg
 * @param {number} [minimumIntervalLength=MIN_INTERVAL_LENGTH] - remove all intervals shorter than this; if null, don't remove any intervals
 * @param {number} [minTimestamp = null] -- the minTimestamp of the saved Textgrid; if null, use whatever is defined in the Textgrid object. If an interval tier has data before minTimestamp, an exception will be thrown.
 * @param {number} [maxTimestamp = null] -- the maxTimestamp of the saved Textgrid; if null, use whatever is defined in the Textgrid object. If an interval tier has data after maxTimestamp, an exception will be thrown.
 * @param {boolean} [useShortForm = true] -- specifies whether to use the short or long form specification of a textgrid; the long form is more human readable, the short form is more compact
 * @return {string} A text representation of a textgrid that can be opened by Praat
 */
function serializeTextgrid (tg, minimumIntervalLength = MIN_INTERVAL_LENGTH, minTimestamp = null, maxTimestamp = null, useShortForm = true) {
  const startTime = minTimestamp === null ? tg.minTimestamp : minTimestamp;
  const endTime = maxTimestamp === null ? tg.maxTimestamp : maxTimestamp;

  const outputTg = prepTgForSaving(tg, minimumIntervalLength, startTime, endTime);

  if (useShortForm) {
    return tgToShortTextForm(outputTg, startTime, endTime);
  }
  return tgToLongTextForm(outputTg, startTime, endTime);
}
/**
 * Writes a textgrid in the short text format: a fixed file header, the time
 * span and the number of tiers, followed by the text of every tier in
 * tierNameList order.
 * @param {Textgrid} tg
 * @param {number} minTimestamp - the start time written to the header
 * @param {number} maxTimestamp - the end time written to the header
 * @return {string}
 * @ignore
 */
function tgToShortTextForm (tg, minTimestamp, maxTimestamp) {
  const headerLines = [
    'File type = "ooTextFile"',
    'Object class = "TextGrid"',
    '',
    `${minTimestamp}`,
    `${maxTimestamp}`,
    '<exists>',
    `${tg.tierNameList.length}`
  ];

  let outputTxt = headerLines.join('\n') + '\n';
  for (const tierName of tg.tierNameList) {
    outputTxt += tierToText(tg.tierDict[tierName]);
  }
  return outputTxt;
}
/**
 * Writes a textgrid in the long text format, where every value is written as
 * a 'key = value' row and nesting is shown with four spaces per level.
 * Every tier is written with the given min and max timestamps.
 * @param {Textgrid} tg
 * @param {number} minTimestamp - the start time written for the file and for every tier
 * @param {number} maxTimestamp - the end time written for the file and for every tier
 * @return {string}
 * @ignore
 */
function tgToLongTextForm (tg, minTimestamp, maxTimestamp) {
  // One output row, indented by 'depth' levels of four spaces
  const row = (depth, content) => `${' '.repeat(4 * depth)}${content}\n`;

  // File header
  const chunks = [
    'File type = "ooTextFile"\n',
    'Object class = "TextGrid"\n\n',
    row(0, `xmin = ${minTimestamp} `),
    row(0, `xmax = ${maxTimestamp} `),
    row(0, 'tiers? <exists> '),
    row(0, `size = ${tg.tierNameList.length} `),
    row(0, 'item []: ')
  ];

  tg.tierNameList.forEach((tierName, tierI) => {
    const tier = tg.tierDict[tierName];

    // Tier header
    chunks.push(
      row(1, `item [${tierI + 1}]:`),
      row(2, `class = "${tier.tierType}" `),
      row(2, `name = "${tierName}" `),
      row(2, `xmin = ${minTimestamp} `),
      row(2, `xmax = ${maxTimestamp} `)
    );

    if (tier.tierType === INTERVAL_TIER) {
      chunks.push(row(2, `intervals: size = ${tier.entryList.length} `));
      tier.entryList.forEach(([start, stop, label], entryI) => {
        chunks.push(
          row(2, `intervals [${entryI + 1}]:`),
          row(3, `xmin = ${start} `),
          row(3, `xmax = ${stop} `),
          row(3, `text = "${label}" `)
        );
      });
    } else {
      chunks.push(row(2, `points: size = ${tier.entryList.length} `));
      tier.entryList.forEach(([timestamp, label], entryI) => {
        chunks.push(
          row(2, `points [${entryI + 1}]:`),
          row(3, `number = ${timestamp} `),
          row(3, `mark = "${label}" `)
        );
      });
    }
  });

  return chunks.join('');
}
/**
 * Processing done before every textgrid is saved (serializeTextgrid calls this function) -- gaps in interval tiers are filled with blank intervals and short intervals can be removed.
 *
 * The given textgrid is worked on directly: its tiers are sorted and its
 * interval tiers are replaced in tierDict by their processed copies. The same
 * instance is returned.
 * @param {Textgrid} tg
 * @param {number} [minimumIntervalLength=MIN_INTERVAL_LENGTH] - remove all intervals shorter than this; if null, don't remove any intervals
 * @param {number} [minTimestamp = null] -- the minTimestamp of the saved Textgrid; if null, use whatever is defined in the Textgrid object. If an interval tier has data before minTimestamp, an exception will be thrown.
 * @param {number} [maxTimestamp = null] -- the maxTimestamp of the saved Textgrid; if null, use whatever is defined in the Textgrid object. If an interval tier has data after maxTimestamp, an exception will be thrown.
 * @return {Textgrid} the cleaned textgrid (the instance that was passed in)
 */
function prepTgForSaving (tg, minimumIntervalLength = MIN_INTERVAL_LENGTH, minTimestamp = null, maxTimestamp = null) {
  const startTime = minTimestamp === null ? tg.minTimestamp : minTimestamp;
  const endTime = maxTimestamp === null ? tg.maxTimestamp : maxTimestamp;

  const sortAllTiers = () => {
    tg.tierNameList.forEach(tierName => tg.tierDict[tierName].sort());
  };

  sortAllTiers();

  // Fill in the blank spaces for interval tiers
  tg.tierNameList.forEach(tierName => {
    const original = tg.tierDict[tierName];
    if (!(original instanceof IntervalTier)) return;

    let processed = fillInBlanks(original, '', startTime, endTime);
    if (minimumIntervalLength !== null) {
      processed = removeUltrashortIntervals(processed, minimumIntervalLength, startTime);
    }
    tg.tierDict[tierName] = processed;
  });

  // The interval tiers were replaced, so sort once more
  sortAllTiers();
  return tg;
}
/**
 * Creates an instance of a Textgrid from the contents of a .Textgrid file.
 *
 * Both the short and the long text format are supported. The short-form
 * parser is used when the header says 'ooTextFile short' or when the text
 * holds no 'item [' keyword; otherwise the long-form parser is used.
 * @param {Buffer|string} text - can be either a buffer or a raw text string
 * @param {boolean} readRaw - default false; if false, points and intervals with an empty label '' are removed; if true, all entries are kept as they appear in the file
 * @return {Textgrid}
 */
function parseTextgrid (text, readRaw = false) {
  text = decodeBuffer(text);
  text = text.replace(/\r\n/g, '\n');

  // The row scanners treat '\n' as the end of every row. Without a final
  // line break the last row cannot be delimited, which garbles its value and
  // keeps the parsers from ever reaching the end of the data
  if (!text.endsWith('\n')) text += '\n';

  let textgrid;
  const caseA = text.indexOf('ooTextFile short') > -1; // 'short' in header
  const caseB = text.indexOf('item [') === -1; // 'item' keyword not in file
  if (caseA || caseB) {
    textgrid = parseShortTextgrid(text);
  } else {
    textgrid = parseNormalTextgrid(text);
  }

  if (readRaw === false) {
    for (let i = 0; i < textgrid.tierNameList.length; i++) {
      const tierName = textgrid.tierNameList[i];
      const tier = removeBlanks(textgrid.tierDict[tierName]);
      textgrid.replaceTier(tierName, tier);
    }
  }
  return textgrid;
}
/**
 * Drops every entry whose label (the last value of the entry) is the empty
 * string.
 * @param {Object} tier - the tier to clean
 * @return {Object} a copy of the tier, made with copyTier, holding only the labelled entries
 * @ignore
 */
function removeBlanks (tier) {
  const labelledEntries = tier.entryList.filter(entry => entry[entry.length - 1] !== '');
  return copyTier(tier, { entryList: labelledEntries });
}
/**
 * Decodes a buffer to text. The buffer is decoded as utf16 first; if the
 * result does not contain the 'ooTextFile' marker, it is decoded as utf8
 * instead.
 * @param {Buffer} buffer - if not of type Buffer, it will be returned without modification.
 * @return {string}
 * @ignore
 */
function decodeBuffer (buffer) {
  if (!Buffer.isBuffer(buffer)) return buffer;

  const utf16Text = iconvlite.decode(buffer, 'utf16');
  if (utf16Text.indexOf('ooTextFile') !== -1) return utf16Text;

  return iconvlite.decode(buffer, 'utf8');
}
export { parseTextgrid, serializeTextgrid, serializeTextgridToCsv, decodeBuffer, prepTgForSaving };