praatio.utilities.textgrid_io

  1import re
  2import json
  3from typing import Optional, Tuple, List, Any, Dict, Match
  4
  5from typing_extensions import Literal
  6
  7from praatio.utilities import errors
  8from praatio.utilities import my_math
  9from praatio.utilities import utils
 10from praatio.utilities.constants import (
 11    TextgridFormats,
 12    MIN_INTERVAL_LENGTH,
 13    Interval,
 14    Point,
 15    INTERVAL_TIER,
 16    POINT_TIER,
 17)
 18
 19
 20def reSearch(pattern, string, flags=None) -> Match[str]:
 21    """Search for the string to match. Throws an error if no match is found."""
 22    if flags:
 23        matches = re.search(pattern, string, flags)
 24    else:
 25        matches = re.search(pattern, string)
 26
 27    if not matches:
 28        raise errors.ParsingError("Expected field in Textgrid missing.")
 29
 30    return matches
 31
 32
 33def _removeBlanks(tier: Dict) -> None:
 34    def hasContent(entry):
 35        return entry[-1] != ""
 36
 37    tier["entries"] = filter(hasContent, tier["entries"])
 38
 39
 40def _removeUltrashortIntervals(
 41    tier: Dict, minLength: float, minTimestamp: float
 42) -> None:
 43    """Remove intervals that are very tiny
 44
 45    Doing many small manipulations on intervals can lead to the creation
 46    of ultrashort intervals (e.g. 1*10^-15 seconds long).  This function
 47    removes such intervals.
 48    """
 49
 50    # First, remove tiny intervals
 51    newEntries: List[Interval] = []
 52    j = 0  # index to newEntries
 53    for start, end, label in tier["entries"]:
 54
 55        if end - start < minLength:
 56            # Correct ultra-short entries
 57            if len(newEntries) > 0:
 58                lastStart, _, lastLabel = newEntries[j - 1]
 59                newEntries[j - 1] = Interval(lastStart, end, lastLabel)
 60        else:
 61            # Special case: the first entry in oldEntries was ultra-short
 62            if len(newEntries) == 0 and start != minTimestamp:
 63                newEntries.append(Interval(minTimestamp, end, label))
 64            # Normal case
 65            else:
 66                newEntries.append(Interval(start, end, label))
 67            j += 1
 68
 69    # Next, shift near equivalent tiny boundaries
 70    # This will link intervals that were connected by an interval
 71    # that was shorter than minLength
 72    j = 0
 73    while j < len(newEntries) - 1:
 74        diff = abs(newEntries[j][1] - newEntries[j + 1][0])
 75        if diff > 0 and diff < minLength:
 76            newEntries[j] = Interval(
 77                newEntries[j][0],
 78                newEntries[j + 1][0],
 79                newEntries[j][2],
 80            )
 81        j += 1
 82
 83    tier["entries"] = newEntries
 84
 85
 86def _fillInBlanks(
 87    tier: Dict,
 88    blankLabel: str = "",
 89    minTime: Optional[float] = None,
 90    maxTime: Optional[float] = None,
 91) -> None:
 92    """Fills in the space between intervals with empty space
 93
 94    This is necessary to do when saving to create a well-formed textgrid
 95    """
 96    if minTime is None:
 97        minTime = tier["xmin"]
 98
 99    if maxTime is None:
100        maxTime = tier["xmax"]
101
102    # Special case: empty textgrid
103    if len(tier["entries"]) == 0:
104        tier["entries"].append((minTime, maxTime, blankLabel))
105
106    # Create a new entry list
107    entries = tier["entries"]
108    entry = entries[0]
109    prevEnd = float(entry[1])
110    newEntries = [entry]
111    for entry in entries[1:]:
112        newStart = float(entry[0])
113        newEnd = float(entry[1])
114
115        if prevEnd < newStart:
116            newEntries.append((prevEnd, newStart, blankLabel))
117        newEntries.append(entry)
118
119        prevEnd = newEnd
120
121    # Special case: If there is a gap at the start of the file
122    if float(newEntries[0][0]) < float(minTime):
123        raise errors.ParsingError(
124            "The entries are shorter than the min time specified in the textgrid."
125        )
126    if float(newEntries[0][0]) > float(minTime):
127        newEntries.insert(0, (minTime, newEntries[0][0], blankLabel))
128
129    # Special case -- if there is a gap at the end of the file
130    if maxTime is not None:
131        if float(newEntries[-1][1]) > float(maxTime):
132            raise errors.ParsingError(
133                "The entries are longer than the max time specified in the textgrid."
134            )
135        if float(newEntries[-1][1]) < float(maxTime):
136            newEntries.append((newEntries[-1][1], maxTime, blankLabel))
137
138    newEntries.sort()
139    tier["entries"] = newEntries
140
141
def parseTextgridStr(data: str, includeEmptyIntervals: bool = False) -> Dict:
    """Builds a textgrid dictionary from the text of a textgrid

    The text is first tried as JSON (either the textgrid-shaped or the
    simplified json layout).  If it is not valid JSON, it is parsed as
    a short or a long Praat textgrid.

    https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html

    Args:
        data: the textgrid, as a string
        includeEmptyIntervals: if False, points and intervals with
             an empty label '' are not included in the returned dictionary

    Returns:
        Dictionary
    """

    try:
        tgAsDict = json.loads(data)
        usesSimplifiedJson = "start" in tgAsDict.keys()
        if usesSimplifiedJson:
            tgAsDict = _upconvertDictionaryFromJson(tgAsDict)
    except ValueError:
        # Not JSON, so it has to be one of the Praat text formats.
        # Anything without "item [" is treated as the short format.
        isShortFormat = "ooTextFile short" in data or "item [" not in data
        parser = _parseShortTextgrid if isShortFormat else _parseNormalTextgrid
        tgAsDict = parser(data)

    if includeEmptyIntervals is False:
        for tier in tgAsDict["tiers"]:
            _removeBlanks(tier)

    return tgAsDict
173
174
def getTextgridAsStr(
    tg: Dict,
    format: Literal["short_textgrid", "long_textgrid", "json", "textgrid_json"],
    includeBlankSpaces: bool,
    minTimestamp: Optional[float] = None,
    maxTimestamp: Optional[float] = None,
    minimumIntervalLength: float = MIN_INTERVAL_LENGTH,
) -> str:
    """Serializes a textgrid dictionary into text that can be saved

    Args:
        tg: the textgrid to convert to a string
        format: one of ['short_textgrid', 'long_textgrid', 'json', 'textgrid_json']
        includeBlankSpaces: if True, blank sections in interval
            tiers will be filled in with an empty interval
            (with a label of "")
        minTimestamp: the minTimestamp of the saved Textgrid;
            if None, the value stored in the textgrid is used.
            An exception is thrown if minTimestamp is larger than
            timestamps in the textgrid.
        maxTimestamp: the maxTimestamp of the saved Textgrid;
            if None, the value stored in the textgrid is used.
            An exception is thrown if maxTimestamp is smaller than
            timestamps in the textgrid.
        minimumIntervalLength: any labeled intervals smaller
            than this will be removed, useful for removing ultrashort
            or fragmented intervals; if None, don't remove any.
            Removed intervals are merged (without their label) into
            adjacent entries.

    Returns:
        a string representation of the textgrid
    """

    utils.validateOption("format", format, TextgridFormats)

    # Normalizes the textgrid (sorting, bounds, optional blank filling)
    # before it is handed to one of the writers below
    preparedTg = _prepTgForSaving(
        tg,
        includeBlankSpaces,
        minTimestamp,
        maxTimestamp,
        minimumIntervalLength,
    )

    if format == TextgridFormats.LONG_TEXTGRID:
        serialized = _tgToLongTextForm(preparedTg)
    elif format == TextgridFormats.SHORT_TEXTGRID:
        serialized = _tgToShortTextForm(preparedTg)
    elif format == TextgridFormats.JSON:
        minimalDict = _downconvertDictionaryForJson(preparedTg)
        serialized = _tgToJson(minimalDict)
    elif format == TextgridFormats.TEXTGRID_JSON:
        serialized = _tgToJson(preparedTg)

    return serialized
225
226
227def _upconvertDictionaryFromJson(tgAsDict: dict) -> dict:
228    """
229    Convert from the sparse json format to the one shaped more literally like a textgrid
230    """
231    transformedDict = {}
232    transformedDict["xmin"] = tgAsDict["start"]
233    transformedDict["xmax"] = tgAsDict["end"]
234    transformedDict["tiers"] = []
235
236    for tierName in tgAsDict["tiers"].keys():
237        tier = tgAsDict["tiers"][tierName]
238        transformedDict["tiers"].append(
239            {
240                "class": tier["type"],
241                "name": tierName,
242                "xmin": tgAsDict["start"],
243                "xmax": tgAsDict["end"],
244                "entries": tier["entries"],
245            }
246        )
247
248    return transformedDict
249
250
251def _downconvertDictionaryForJson(tgAsDict: Dict) -> dict:
252    """
253    Convert from the textgrid-shaped json format to a more minimal json format
254    """
255    tiers = {}
256    for tier in tgAsDict["tiers"]:
257        tiers[tier["name"]] = {
258            "type": tier["class"],
259            "entries": tier["entries"],
260        }
261
262    return {
263        "start": tgAsDict["xmin"],
264        "end": tgAsDict["xmax"],
265        "tiers": tiers,
266    }
267
268
269def _sortEntries(tg: Dict) -> None:
270    for tier in tg["tiers"]:
271        tier["entries"] = sorted(tier["entries"])
272
273
def _prepTgForSaving(
    tg: Dict,
    includeBlankSpaces: Optional[bool],
    minTimestamp: Optional[float],
    maxTimestamp: Optional[float],
    minimumIntervalLength: float,
) -> Dict:
    """Normalizes a textgrid dictionary, in place, before it is written out

    Args:
        tg: the textgrid dictionary; it is modified and also returned
        includeBlankSpaces: if truthy, the gaps in every interval tier
            are filled with blank intervals
        minTimestamp: if not None, stored as the textgrid's "xmin";
            otherwise the textgrid's own "xmin" is used when filling
        maxTimestamp: if not None, stored as the textgrid's "xmax";
            otherwise the textgrid's own "xmax" is used when filling
        minimumIntervalLength: intervals shorter than this are removed
            from interval tiers; None disables the removal.  This only
            happens when includeBlankSpaces is truthy.

    Returns:
        the same dictionary that was passed in
    """
    _sortEntries(tg)

    # An explicit timestamp overrides the stored bound; a missing one
    # falls back to the stored bound
    if minTimestamp is None:
        minTimestamp = tg["xmin"]
    else:
        tg["xmin"] = minTimestamp

    if maxTimestamp is None:
        maxTimestamp = tg["xmax"]
    else:
        tg["xmax"] = maxTimestamp

    # Fill in the blank spaces for interval tiers
    if includeBlankSpaces:
        for tier in tg["tiers"]:
            # Point tiers have no spans to fill
            if tier["class"] == POINT_TIER:
                continue

            _fillInBlanks(tier, "", minTimestamp, maxTimestamp)
            if minimumIntervalLength is not None:
                _removeUltrashortIntervals(tier, minimumIntervalLength, minTimestamp)

    _sortEntries(tg)

    return tg
308
309
def _tgToShortTextForm(
    tg: Dict,
) -> str:
    """Renders a textgrid dictionary in the short textgrid format

    The short format has no field names: every value sits on its own
    line, in a fixed order.
    """

    # Header
    chunks = [
        'File type = "ooTextFile"\n',
        'Object class = "TextGrid"\n\n',
        "%s\n%s\n"
        % (
            my_math.numToStr(tg["xmin"]),
            my_math.numToStr(tg["xmax"]),
        ),
        "<exists>\n%d\n" % len(tg["tiers"]),
    ]

    for tier in tg["tiers"]:
        # Tier header: class, name, start, end, number of entries
        chunks.append('"%s"\n' % tier["class"])
        chunks.append('"%s"\n' % utils.escapeQuotes(tier["name"]))
        chunks.append(
            "%s\n%s\n%s\n"
            % (
                my_math.numToStr(tier["xmin"]),
                my_math.numToStr(tier["xmax"]),
                len(tier["entries"]),
            )
        )

        # Every field but the last is a timestamp; the last one is the
        # label, which is written quoted and escaped
        for entry in tier["entries"]:
            fields = [my_math.numToStr(val) for val in entry[:-1]]
            fields.append('"%s"' % utils.escapeQuotes(entry[-1]))
            chunks.append("\n".join(str(field) for field in fields) + "\n")

    return "".join(chunks)
343
344
def _tgToLongTextForm(tg: Dict) -> str:
    """Renders a textgrid dictionary in the long textgrid format

    The long format writes every value as an indented "name = value"
    line and numbers the tiers and their entries starting from 1.
    """
    indent1 = " " * 4
    indent2 = indent1 * 2
    indent3 = indent1 * 3

    # File header
    chunks = [
        'File type = "ooTextFile"\n',
        'Object class = "TextGrid"\n\n',
        "xmin = %s \n" % my_math.numToStr(tg["xmin"]),
        "xmax = %s \n" % my_math.numToStr(tg["xmax"]),
        "tiers? <exists> \n",
        "size = %d \n" % len(tg["tiers"]),
        "item []: \n",
    ]

    for tierNum, tier in enumerate(tg["tiers"], start=1):
        # Tier header
        chunks.append(indent1 + "item [%d]:\n" % tierNum)
        chunks.append(indent2 + 'class = "%s" \n' % tier["class"])
        chunks.append(indent2 + 'name = "%s" \n' % utils.escapeQuotes(tier["name"]))
        chunks.append(indent2 + "xmin = %s \n" % my_math.numToStr(tier["xmin"]))
        chunks.append(indent2 + "xmax = %s \n" % my_math.numToStr(tier["xmax"]))

        entries = tier["entries"]
        if tier["class"] == INTERVAL_TIER:
            chunks.append(indent2 + "intervals: size = %d \n" % len(entries))
            for intervalNum, (start, end, label) in enumerate(entries, start=1):
                chunks.append(indent2 + "intervals [%d]:\n" % intervalNum)
                chunks.append(indent3 + "xmin = %s \n" % my_math.numToStr(start))
                chunks.append(indent3 + "xmax = %s \n" % my_math.numToStr(end))
                chunks.append(indent3 + 'text = "%s" \n' % utils.escapeQuotes(label))
        else:
            # Anything that is not an interval tier is written as points
            chunks.append(indent2 + "points: size = %d \n" % len(entries))
            for pointNum, (timestamp, label) in enumerate(entries, start=1):
                chunks.append(indent2 + "points [%d]:\n" % pointNum)
                chunks.append(indent3 + "number = %s \n" % my_math.numToStr(timestamp))
                chunks.append(indent3 + 'mark = "%s" \n' % utils.escapeQuotes(label))

    return "".join(chunks)
385
386
387def _tgToJson(tgAsDict: Dict) -> str:
388    """Returns a json representation of a textgrid"""
389    return json.dumps(tgAsDict, ensure_ascii=False)
390
391
def _parseNormalTextgrid(data: str) -> Dict:
    """
    Reads a normal (long format) textgrid

    The text is cut into tiers at every "item [" and each tier is cut
    into entries at every "intervals [" or "points [".  The fields of
    each piece are then pulled out with regular expressions.

    Entry timestamps are stored as the matched strings; only the
    textgrid and tier bounds are converted to numbers here.  A leading
    minus sign on xmin and point times is left out of the captured
    value (see the "-0" note below).

    Args:
        data: the contents of a long format textgrid

    Returns:
        a dictionary with "xmin", "xmax" and a list of "tiers"

    Raises:
        errors.ParsingError: if an expected field cannot be found
        ValueError: if the text contains no "item [" at all
    """
    data = data.replace("\r\n", "\n")

    # Toss textgrid header
    header, data = re.split(r"item ?\[", data, maxsplit=1, flags=re.MULTILINE)

    # The textgrid bounds are read from the 4th and 5th header lines
    headerList = header.split("\n")
    tgMin = float(headerList[3].split("=")[1].strip())
    tgMax = float(headerList[4].split("=")[1].strip())

    # Process each tier individually.  The first piece is what is left
    # of the "item []:" line, so it is skipped.
    tiers = []
    tierList = re.split(r"item ?\[", data, flags=re.MULTILINE)[1:]
    for tierTxt in tierList:
        if 'class = "IntervalTier"' in tierTxt:
            tierType = INTERVAL_TIER
            searchWord = r"intervals ?\["
        else:
            tierType = POINT_TIER
            searchWord = r"points ?\["

        # Get tier meta-information.  re.split() always returns at least
        # one piece, so a tier with no entries yields an empty tierData.
        pieces = re.split(searchWord, tierTxt, flags=re.MULTILINE)
        header, tierData = pieces[0], pieces[1:]

        tierName = reSearch(
            r"name ?= ?\"(.*)\"\s*$", header, flags=re.MULTILINE
        ).groups()[0]
        tierName = re.sub(r'""', '"', tierName)

        # "-0" has been reported as a potential start time
        tierStartTimeStr = reSearch(
            r"xmin ?= ?-?([\d.]+)\s*$", header, flags=re.MULTILINE
        ).groups()[0]
        tierStartTime = utils.strToIntOrFloat(tierStartTimeStr)

        tierEndTimeStr = reSearch(
            r"xmax ?= ?([\d.]+)\s*$", header, flags=re.MULTILINE
        ).groups()[0]
        tierEndTime = utils.strToIntOrFloat(tierEndTimeStr)

        # Get the tier entry list
        entries: List[Any] = []
        if tierType == INTERVAL_TIER:
            for element in tierData:
                timeStart = reSearch(
                    r"xmin ?= ?-?([\d.]+)\s*$", element, flags=re.MULTILINE
                ).groups()[0]
                timeEnd = reSearch(
                    r"xmax ?= ?([\d.]+)\s*$", element, flags=re.MULTILINE
                ).groups()[0]
                # DOTALL, because a label may run over several lines
                label = reSearch(
                    r"text ?= ?\"(.*)\"\s*$",
                    element,
                    flags=re.MULTILINE | re.DOTALL,
                ).groups()[0]

                label = label.strip()
                label = re.sub(r'""', '"', label)
                entries.append(Interval(timeStart, timeEnd, label))
        else:
            for element in tierData:
                time = reSearch(
                    r"number ?= ?-?([\d.]+)\s*$", element, flags=re.MULTILINE
                ).groups()[0]
                label = reSearch(
                    r"mark ?= ?\"(.*)\"\s*$",
                    element,
                    flags=re.MULTILINE | re.DOTALL,
                ).groups()[0]
                label = label.strip()
                # Quote marks are doubled when a textgrid is written;
                # undo that here, the same way as for interval labels
                label = re.sub(r'""', '"', label)
                entries.append(Point(time, label))

        tierAsDict = {
            "class": tierType,
            "name": tierName,
            "xmin": float(tierStartTime),
            "xmax": float(tierEndTime),
            "entries": entries,
        }
        tiers.append(tierAsDict)

    tgDict = {"xmin": tgMin, "xmax": tgMax, "tiers": tiers}

    return tgDict
487
488
def _parseShortTextgrid(data: str) -> Dict:
    """Reads a short textgrid file

    Tiers are located by searching for their quoted class name
    ("IntervalTier" or "TextTier").  Each tier's text is then read row
    by row; the entries are read until a row can no longer be fetched.

    Entry timestamps are stored as the strings found in the file; only
    the textgrid and tier bounds are converted to numbers here.
    """
    data = data.replace("\r\n", "\n")

    # (position, isInterval) for the start of every tier, in file order
    tierMarkers = [(i, True) for i in utils.findAll(data, '"IntervalTier"')]
    tierMarkers += [(i, False) for i in utils.findAll(data, '"TextTier"')]
    tierMarkers.append((len(data), True))  # The 'end' of the file
    tierMarkers.sort()

    # A tier runs from its own marker up to the following marker
    tierSpans = [
        (blockStart, nextBlockStart, isInterval)
        for (blockStart, isInterval), (nextBlockStart, _) in zip(
            tierMarkers, tierMarkers[1:]
        )
    ]

    # Set the textgrid's min and max times
    headerRows = data[: tierSpans[0][0]].split("\n")
    tgMin = float(headerRows[3].strip())
    tgMax = float(headerRows[4].strip())

    # Load the data for each tier
    tiers = []
    for blockStart, blockEnd, isInterval in tierSpans:
        block = data[blockStart:blockEnd]

        # First row contains the tier type, which we already know
        _, cursor = _fetchRow(block, 0)

        # Tier meta-information: name, start time, end time and then
        # one more row, which is skipped
        tierName, cursor = _fetchTextRow(block, cursor)
        tierStartTimeStr, cursor = _fetchRow(block, cursor)
        tierEndTimeStr, cursor = _fetchRow(block, cursor)
        _, cursor = _fetchRow(block, cursor)

        tierStartTime = utils.strToIntOrFloat(tierStartTimeStr)
        tierEndTime = utils.strToIntOrFloat(tierEndTimeStr)

        # Tier entry data; running out of rows raises inside the fetch
        # functions, which is what ends the loops
        entries: List[Any] = []
        if isInterval:
            className = INTERVAL_TIER
            while True:
                try:
                    startTime, nextI = _fetchRow(block, cursor)
                    endTime, nextI = _fetchRow(block, nextI)
                    label, cursor = _fetchTextRow(block, nextI)
                except (ValueError, IndexError):
                    break

                entries.append(Interval(startTime, endTime, label.strip()))
        else:
            className = POINT_TIER
            while True:
                try:
                    time, nextI = _fetchRow(block, cursor)
                    label, cursor = _fetchTextRow(block, nextI)
                except (ValueError, IndexError):
                    break

                entries.append(Point(time, label.strip()))

        tiers.append(
            {
                "class": className,
                "name": tierName,
                "xmin": float(tierStartTime),
                "xmax": float(tierEndTime),
                "entries": entries,
            }
        )

    return {"xmin": tgMin, "xmax": tgMax, "tiers": tiers}
565
566
567def _fetchRow(
568    dataStr: str, index: int, searchStr: Optional[str] = None
569) -> Tuple[str, int]:
570    if searchStr is None:
571        startIndex = index
572    else:
573        startIndex = dataStr.index(searchStr, index) + len(searchStr)
574
575    endIndex = dataStr.index("\n", startIndex)
576
577    word = dataStr[startIndex:endIndex]
578    word = word.strip()
579    if word[0] == '"' and word[-1] == '"':
580        word = word[1:-1]
581    word = word.strip()
582
583    return word, endIndex + 1
584
585
586def _fetchTextRow(
587    dataStr: str, index: int, searchStr: Optional[str] = None
588) -> Tuple[str, int]:
589    if searchStr is None:
590        startIndex = index
591    else:
592        startIndex = dataStr.index(searchStr, index) + len(searchStr)
593
594    # A textgrid text is ended by double quotes. Double quotes that
595    # appear in the text are escaped by a preceeding double quotes.
596    # We know we're at the end of a text if the number of double
597    # quotes is odd.
598    endIndex = startIndex + 1
599    while True:
600        quoteStartIndex = dataStr.index('"', endIndex)
601        quoteEndIndex = quoteStartIndex
602        while dataStr[quoteEndIndex] == '"':
603            quoteEndIndex += 1
604
605        endIndex = quoteEndIndex
606
607        if (quoteEndIndex - quoteStartIndex) % 2 != 0:
608            break
609
610    word = dataStr[startIndex:endIndex]
611    word = word[1:-1]  # Remove the quote marks around the text
612    word = word.strip()
613
614    word = word.replace('""', '"')  # Unescape quote marks
615
616    # Advance to the end of the line
617    endIndex = dataStr.index("\n", endIndex)
618
619    return word, endIndex + 1
def reSearch(pattern, string, flags=None) -> Match[str]:
def reSearch(pattern, string, flags=None) -> Match[str]:
    """Run re.search() and insist on getting a match back.

    Args:
        pattern: the regular expression to look for
        string: the text to search in
        flags: optional re flags; a falsy value means the search is
            run without any flags

    Returns:
        the match object of the first match

    Raises:
        errors.ParsingError: if the pattern does not occur in the string
    """
    searchArgs = (pattern, string, flags) if flags else (pattern, string)
    found = re.search(*searchArgs)
    if found:
        return found

    raise errors.ParsingError("Expected field in Textgrid missing.")

Search for the string to match. Throws an error if no match is found.

def parseTextgridStr(data: str, includeEmptyIntervals: bool = False) -> Dict:
def parseTextgridStr(data: str, includeEmptyIntervals: bool = False) -> Dict:
    """Builds a textgrid dictionary from the text of a textgrid

    The text is first tried as JSON (either the textgrid-shaped or the
    simplified json layout).  If it is not valid JSON, it is parsed as
    a short or a long Praat textgrid.

    https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html

    Args:
        data: the textgrid, as a string
        includeEmptyIntervals: if False, points and intervals with
             an empty label '' are not included in the returned dictionary

    Returns:
        Dictionary
    """

    try:
        tgAsDict = json.loads(data)
        usesSimplifiedJson = "start" in tgAsDict.keys()
        if usesSimplifiedJson:
            tgAsDict = _upconvertDictionaryFromJson(tgAsDict)
    except ValueError:
        # Not JSON, so it has to be one of the Praat text formats.
        # Anything without "item [" is treated as the short format.
        isShortFormat = "ooTextFile short" in data or "item [" not in data
        parser = _parseShortTextgrid if isShortFormat else _parseNormalTextgrid
        tgAsDict = parser(data)

    if includeEmptyIntervals is False:
        for tier in tgAsDict["tiers"]:
            _removeBlanks(tier)

    return tgAsDict

Converts a string representation of a Textgrid into a dictionary

https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html

Arguments:
  • data: the textgrid, as a string (a short or long Praat textgrid, or one of the json formats)
  • includeEmptyIntervals: if False, points and intervals with an empty label '' are not included in the returned dictionary
Returns:

Dictionary

def getTextgridAsStr( tg: Dict, format: Literal['short_textgrid', 'long_textgrid', 'json', 'textgrid_json'], includeBlankSpaces: bool, minTimestamp: Optional[float] = None, maxTimestamp: Optional[float] = None, minimumIntervalLength: float = 1e-08) -> str:
def getTextgridAsStr(
    tg: Dict,
    format: Literal["short_textgrid", "long_textgrid", "json", "textgrid_json"],
    includeBlankSpaces: bool,
    minTimestamp: Optional[float] = None,
    maxTimestamp: Optional[float] = None,
    minimumIntervalLength: float = MIN_INTERVAL_LENGTH,
) -> str:
    """Serializes a textgrid dictionary into text that can be saved

    Args:
        tg: the textgrid to convert to a string
        format: one of ['short_textgrid', 'long_textgrid', 'json', 'textgrid_json']
        includeBlankSpaces: if True, blank sections in interval
            tiers will be filled in with an empty interval
            (with a label of "")
        minTimestamp: the minTimestamp of the saved Textgrid;
            if None, the value stored in the textgrid is used.
            An exception is thrown if minTimestamp is larger than
            timestamps in the textgrid.
        maxTimestamp: the maxTimestamp of the saved Textgrid;
            if None, the value stored in the textgrid is used.
            An exception is thrown if maxTimestamp is smaller than
            timestamps in the textgrid.
        minimumIntervalLength: any labeled intervals smaller
            than this will be removed, useful for removing ultrashort
            or fragmented intervals; if None, don't remove any.
            Removed intervals are merged (without their label) into
            adjacent entries.

    Returns:
        a string representation of the textgrid
    """

    utils.validateOption("format", format, TextgridFormats)

    # Normalizes the textgrid (sorting, bounds, optional blank filling)
    # before it is handed to one of the writers below
    preparedTg = _prepTgForSaving(
        tg,
        includeBlankSpaces,
        minTimestamp,
        maxTimestamp,
        minimumIntervalLength,
    )

    if format == TextgridFormats.LONG_TEXTGRID:
        serialized = _tgToLongTextForm(preparedTg)
    elif format == TextgridFormats.SHORT_TEXTGRID:
        serialized = _tgToShortTextForm(preparedTg)
    elif format == TextgridFormats.JSON:
        minimalDict = _downconvertDictionaryForJson(preparedTg)
        serialized = _tgToJson(minimalDict)
    elif format == TextgridFormats.TEXTGRID_JSON:
        serialized = _tgToJson(preparedTg)

    return serialized

Converts a textgrid to a string, suitable for saving

Arguments:
  • tg: the textgrid to convert to a string
  • format: one of ['short_textgrid', 'long_textgrid', 'json', 'textgrid_json']
  • includeBlankSpaces: if True, blank sections in interval tiers will be filled in with an empty interval (with a label of "")
  • minTimestamp: the minTimestamp of the saved Textgrid; if None, use whatever is defined in the Textgrid object. If minTimestamp is larger than timestamps in your textgrid, an exception will be thrown.
  • maxTimestamp: the maxTimestamp of the saved Textgrid; if None, use whatever is defined in the Textgrid object. If maxTimestamp is smaller than timestamps in your textgrid, an exception will be thrown.
  • minimumIntervalLength: any labeled intervals smaller than this will be removed, useful for removing ultrashort or fragmented intervals; if None, don't remove any. Removed intervals are merged (without their label) into adjacent entries.
Returns:

a string representation of the textgrid