praatio.utilities.utils

Various generic utility functions

  1"""
  2Various generic utility functions
  3"""
  4
  5import os
  6import subprocess
  7import itertools
  8import wave
  9from importlib import resources
 10from typing_extensions import Literal
 11from typing import Any, Iterator, List, Tuple, NoReturn, Type, Optional
 12
 13from praatio.utilities import errors
 14from praatio.utilities import constants
 15
# Alias so that code in this module can refer to constants.Interval
# simply as Interval
Interval = constants.Interval

# Locate the "praatScripts" resource of the "praatio" package.
# resources.files() is new in python 3.9
if hasattr(resources, "files"):
    scriptsPath = resources.files("praatio") / "praatScripts"
# resources.path() is deprecated in python 3.11; it is only used as a
# fallback when resources.files() is unavailable
else:
    # NOTE(review): the path is kept after the context manager exits;
    # presumably fine for packages installed on the filesystem -- confirm
    # for packages loaded from an archive
    with resources.path("praatio", "praatScripts") as path:
        scriptsPath = path
 25
 26
 27def find(list, value, reverse) -> Optional[int]:
 28    """Returns the first/last index of an item in a list"""
 29    if value not in list:
 30        return None
 31
 32    if reverse:
 33        index = len(list) - list[::-1].index(value) - 1
 34    else:
 35        index = list.index(value)
 36
 37    return index
 38
 39
 40def reportNoop(_exception: Type[BaseException], _text: str) -> None:
 41    pass
 42
 43
 44def reportException(exception: Type[BaseException], text: str) -> NoReturn:
 45    raise exception(text)
 46
 47
def reportWarning(_exception: Type[BaseException], text: str) -> None:
    """Error reporter that prints the message and lets execution continue

    Args:
        _exception: unused; accepted so that the signature matches the
            other reporters
        text: the message to print
    """
    print(text)
 50
 51
 52def getErrorReporter(reportingMode: Literal["silence", "warning", "error"]):
 53    modeToFunc = {
 54        constants.ErrorReportingMode.SILENCE: reportNoop,
 55        constants.ErrorReportingMode.WARNING: reportWarning,
 56        constants.ErrorReportingMode.ERROR: reportException,
 57    }
 58
 59    return modeToFunc[reportingMode]
 60
 61
 62def checkIsUndershoot(time: float, referenceTime: float, errorReporter) -> bool:
 63    if time < referenceTime:
 64        errorReporter(
 65            errors.OutOfBounds,
 66            f"'{time}' occurs before minimum allowed time '{referenceTime}'",
 67        )
 68        return True
 69    else:
 70        return False
 71
 72
 73def checkIsOvershoot(time: float, referenceTime: float, errorReporter) -> bool:
 74    if time > referenceTime:
 75        errorReporter(
 76            errors.OutOfBounds,
 77            f"'{time}' occurs after maximum allowed time '{referenceTime}'",
 78        )
 79        return True
 80    else:
 81        return False
 82
 83
 84def validateOption(variableName, value, optionClass):
 85    if value not in optionClass.validOptions:
 86        raise errors.WrongOption(variableName, value, optionClass.validOptions)
 87
 88
 89def intervalOverlapCheck(
 90    interval: Interval,
 91    cmprInterval: Interval,
 92    percentThreshold: float = 0,
 93    timeThreshold: float = 0,
 94    boundaryInclusive: bool = False,
 95) -> bool:
 96    """Checks whether two intervals overlap
 97
 98    Args:
 99        interval:
100        cmprInterval:
101        percentThreshold: if percentThreshold is greater than 0, then
102            if the intervals overlap, they must overlap by at least this threshold
103            (0.2 would mean 20% overlap considering both intervals)
104            (eg [0, 6] and [3,8] have an overlap of 50%. If percentThreshold is set
105             to higher than 50%, the intervals will be considered to not overlap.)
106        timeThreshold: if greater than 0, then if the intervals overlap,
107            they must overlap by at least this threshold
108        boundaryInclusive: if true, then two intervals are considered to
109            overlap if they share a boundary
110
111    Returns:
112        bool:
113    """
114    # TODO: move to Interval class?
115    startTime, endTime = interval[:2]
116    cmprStartTime, cmprEndTime = cmprInterval[:2]
117
118    overlapTime = max(0, min(endTime, cmprEndTime) - max(startTime, cmprStartTime))
119    overlapFlag = overlapTime > 0
120
121    # Do they share a boundary?  Only need to check if one boundary ends
122    # when another begins (because otherwise, they overlap in other ways)
123    boundaryOverlapFlag = False
124    if boundaryInclusive:
125        boundaryOverlapFlag = startTime == cmprEndTime or endTime == cmprStartTime
126
127    # Is the overlap over a certain percent?
128    percentOverlapFlag = False
129    if percentThreshold > 0 and overlapFlag:
130        totalTime = max(endTime, cmprEndTime) - min(startTime, cmprStartTime)
131        percentOverlap = overlapTime / float(totalTime)
132
133        percentOverlapFlag = percentOverlap >= percentThreshold
134        overlapFlag = percentOverlapFlag
135
136    # Is the overlap more than a certain threshold?
137    timeOverlapFlag = False
138    if timeThreshold > 0 and overlapFlag:
139        timeOverlapFlag = overlapTime >= timeThreshold
140        overlapFlag = timeOverlapFlag
141
142    overlapFlag = (
143        overlapFlag or boundaryOverlapFlag or percentOverlapFlag or timeOverlapFlag
144    )
145
146    return overlapFlag
147
148
def getIntervalsInInterval(
    start: float,
    end: float,
    intervals: List[Interval],
    mode: Literal["strict", "lax", "truncated"],
) -> List[Interval]:
    """Gets all intervals that exist between /start/ and /end/

    Args:
        start: the target interval start time
        end: the target interval stop time
        intervals: the list of intervals to check
        mode: Determines how intervals that are only partially inside
            the target interval are treated
            - 'strict', only intervals wholly contained by the target
                interval will be kept
            - 'lax', partially contained intervals will be kept as they are
            - 'truncated', partially contained intervals will be
                truncated to fit within the crop region.

    Returns:
        The list of intervals that overlap with the target interval

    Raises:
        WrongOption: if mode is not a valid constants.CropCollision option
    """
    # TODO: move to Interval class?
    validateOption("mode", mode, constants.CropCollision)

    keepPartial = mode == constants.CropCollision.LAX
    clipPartial = mode == constants.CropCollision.TRUNCATED

    keptIntervals = []
    for entry in intervals:
        # Intervals that end before the target starts, or start after it
        # ends, can never be part of the result
        if entry.end <= start or entry.start >= end:
            continue

        isWhollyContained = entry.start >= start and entry.end <= end
        if isWhollyContained or keepPartial:
            keptIntervals.append(entry)
        elif clipPartial:
            # The entry sticks out past the start, the end, or both;
            # clip whichever side(s) exceed the target interval
            clippedStart = max(entry.start, start)
            clippedEnd = min(entry.end, end)
            keptIntervals.append(Interval(clippedStart, clippedEnd, entry.label))

    return keptIntervals
216
217
def escapeQuotes(text: str) -> str:
    """Doubles every double-quote character in the text"""
    pieces = text.split('"')
    return '""'.join(pieces)
220
221
def strToIntOrFloat(inputStr: str) -> float:
    """Converts a numeric string to an int if possible, otherwise a float

    Strings that int() cannot parse (e.g. "1.5" or "1e3") are parsed
    with float() instead.

    Args:
        inputStr: the text to convert

    Returns:
        an int for integer strings; a float for every other numeric string

    Raises:
        ValueError: if the string is not a valid number
    """
    try:
        return int(inputStr)
    except ValueError:
        # Not an integer literal; covers decimals as well as float
        # notations that do not contain a "." (e.g. "1e3")
        return float(inputStr)
224
225
def getValueAtTime(
    timestamp: float,
    sortedDataTupleList: List[Tuple[Any, ...]],
    fuzzyMatching: bool = False,
    startI: int = 0,
) -> Tuple[Tuple[Any, ...], int]:
    """Get the value in the data list (sorted by time) that occurs at this point

    If fuzzyMatching is True, if there is not a value
    at the requested timestamp, the nearest feature value will be taken.

    The procedure assumes that all data is ordered in time.
    sortedDataTupleList should be in the form
    [(t1, v1a, v1b, ..), (t2, v2a, v2b, ..), ..]

    The procedure scans forward through sortedDataTupleList, starting
    at startI, and stops as soon as the answer is known.  If the data is
    not sequentially ordered, the incorrect response will be returned.

    For efficiency purposes, it takes a starting index and returns the ending
    index.

    Args:
        timestamp: the time to look up
        sortedDataTupleList: the rows to search; the first item of each
            row is its time
        fuzzyMatching: if False, only a row with exactly the requested
            time is accepted; if True, the closest row is returned
        startI: the index to start scanning from

    Returns:
        A tuple (row, index).  With fuzzyMatching False, row is an empty
        tuple if no row has exactly the requested time.  index is the
        position at which the scan stopped.

    Raises:
        IndexError: with fuzzyMatching True, if startI is not a valid
            index into sortedDataTupleList (e.g. the list is empty)
    """
    # TODO: move to Point class?
    i = startI
    bestRow: Tuple[Any, ...] = ()

    # Only find exact timestamp matches
    if fuzzyMatching is False:
        while True:
            try:
                currRow = sortedDataTupleList[i]
            except IndexError:
                # Ran off the end of the list without reaching the timestamp
                break

            currTime = currRow[0]
            if currTime >= timestamp:
                # The data is sorted, so the first row at or after the
                # timestamp is the only possible exact match
                if timestamp == currTime:
                    bestRow = currRow
                break
            i += 1

    # Find the closest timestamp
    else:
        # Seed the search with the starting row (not guarded against
        # an out-of-range startI)
        bestTime = sortedDataTupleList[i][0]
        bestRow = sortedDataTupleList[i]
        while True:
            try:
                dataTuple = sortedDataTupleList[i]
            except IndexError:
                # Step back onto the last valid index
                i -= 1
                break  # Last known value is the closest one

            currTime = dataTuple[0]
            currRow = dataTuple

            currDiff = abs(currTime - timestamp)
            bestDiff = abs(bestTime - timestamp)
            if currDiff < bestDiff:  # We're closer to the target val
                bestTime = currTime
                bestRow = currRow
                if currDiff == 0:
                    break  # Can't do better than a perfect match
            elif currDiff > bestDiff:
                # Step back to the row before the one that moved away
                i -= 1
                break  # We've past the best value.
            # Rows that are exactly as close as the current best do not
            # replace it; keep scanning
            i += 1

    retRow = bestRow

    return retRow, i
296
297
def getValuesInInterval(dataTupleList: List, start: float, end: float) -> List:
    """Gets the values that exist within an interval

    The function assumes that the data is formatted as
    [(t1, v1a, v1b, ...), (t2, v2a, v2b, ...)]

    Args:
        dataTupleList: the rows to filter; the first item of each row is
            its time
        start: the start of the interval (inclusive)
        end: the end of the interval (inclusive)

    Returns:
        the rows whose time lies within the interval, in their original
        order
    """
    # TODO: move to Interval class?
    return [row for row in dataTupleList if start <= row[0] and end >= row[0]]
312
313
def sign(x: float) -> int:
    """Returns 1 if x is positive, -1 if x is negative, and 0 otherwise"""
    if x > 0:
        return 1
    if x < 0:
        return -1
    return 0
322
323
def invertIntervalList(
    inputList: List[Tuple[float, float]],
    minValue: Optional[float] = None,
    maxValue: Optional[float] = None,
) -> List[Tuple[float, float]]:
    """Inverts the segments of a list of intervals

    e.g.
    [(0,1), (4,5), (7,10)] -> [(1,4), (5,7)]
    [(0.5, 1.2), (3.4, 5.0)] with minValue=0.0 -> [(0.0, 0.5), (1.2, 3.4)]

    Args:
        inputList: the (start, end) intervals to invert; they do not need
            to be sorted
        minValue: if given, the gap between minValue and the first
            interval is included in the output
        maxValue: if given, the gap between the last interval and
            maxValue is included in the output

    Returns:
        the gaps between the input intervals, in ascending order.  For an
        empty inputList, [(minValue, maxValue)] is returned if both bounds
        are given, otherwise an empty list.

    Raises:
        ArgumentError: if any interval does not start before it ends
    """
    if any(interval[0] >= interval[1] for interval in inputList):
        raise errors.ArgumentError("Interval start must occur before interval end")

    inputList = sorted(inputList)

    # Special case -- empty lists
    if len(inputList) == 0:
        if minValue is not None and maxValue is not None:
            return [(minValue, maxValue)]
        # Without both bounds there is no range to report
        return []

    # Insert placeholder head and tail intervals for the purpose of
    # inverting, if the range does not start and end at the smallest and
    # largest values.  Only the end of the head and the start of the tail
    # are ever read.
    if minValue is not None and inputList[0][0] > minValue:
        inputList.insert(0, (minValue, minValue))
    if maxValue is not None and inputList[-1][1] < maxValue:
        inputList.append((maxValue, maxValue))

    # Each gap runs from the end of one interval to the start of the next.
    # If two intervals in the input share a boundary, the gap between them
    # is empty and is dropped
    # eg invertIntervalList([(0, 1), (1, 2)]) -> []
    invList: List[Tuple[float, float]] = [
        (previous[1], following[0])
        for previous, following in zip(inputList, inputList[1:])
        if previous[1] != following[0]
    ]

    return invList
362
363
def makeDir(path: str) -> None:
    """Create a new directory

    Unlike os.mkdir, it does not throw an exception if the directory already exists

    Args:
        path: the directory to create; its parent must already exist
    """
    # Attempt the creation directly rather than checking for existence
    # first, so that nothing can create the path between the check and
    # the call to os.mkdir
    try:
        os.mkdir(path)
    except FileExistsError:
        pass
371
372
def findAll(txt: str, subStr: str) -> List[int]:
    """Find the starting indices of all instances of subStr in txt

    Overlapping instances are included, because the search resumes one
    character after the start of the previous match.
    """
    matchStarts = []
    position = txt.find(subStr)
    while position != -1:
        matchStarts.append(position)
        position = txt.find(subStr, position + 1)

    return matchStarts
386
387
def runPraatScript(
    praatEXE: str, scriptFN: str, argList: List[Any], cwd: str = None
) -> None:
    """Runs a praat script and waits for it to finish

    Args:
        praatEXE: the path to the praat executable
        scriptFN: the path to the script to run
        argList: the arguments for the script; each one is converted to
            a string before being passed on the command line
        cwd: the working directory for the process, passed through to
            subprocess.Popen

    Raises:
        FileNotFound: if praatEXE or scriptFN does not exist
        PraatExecutionFailed: if the process exits with a non-zero code
    """
    # Popen gives a not-very-transparent error, so check the paths up front
    for requiredPath in (praatEXE, scriptFN):
        if not os.path.exists(requiredPath):
            raise errors.FileNotFound(requiredPath)

    command = [praatEXE, "--run", scriptFN]
    command.extend(["%s" % arg for arg in argList])

    exitCode = subprocess.Popen(command, cwd=cwd).wait()
    if exitCode:
        raise errors.PraatExecutionFailed(command)
404
405
def safeZip(listOfLists: List[list], enforceLength: bool) -> Iterator[Any]:
    """A safe version of python's zip()

    If two sublists are of different sizes, python's zip will truncate
    the output to be the smaller of the two.

    If enforceLength is True, safeZip throws an exception if the size of
    any sublist is different from the rest.  Otherwise, shorter sublists
    are padded with None (the behaviour of itertools.zip_longest), so no
    items are dropped.

    Args:
        listOfLists: the lists to zip together; may be empty
        enforceLength: if True, all sublists must be the same length

    Returns:
        an iterator over tuples holding the i-th item of every sublist

    Raises:
        SafeZipException: if enforceLength is True and the sublists are
            not all the same length
    """
    # With no sublists there is nothing to compare (and no first sublist
    # to measure)
    if enforceLength is True and len(listOfLists) > 0:
        expectedLength = len(listOfLists[0])
        if any(len(subList) != expectedLength for subList in listOfLists):
            raise errors.SafeZipException("Lists to zip have different sizes.")

    return itertools.zip_longest(*listOfLists)
421
422
def getWavDuration(wavFN: str) -> float:
    """For internal use.  See praatio.audio.QueryWav() for general use.

    Args:
        wavFN: the path to the wave file

    Returns:
        the duration of the audio in seconds (frame count / frame rate)
    """
    # The context manager closes the file even if reading the header fails
    with wave.open(wavFN, "r") as audiofile:
        framerate = audiofile.getframerate()
        nframes = audiofile.getnframes()

    return float(nframes) / framerate
432
433
def chooseClosestTime(
    targetTime: float, candidateA: Optional[float], candidateB: Optional[float]
) -> float:
    """Chooses the closest time between two options that straddle the target time

    Args:
        targetTime: the time to compare against
        candidateA: the first candidate
        candidateB: the second candidate

    Returns:
        the closer of the two options to the target time.  If only one
        candidate is provided, it is returned.  If both are equally close,
        candidateA is returned.
    Raises:
        ArgumentError: When neither candidate is provided
    """
    if candidateA is None:
        if candidateB is None:
            raise errors.ArgumentError(
                "Must provide at least one of candidateA or candidateB"
            )
        return candidateB

    if candidateB is None:
        return candidateA

    aDiff = abs(candidateA - targetTime)
    bDiff = abs(candidateB - targetTime)

    # Ties go to candidateA
    return candidateA if aDiff <= bDiff else candidateB
467
468
def getInterval(
    startTime: float, duration: float, max: float, reverse: bool
) -> Tuple[float, float]:
    """returns an interval before or after some start time

    The returned start time will not be less than 0 and the returned end
    time will not be greater than max

    Args:
        startTime: the time to get the interval from
        duration: duration of the interval
        max: the maximum allowed time
        reverse: if True, the interval ends at startTime; otherwise it
            begins at startTime

    Returns:
        the start and end time of an interval
    """
    if reverse is True:
        endTime = startTime
        startTime -= duration
    else:
        endTime = startTime + duration

    # Don't read over the edges.  The two checks are independent: an
    # interval can extend past both edges at once.
    if startTime < 0:
        startTime = 0
    if endTime > max:
        endTime = max

    return (startTime, endTime)
class Interval(praatio.utilities.utils.Interval):
class Interval(namedtuple("Interval", ["start", "end", "label"])):
    """A labeled span of time, stored as a (start, end, label) namedtuple

    Two intervals are equal if their labels are equal and their start and
    end times are close to each other (as judged by math.isclose with its
    default tolerances).

    NOTE: because __eq__ is overridden without __hash__, python marks
    instances of this class as unhashable.
    """

    def __eq__(self, other):
        # Anything that is not an Interval (including a plain tuple with
        # the same values) is never equal
        if not isinstance(other, Interval):
            return False

        if not math.isclose(self.start, other.start):
            return False
        if not math.isclose(self.end, other.end):
            return False

        return self.label == other.label

    def __ne__(self, other):
        isEqual = self == other
        return not isEqual

Interval(start, end, label)

Interval(start, end, label)

Create new instance of Interval(start, end, label)

start

Alias for field number 0

end

Alias for field number 1

label

Alias for field number 2

Inherited Members
builtins.tuple
index
count
def find(list, value, reverse) -> Optional[int]:
28def find(list, value, reverse) -> Optional[int]:
29    """Returns the first/last index of an item in a list"""
30    if value not in list:
31        return None
32
33    if reverse:
34        index = len(list) - list[::-1].index(value) - 1
35    else:
36        index = list.index(value)
37
38    return index

Returns the first/last index of an item in a list

def reportNoop(_exception: Type[BaseException], _text: str) -> None:
41def reportNoop(_exception: Type[BaseException], _text: str) -> None:
42    pass
def reportException(exception: Type[BaseException], text: str) -> NoReturn:
45def reportException(exception: Type[BaseException], text: str) -> NoReturn:
46    raise exception(text)
def reportWarning(_exception: Type[BaseException], text: str) -> None:
49def reportWarning(_exception: Type[BaseException], text: str) -> None:
50    print(text)
def getErrorReporter(reportingMode: Literal['silence', 'warning', 'error']):
53def getErrorReporter(reportingMode: Literal["silence", "warning", "error"]):
54    modeToFunc = {
55        constants.ErrorReportingMode.SILENCE: reportNoop,
56        constants.ErrorReportingMode.WARNING: reportWarning,
57        constants.ErrorReportingMode.ERROR: reportException,
58    }
59
60    return modeToFunc[reportingMode]
def checkIsUndershoot(time: float, referenceTime: float, errorReporter) -> bool:
63def checkIsUndershoot(time: float, referenceTime: float, errorReporter) -> bool:
64    if time < referenceTime:
65        errorReporter(
66            errors.OutOfBounds,
67            f"'{time}' occurs before minimum allowed time '{referenceTime}'",
68        )
69        return True
70    else:
71        return False
def checkIsOvershoot(time: float, referenceTime: float, errorReporter) -> bool:
74def checkIsOvershoot(time: float, referenceTime: float, errorReporter) -> bool:
75    if time > referenceTime:
76        errorReporter(
77            errors.OutOfBounds,
78            f"'{time}' occurs after maximum allowed time '{referenceTime}'",
79        )
80        return True
81    else:
82        return False
def validateOption(variableName, value, optionClass):
85def validateOption(variableName, value, optionClass):
86    if value not in optionClass.validOptions:
87        raise errors.WrongOption(variableName, value, optionClass.validOptions)
def intervalOverlapCheck( interval: Interval, cmprInterval: Interval, percentThreshold: float = 0, timeThreshold: float = 0, boundaryInclusive: bool = False) -> bool:
 90def intervalOverlapCheck(
 91    interval: Interval,
 92    cmprInterval: Interval,
 93    percentThreshold: float = 0,
 94    timeThreshold: float = 0,
 95    boundaryInclusive: bool = False,
 96) -> bool:
 97    """Checks whether two intervals overlap
 98
 99    Args:
100        interval:
101        cmprInterval:
102        percentThreshold: if percentThreshold is greater than 0, then
103            if the intervals overlap, they must overlap by at least this threshold
104            (0.2 would mean 20% overlap considering both intervals)
105            (eg [0, 6] and [3,8] have an overlap of 50%. If percentThreshold is set
106             to higher than 50%, the intervals will be considered to not overlap.)
107        timeThreshold: if greater than 0, then if the intervals overlap,
108            they must overlap by at least this threshold
109        boundaryInclusive: if true, then two intervals are considered to
110            overlap if they share a boundary
111
112    Returns:
113        bool:
114    """
115    # TODO: move to Interval class?
116    startTime, endTime = interval[:2]
117    cmprStartTime, cmprEndTime = cmprInterval[:2]
118
119    overlapTime = max(0, min(endTime, cmprEndTime) - max(startTime, cmprStartTime))
120    overlapFlag = overlapTime > 0
121
122    # Do they share a boundary?  Only need to check if one boundary ends
123    # when another begins (because otherwise, they overlap in other ways)
124    boundaryOverlapFlag = False
125    if boundaryInclusive:
126        boundaryOverlapFlag = startTime == cmprEndTime or endTime == cmprStartTime
127
128    # Is the overlap over a certain percent?
129    percentOverlapFlag = False
130    if percentThreshold > 0 and overlapFlag:
131        totalTime = max(endTime, cmprEndTime) - min(startTime, cmprStartTime)
132        percentOverlap = overlapTime / float(totalTime)
133
134        percentOverlapFlag = percentOverlap >= percentThreshold
135        overlapFlag = percentOverlapFlag
136
137    # Is the overlap more than a certain threshold?
138    timeOverlapFlag = False
139    if timeThreshold > 0 and overlapFlag:
140        timeOverlapFlag = overlapTime >= timeThreshold
141        overlapFlag = timeOverlapFlag
142
143    overlapFlag = (
144        overlapFlag or boundaryOverlapFlag or percentOverlapFlag or timeOverlapFlag
145    )
146
147    return overlapFlag

Checks whether two intervals overlap

Arguments:
  • interval:
  • cmprInterval:
  • percentThreshold: if percentThreshold is greater than 0, then if the intervals overlap, they must overlap by at least this fraction of the total span covered by both intervals (0.2 would mean 20% overlap considering both intervals) (eg [0, 6] and [3, 8] share 3 out of a total span of 8, an overlap of 37.5%. If percentThreshold is set to higher than 37.5%, the intervals will be considered to not overlap.)
  • timeThreshold: if greater than 0, then if the intervals overlap, they must overlap by at least this threshold
  • boundaryInclusive: if true, then two intervals are considered to overlap if they share a boundary
Returns:

bool:

def getIntervalsInInterval( start: float, end: float, intervals: List[Interval], mode: Literal['strict', 'lax', 'truncated']) -> List[Interval]:
150def getIntervalsInInterval(
151    start: float,
152    end: float,
153    intervals: List[Interval],
154    mode: Literal["strict", "lax", "truncated"],
155) -> List[Interval]:
156    """Gets all intervals that exist between /start/ and /end/
157
158    Args:
159        start: the target interval start time
160        end: the target interval stop time
161        intervals: the list of intervals to check
162        mode: Determines judgement criteria
163            - 'strict', only intervals wholly contained by the target
164                interval will be kept
165            - 'lax', partially contained intervals will be kept
166            - 'truncated', partially contained intervals will be
167                truncated to fit within the crop region.
168
169    Returns:
170        The list of intervals that overlap with the target interval
171    """
172    # TODO: move to Interval class?
173    validateOption("mode", mode, constants.CropCollision)
174
175    containedIntervals = []
176    for interval in intervals:
177        matchedEntry = None
178
179        # Don't need to investigate if the current interval is
180        # before start or after end
181        if interval.end <= start or interval.start >= end:
182            continue
183
184        # Determine if the current interval is wholly contained
185        # within the superEntry
186        if interval.start >= start and interval.end <= end:
187            matchedEntry = interval
188
189        # If the current interval is only partially contained within the
190        # target interval AND inclusion is 'lax', include it anyways
191        elif mode == constants.CropCollision.LAX and (
192            interval.start >= start or interval.end <= end
193        ):
194            matchedEntry = interval
195
196        # The current interval stradles the end of the target interval
197        elif interval.start >= start and interval.end > end:
198            if mode == constants.CropCollision.TRUNCATED:
199                matchedEntry = Interval(interval.start, end, interval.label)
200
201        # The current interval stradles the start of the target interval
202        elif interval.start < start and interval.end <= end:
203            if mode == constants.CropCollision.TRUNCATED:
204                matchedEntry = Interval(start, interval.end, interval.label)
205
206        # The current interval contains the target interval completely
207        elif interval.start <= start and interval.end >= end:
208            if mode == constants.CropCollision.LAX:
209                matchedEntry = interval
210            elif mode == constants.CropCollision.TRUNCATED:
211                matchedEntry = Interval(start, end, interval.label)
212
213        if matchedEntry is not None:
214            containedIntervals.append(matchedEntry)
215
216    return containedIntervals

Gets all intervals that exist between /start/ and /end/

Arguments:
  • start: the target interval start time
  • end: the target interval stop time
  • intervals: the list of intervals to check
  • mode: Determines judgement criteria
    • 'strict', only intervals wholly contained by the target interval will be kept
    • 'lax', partially contained intervals will be kept
    • 'truncated', partially contained intervals will be truncated to fit within the crop region.
Returns:

The list of intervals that overlap with the target interval

def escapeQuotes(text: str) -> str:
219def escapeQuotes(text: str) -> str:
220    return text.replace('"', '""')
def strToIntOrFloat(inputStr: str) -> float:
def strToIntOrFloat(inputStr: str) -> float:
    """Converts a numeric string to an int if possible, otherwise a float

    Strings that int() cannot parse (e.g. "1.5" or "1e3") are parsed
    with float() instead.

    Args:
        inputStr: the text to convert

    Returns:
        an int for integer strings; a float for every other numeric string

    Raises:
        ValueError: if the string is not a valid number
    """
    try:
        return int(inputStr)
    except ValueError:
        # Not an integer literal; covers decimals as well as float
        # notations that do not contain a "." (e.g. "1e3")
        return float(inputStr)
def getValueAtTime( timestamp: float, sortedDataTupleList: List[Tuple[Any, ...]], fuzzyMatching: bool = False, startI: int = 0) -> Tuple[Tuple[Any, ...], int]:
227def getValueAtTime(
228    timestamp: float,
229    sortedDataTupleList: List[Tuple[Any, ...]],
230    fuzzyMatching: bool = False,
231    startI: int = 0,
232) -> Tuple[Tuple[Any, ...], int]:
233    """Get the value in the data list (sorted by time) that occurs at this point
234
235    If fuzzyMatching is True, if there is not a value
236    at the requested timestamp, the nearest feature value will be taken.
237
238    The procedure assumes that all data is ordered in time.
239    dataTupleList should be in the form
240    [(t1, v1a, v1b, ..), (t2, v2a, v2b, ..), ..]
241
242    The procedure makes one pass through dataTupleList and one
243    pass through self.entries.  If the data is not sequentially
244    ordered, the incorrect response will be returned.
245
246    For efficiency purposes, it takes a starting index and returns the ending
247    index.
248    """
249    # TODO: move to Point class?
250    i = startI
251    bestRow: Tuple[Any, ...] = ()
252
253    # Only find exact timestamp matches
254    if fuzzyMatching is False:
255        while True:
256            try:
257                currRow = sortedDataTupleList[i]
258            except IndexError:
259                break
260
261            currTime = currRow[0]
262            if currTime >= timestamp:
263                if timestamp == currTime:
264                    bestRow = currRow
265                break
266            i += 1
267
268    # Find the closest timestamp
269    else:
270        bestTime = sortedDataTupleList[i][0]
271        bestRow = sortedDataTupleList[i]
272        while True:
273            try:
274                dataTuple = sortedDataTupleList[i]
275            except IndexError:
276                i -= 1
277                break  # Last known value is the closest one
278
279            currTime = dataTuple[0]
280            currRow = dataTuple
281
282            currDiff = abs(currTime - timestamp)
283            bestDiff = abs(bestTime - timestamp)
284            if currDiff < bestDiff:  # We're closer to the target val
285                bestTime = currTime
286                bestRow = currRow
287                if currDiff == 0:
288                    break  # Can't do better than a perfect match
289            elif currDiff > bestDiff:
290                i -= 1
291                break  # We've past the best value.
292            i += 1
293
294    retRow = bestRow
295
296    return retRow, i

Get the value in the data list (sorted by time) that occurs at this point

If fuzzyMatching is True, if there is not a value at the requested timestamp, the nearest feature value will be taken.

The procedure assumes that all data is ordered in time. dataTupleList should be in the form [(t1, v1a, v1b, ..), (t2, v2a, v2b, ..), ..]

The procedure scans forward through the data list, starting at the given index, and stops as soon as the answer is known. If the data is not sequentially ordered, the incorrect response will be returned.

For efficiency purposes, it takes a starting index and returns the ending index.

def getValuesInInterval(dataTupleList: List, start: float, end: float) -> List:
299def getValuesInInterval(dataTupleList: List, start: float, end: float) -> List:
300    """Gets the values that exist within an interval
301
302    The function assumes that the data is formated as
303    [(t1, v1a, v1b, ...), (t2, v2a, v2b, ...)]
304    """
305    # TODO: move to Interval class?
306    intervalDataList = []
307    for dataTuple in dataTupleList:
308        time = dataTuple[0]
309        if start <= time and end >= time:
310            intervalDataList.append(dataTuple)
311
312    return intervalDataList

Gets the values that exist within an interval

The function assumes that the data is formatted as [(t1, v1a, v1b, ...), (t2, v2a, v2b, ...)]

def sign(x: float) -> int:
315def sign(x: float) -> int:
316    """Returns 1 if x is positive, 0 if x is 0, and -1 otherwise"""
317    retVal = 0
318    if x > 0:
319        retVal = 1
320    elif x < 0:
321        retVal = -1
322    return retVal

Returns 1 if x is positive, 0 if x is 0, and -1 otherwise

def invertIntervalList( inputList: List[Tuple[float, float]], minValue: float = None, maxValue: float = None) -> List[Tuple[float, float]]:
def invertIntervalList(
    inputList: List[Tuple[float, float]],
    minValue: Optional[float] = None,
    maxValue: Optional[float] = None,
) -> List[Tuple[float, float]]:
    """Inverts the segments of a list of intervals

    The result contains the gaps between the input intervals.  When
    minValue and/or maxValue are given, the gap before the first interval
    and/or after the last interval is included too.

    e.g.
    [(0,1), (4,5), (7,10)] -> [(1,4), (5,7)]
    [(0.5, 1.2), (3.4, 5.0)] with minValue=0.0 -> [(0.0, 0.5), (1.2, 3.4)]

    Args:
        inputList: the (start, end) intervals to invert; they do not need
            to be sorted
        minValue: if given, the lowest time that the output may start at
        maxValue: if given, the highest time that the output may end at

    Returns:
        the sorted list of gaps; zero-length gaps between intervals are
        never included.  An empty inputList yields [(minValue, maxValue)]
        when both bounds are given and an empty list otherwise.

    Raises:
        ArgumentError: an interval does not start before it ends
    """
    if any(interval[0] >= interval[1] for interval in inputList):
        raise errors.ArgumentError("Interval start must occur before interval end")

    intervals = sorted(inputList)

    # Special case -- empty lists.  Without both bounds there is nothing
    # that the inverted region could be measured against.
    if not intervals:
        if minValue is not None and maxValue is not None:
            return [(minValue, maxValue)]
        return []

    invList: List[Tuple[float, float]] = []

    # Leading gap, only when the first interval starts after minValue
    firstStart = intervals[0][0]
    if minValue is not None and firstStart > minValue:
        invList.append((minValue, firstStart))

    # Track the furthest end seen so far so that overlapping or nested
    # intervals cannot produce gaps that run backwards in time.  Intervals
    # that merely share a boundary produce no gap at all.
    coveredUntil = intervals[0][1]
    for interval in intervals[1:]:
        if interval[0] > coveredUntil:
            invList.append((coveredUntil, interval[0]))
        if interval[1] > coveredUntil:
            coveredUntil = interval[1]

    # Trailing gap, only when the covered region stops short of maxValue
    if maxValue is not None and coveredUntil < maxValue:
        invList.append((coveredUntil, maxValue))

    return invList

Inverts the segments of a list of intervals

e.g. [(0,1), (4,5), (7,10)] -> [(1,4), (5,7)]; with minValue=0.0, [(0.5, 1.2), (3.4, 5.0)] -> [(0.0, 0.5), (1.2, 3.4)]

def makeDir(path: str) -> None:
def makeDir(path: str) -> None:
    """Create a new directory

    Unlike os.mkdir, it does not throw an exception if the path already exists.
    Parent directories are not created.

    Args:
        path: the directory to create

    Raises:
        OSError: the directory could not be created and nothing exists
            at path (e.g. the parent directory is missing)
    """
    # Attempt the creation first instead of checking beforehand; a
    # check-then-create sequence can fail if something else creates the
    # directory in between the two steps.
    try:
        os.mkdir(path)
    except OSError:
        # Only swallow the failure when the path is actually there; the
        # exact exception raised for an existing path varies by platform.
        if not os.path.exists(path):
            raise

Create a new directory

Unlike os.mkdir, it does not throw an exception if the directory already exists

def findAll(txt: str, subStr: str) -> List[int]:
def findAll(txt: str, subStr: str) -> List[int]:
    """Locates every occurrence of subStr inside txt

    The search resumes one character after each match, so overlapping
    occurrences are all reported.

    Args:
        txt: the text to search through
        subStr: the text to look for

    Returns:
        the starting index of each occurrence, in ascending order
    """
    positions: List[int] = []
    searchFrom = 0
    try:
        while True:
            hit = txt.index(subStr, searchFrom)
            positions.append(int(hit))
            searchFrom = hit + 1
    except ValueError:
        # index() signals that there are no further matches
        pass

    return positions

Find the starting indices of all instances of subStr in txt

def runPraatScript( praatEXE: str, scriptFN: str, argList: List[Any], cwd: str = None) -> None:
def runPraatScript(
    praatEXE: str, scriptFN: str, argList: List[Any], cwd: str = None
) -> None:
    """Runs a praat script and waits for it to finish

    Args:
        praatEXE: path to the praat executable
        scriptFN: path to the script to run
        argList: the arguments for the script; each one is converted to
            a string before being placed on the command line
        cwd: the working directory to launch the process in

    Raises:
        FileNotFound: praatEXE or scriptFN does not exist
        PraatExecutionFailed: the process exited with a non-zero status
    """
    # Popen gives a not-very-transparent error
    for requiredPath in (praatEXE, scriptFN):
        if not os.path.exists(requiredPath):
            raise errors.FileNotFound(requiredPath)

    cmdList = [praatEXE, "--run", scriptFN]
    cmdList.extend("%s" % arg for arg in argList)

    exitStatus = subprocess.Popen(cmdList, cwd=cwd).wait()
    if exitStatus:
        raise errors.PraatExecutionFailed(cmdList)
def safeZip(listOfLists: List[list], enforceLength: bool) -> Iterator[Any]:
def safeZip(listOfLists: List[list], enforceLength: bool) -> Iterator[Any]:
    """A safe version of python's zip()

    If two sublists are of different sizes, python's zip will truncate
    the output to be the smaller of the two.

    With enforceLength set, safeZip throws an exception if the size of
    any sublist is different from the rest.  Otherwise, the shorter
    sublists are padded with None to the size of the longest sublist.

    Args:
        listOfLists: the lists to zip together
        enforceLength: if True, all sublists must have the same size

    Returns:
        an iterator over tuples that hold the i-th item of every sublist

    Raises:
        SafeZipException: enforceLength is True and the sublists differ
            in size
    """
    # An empty listOfLists has no sizes to compare (and no first sublist
    # to measure); it simply zips to an empty iterator
    if enforceLength is True and listOfLists:
        expectedLength = len(listOfLists[0])
        if any(len(subList) != expectedLength for subList in listOfLists):
            raise errors.SafeZipException("Lists to zip have different sizes.")

    return itertools.zip_longest(*listOfLists)

A safe version of python's zip()

If two sublists are of different sizes, python's zip will truncate the output to be the smaller of the two.

safeZip throws an exception if the size of any sublist is different from the rest.

def getWavDuration(wavFN: str) -> float:
def getWavDuration(wavFN: str) -> float:
    """Returns the duration, in seconds, of a wav file

    For internal use.  See praatio.audio.QueryWav() for general use.

    Args:
        wavFN: path to the wav file

    Returns:
        the number of frames in the file divided by its framerate
    """
    # The context manager closes the file handle, even if reading the
    # header fails
    with wave.open(wavFN, "r") as audiofile:
        framerate = audiofile.getframerate()
        nframes = audiofile.getnframes()

    return float(nframes) / framerate

For internal use. See praatio.audio.QueryWav() for general use.

def chooseClosestTime( targetTime: float, candidateA: Optional[float], candidateB: Optional[float]) -> float:
def chooseClosestTime(
    targetTime: float, candidateA: Optional[float], candidateB: Optional[float]
) -> float:
    """Chooses the closest time between two options that straddle the target time

    Args:
        targetTime: the time to compare against
        candidateA: the first candidate
        candidateB: the second candidate

    Returns:
        the closer of the two options to the target time; if only one
        candidate is provided, that candidate is returned; if both are
        equally close, candidateA is returned
    Raises:
        ArgumentError: When no left or right candidate is provided
    """
    if candidateA is None:
        if candidateB is None:
            raise errors.ArgumentError("Must provide at least one candidate")
        return candidateB

    if candidateB is None:
        return candidateA

    aDiff = abs(candidateA - targetTime)
    bDiff = abs(candidateB - targetTime)

    # Ties are resolved in favor of candidateA
    return candidateA if aDiff <= bDiff else candidateB

Chooses the closest time between two options that straddle the target time

Arguments:
  • targetTime: the time to compare against
  • candidateA: the first candidate
  • candidateB: the second candidate
Returns:

the closer of the two options to the target time

Raises:
  • ArgumentError: When no left or right candidate is provided
def getInterval( startTime: float, duration: float, max: float, reverse: bool) -> Tuple[float, float]:
def getInterval(
    startTime: float, duration: float, max: float, reverse: bool
) -> Tuple[float, float]:
    """returns an interval before or after some start time

    The returned timestamps will be between 0 and max

    Args:
        startTime: the time to get the interval from
        duration: duration of the interval
        max: the maximum allowed time
        reverse: is the interval before or after the targetTime?

    Returns:
        the start and end time of an interval
    """
    if reverse is True:
        endTime = startTime
        startTime -= duration
    else:
        endTime = startTime + duration

    # Don't read over the edges.  Each endpoint is clamped on its own
    # because a single interval can overshoot both edges at once.
    # (The builtin max() is shadowed by the 'max' argument in here.)
    if startTime < 0:
        startTime = 0
    elif startTime > max:
        startTime = max

    if endTime > max:
        endTime = max
    elif endTime < 0:
        endTime = 0

    return (startTime, endTime)

returns an interval before or after some start time

The returned timestamps will be between 0 and max

Arguments:
  • startTime: the time to get the interval from
  • duration: duration of the interval
  • max: the maximum allowed time
  • reverse: is the interval before or after the targetTime?
Returns:

the start and end time of an interval