123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- import zlib
- from typing import Iterator, TextIO
def exact_div(x, y):
    """Return ``x // y`` after asserting that ``y`` divides ``x`` exactly.

    An ``AssertionError`` is raised when ``x % y`` is non-zero.
    """
    remainder = x % y
    assert remainder == 0
    return x // y
def str2bool(string):
    """Map the literal strings ``"True"`` / ``"False"`` to the matching bool.

    Any other value raises ``ValueError`` naming the accepted spellings.
    """
    choices = {"True": True, "False": False}
    # Guard clause: reject anything that is not one of the two exact spellings.
    if string not in choices:
        raise ValueError(f"Expected one of {set(choices.keys())}, got {string}")
    return choices[string]
def optional_int(string):
    """Parse ``string`` with ``int``, except that the literal ``"None"`` yields ``None``."""
    if string == "None":
        return None
    return int(string)
def optional_float(string):
    """Parse ``string`` with ``float``, except that the literal ``"None"`` yields ``None``."""
    if string == "None":
        return None
    return float(string)
def compression_ratio(text) -> float:
    """Return the zlib compression ratio of ``text`` encoded as UTF-8.

    The ratio is ``len(raw_bytes) / len(compressed_bytes)``. Both sides are
    measured in bytes so the value is consistent for non-ASCII text, where one
    character encodes to several bytes; dividing the character count by the
    compressed byte count would understate the ratio for such text.
    """
    # Encode once and reuse the bytes for both the numerator and the compressor.
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))
def format_timestamp(
    seconds: float,
    *,
    always_include_hours: bool = False,
    decimal_marker: str = ".",
) -> str:
    """Format a non-negative duration in seconds as ``[HH:]MM:SS.mmm``.

    Args:
        seconds: Duration in seconds; must be ``>= 0``.
        always_include_hours: When true, emit the hours field even if it is
            zero. By default the hours field appears only when non-zero.
        decimal_marker: Separator placed between seconds and milliseconds.

    Returns:
        The formatted timestamp. Hours, when present, are zero-padded to at
        least two digits, since WebVTT requires two or more hour digits.

    Raises:
        AssertionError: If ``seconds`` is negative.
    """
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in whole milliseconds so the fields below are exact integers.
    remaining_ms = round(seconds * 1000.0)
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write ``transcript`` to ``file`` as a ``WEBVTT`` header followed by cues.

    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
    keys. Both times are rendered with ``format_timestamp``, and any ``-->``
    inside the text is replaced by ``->`` so it cannot be confused with the
    arrow on the cue timing line.
    """
    # The header line is followed by a blank line (print appends its own newline).
    print("WEBVTT\n", file=file)
    for segment in transcript:
        cue_start = format_timestamp(segment["start"])
        cue_end = format_timestamp(segment["end"])
        cue_text = segment["text"].replace("-->", "->")
        # Flush after every cue so the output is usable while segments are still arriving.
        print(f"{cue_start} --> {cue_end}\n{cue_text}\n", file=file, flush=True)
|