utils.py 1.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. import zlib
  2. from typing import Iterator, TextIO
  3. def exact_div(x, y):
  4. assert x % y == 0
  5. return x // y
  6. def str2bool(string):
  7. str2val = {"True": True, "False": False}
  8. if string in str2val:
  9. return str2val[string]
  10. else:
  11. raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")
  12. def optional_int(string):
  13. return None if string == "None" else int(string)
  14. def optional_float(string):
  15. return None if string == "None" else float(string)
  16. def compression_ratio(text) -> float:
  17. return len(text) / len(zlib.compress(text.encode("utf-8")))
  18. def format_timestamp(seconds: float):
  19. assert seconds >= 0, "non-negative timestamp expected"
  20. milliseconds = round(seconds * 1000.0)
  21. hours = milliseconds // 3_600_000
  22. milliseconds -= hours * 3_600_000
  23. minutes = milliseconds // 60_000
  24. milliseconds -= minutes * 60_000
  25. seconds = milliseconds // 1_000
  26. milliseconds -= seconds * 1_000
  27. return (f"{hours}:" if hours > 0 else "") + f"{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
  28. def write_vtt(transcript: Iterator[dict], file: TextIO):
  29. print("WEBVTT\n", file=file)
  30. for segment in transcript:
  31. print(
  32. f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
  33. f"{segment['text'].replace('-->', '->')}\n",
  34. file=file,
  35. flush=True,
  36. )