99 lines
3.0 KiB
Python
99 lines
3.0 KiB
Python
|
|
# coding=utf-8
|
||
|
|
# Copyright 2025-present, the HuggingFace Inc. team.
|
||
|
|
#
|
||
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
# you may not use this file except in compliance with the License.
|
||
|
|
# You may obtain a copy of the License at
|
||
|
|
#
|
||
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
#
|
||
|
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
# See the License for the specific language governing permissions and
|
||
|
|
# limitations under the License.
|
||
|
|
"""Parsing helpers shared across modules."""
|
||
|
|
|
||
|
|
import re
|
||
|
|
import time
|
||
|
|
from typing import Dict
|
||
|
|
|
||
|
|
|
||
|
|
# One or more digits immediately followed by one or more letters, e.g. "10k" or "5MO".
# Matching is case-insensitive; `_parse_with_unit` lowercases the unit before lookup.
RE_NUMBER_WITH_UNIT = re.compile(r"(\d+)([a-z]+)", flags=re.IGNORECASE)

# Size suffix -> number of bytes. Multipliers are decimal (powers of 1000, not 1024).
BYTE_UNITS: Dict[str, int] = {
    "k": 10**3,
    "m": 10**6,
    "g": 10**9,
    "t": 10**12,
    "p": 10**15,
}

# Duration suffix -> number of seconds.
# Here "m" means minutes; months use "mo".
TIME_UNITS: Dict[str, int] = {
    "s": 1,
    "m": 60,
    "h": 3_600,
    "d": 86_400,
    "w": 604_800,  # 7 days
    "mo": 2_592_000,  # 30 days
    "y": 31_536_000,  # 365 days
}
|
||
|
|
|
||
|
|
|
||
|
|
def parse_size(value: str) -> int:
    """Convert a size string such as `"10M"` into a number of bytes.

    The unit is looked up case-insensitively in `BYTE_UNITS`, whose keys are the
    single letters `k`, `m`, `g`, `t` and `p` with decimal (power-of-1000)
    multipliers. A trailing "B" is not part of any unit, so `"10MB"` is rejected.

    Args:
        value: Digits directly followed by a unit (e.g. `"5g"`), or a plain integer
            string, which is returned as-is.

    Returns:
        The size in bytes.

    Raises:
        ValueError: If `value` is empty, malformed, or uses an unknown unit.
    """
    size_in_bytes = _parse_with_unit(value, units=BYTE_UNITS)
    return size_in_bytes
|
||
|
|
|
||
|
|
|
||
|
|
def parse_duration(value: str) -> int:
    """Convert a duration string such as `"10s"` into a number of seconds.

    The unit is looked up case-insensitively in `TIME_UNITS`: `s`, `m` (minutes),
    `h`, `d`, `w`, `mo` (30-day months) and `y` (365-day years).

    Args:
        value: Digits directly followed by a unit (e.g. `"2h"`), or a plain integer
            string, which is returned as-is.

    Returns:
        The duration in seconds.

    Raises:
        ValueError: If `value` is empty, malformed, or uses an unknown unit.
    """
    duration_in_seconds = _parse_with_unit(value, units=TIME_UNITS)
    return duration_in_seconds
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_with_unit(value: str, units: Dict[str, int]) -> int:
    """Turn `"<digits><unit>"` (or a bare integer string) into an integer.

    Args:
        value: Text to parse. Surrounding whitespace is ignored.
        units: Mapping from lowercase unit name to its multiplier.

    Returns:
        The integer itself when `value` is a plain integer, otherwise the number
        multiplied by the multiplier of its unit.

    Raises:
        ValueError: If `value` is blank, does not look like digits followed by
            letters, or names a unit that is not a key of `units`.
    """
    text = value.strip()
    if text == "":
        raise ValueError("Value cannot be empty.")

    # Fast path: anything `int()` accepts on its own is returned without a unit lookup.
    try:
        return int(value)
    except ValueError:
        pass

    parsed = RE_NUMBER_WITH_UNIT.fullmatch(text)
    if parsed is None:
        raise ValueError(f"Invalid value '{value}'. Must match pattern '\\d+[a-z]+' or be a plain number.")

    digits, suffix = parsed.groups()
    # Convert the number before checking the unit so failures surface in the same order.
    amount = int(digits)
    unit_key = suffix.lower()  # unit names are matched case-insensitively

    if unit_key in units:
        return amount * units[unit_key]
    raise ValueError(f"Unknown unit '{unit_key}'. Must be one of {list(units.keys())}.")
|
||
|
|
|
||
|
|
|
||
|
|
def format_timesince(ts: float) -> str:
    """Describe how long ago `ts` was, as a short human-readable phrase.

    Vaguely inspired by Django's `timesince` formatter.

    The elapsed time is rounded to a whole number of the smallest unit whose
    count stays within that unit's cap (60 seconds, 60 minutes, 24 hours,
    6 days, 6 weeks, 11 months); anything beyond that is reported in years.
    Months count as 30 days and years as 365 days.

    Args:
        ts: Timestamp in seconds, compared against `time.time()`.

    Returns:
        `"a few seconds ago"` when less than 20 seconds have elapsed (this
        includes timestamps in the future), otherwise a phrase such as
        `"1 minute ago"` or `"3 weeks ago"`.
    """
    elapsed = time.time() - ts
    if elapsed < 20:
        return "a few seconds ago"

    minute = 60
    hour = 60 * minute
    day = 24 * hour
    # (unit name, seconds per unit, largest rounded count still reported in this unit)
    scale = (
        ("second", 1, 60),
        ("minute", minute, 60),
        ("hour", hour, 24),
        ("day", day, 6),
        ("week", 7 * day, 6),
        ("month", 30 * day, 11),
        ("year", 365 * day, None),  # no cap: years absorb everything larger
    )

    for unit, seconds_per_unit, cap in scale:
        count = round(elapsed / seconds_per_unit)
        if cap is None or count <= cap:
            break

    plural = "s" if count > 1 else ""
    return f"{count} {unit}{plural} ago"
|