632 lines
20 KiB
Python
632 lines
20 KiB
Python
|
|
# Copyright (c) 2006, Mathieu Fenniak
|
||
|
|
# All rights reserved.
|
||
|
|
#
|
||
|
|
# Redistribution and use in source and binary forms, with or without
|
||
|
|
# modification, are permitted provided that the following conditions are
|
||
|
|
# met:
|
||
|
|
#
|
||
|
|
# * Redistributions of source code must retain the above copyright notice,
|
||
|
|
# this list of conditions and the following disclaimer.
|
||
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
||
|
|
# this list of conditions and the following disclaimer in the documentation
|
||
|
|
# and/or other materials provided with the distribution.
|
||
|
|
# * The name of the author may not be used to endorse or promote products
|
||
|
|
# derived from this software without specific prior written permission.
|
||
|
|
#
|
||
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
|
||
|
|
"""Utility functions for PDF library."""
|
||
|
|
__author__ = "Mathieu Fenniak"
|
||
|
|
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||
|
|
|
||
|
|
import functools
|
||
|
|
import logging
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
import warnings
|
||
|
|
from dataclasses import dataclass
|
||
|
|
from datetime import datetime, timezone
|
||
|
|
from io import DEFAULT_BUFFER_SIZE
|
||
|
|
from os import SEEK_CUR
|
||
|
|
from re import Pattern
|
||
|
|
from typing import (
|
||
|
|
IO,
|
||
|
|
Any,
|
||
|
|
Optional,
|
||
|
|
Union,
|
||
|
|
overload,
|
||
|
|
)
|
||
|
|
|
||
|
|
if sys.version_info[:2] >= (3, 10):
|
||
|
|
# Python 3.10+: https://www.python.org/dev/peps/pep-0484/
|
||
|
|
from typing import TypeAlias
|
||
|
|
else:
|
||
|
|
from typing_extensions import TypeAlias
|
||
|
|
|
||
|
|
if sys.version_info >= (3, 11):
|
||
|
|
from typing import Self
|
||
|
|
else:
|
||
|
|
from typing_extensions import Self
|
||
|
|
|
||
|
|
from .errors import (
|
||
|
|
STREAM_TRUNCATED_PREMATURELY,
|
||
|
|
DeprecationError,
|
||
|
|
PdfStreamError,
|
||
|
|
)
|
||
|
|
|
||
|
|
# A 3x3 matrix expressed as three rows of three floats each.
TransformationMatrixType: TypeAlias = tuple[
    tuple[float, float, float], tuple[float, float, float], tuple[float, float, float]
]
# A flat tuple of six floats.
# NOTE(review): presumably the six variable entries of a transformation
# matrix - confirm against the callers.
CompressedTransformationMatrix: TypeAlias = tuple[
    float, float, float, float, float, float
]

# Any file-like object; the element type is left open (IO[Any]).
StreamType = IO[Any]
# Either a plain string or a stream.
# NOTE(review): the string presumably is a file path - confirm against callers.
StrByteType = Union[str, StreamType]
|
||
|
|
|
||
|
|
|
||
|
|
def parse_iso8824_date(text: Optional[str]) -> Optional[datetime]:
    """
    Parse a PDF-style date string into a datetime object.

    Before parsing, the text is normalized: a "D:" prefix is added when the
    text starts with a digit, a trailing "Z"/"z" is expanded to a zero
    offset, and apostrophes are removed. The normalized text is then tried
    against ``strptime`` formats of increasing detail, from year only up to
    a full timestamp with a UTC offset.

    Args:
        text: The date string to parse; may be None or empty.

    Returns:
        None if ``text`` is None or empty, otherwise the parsed datetime.
        When the normalized text ends in "+0000" the result carries
        ``timezone.utc``.

    Raises:
        ValueError: If the text matches none of the supported formats.
    """
    if not text:
        return None

    normalized = text
    if normalized[0].isdigit():
        normalized = "D:" + normalized
    if normalized.endswith(("Z", "z")):
        normalized += "0000"
    for old, new in (("z", "+"), ("Z", "+"), ("'", "")):
        normalized = normalized.replace(old, new)

    sign_index = max(normalized.find("+"), normalized.find("-"))
    # Append "00" unless the sign already sits exactly five characters from
    # the end (sign followed by four characters).
    if sign_index > 0 and sign_index != len(normalized) - 5:
        normalized += "00"

    candidate_formats = (
        "D:%Y",
        "D:%Y%m",
        "D:%Y%m%d",
        "D:%Y%m%d%H",
        "D:%Y%m%d%H%M",
        "D:%Y%m%d%H%M%S",
        "D:%Y%m%d%H%M%S%z",
    )
    for date_format in candidate_formats:
        try:
            parsed = datetime.strptime(normalized, date_format)  # noqa: DTZ007
        except ValueError:
            continue
        if normalized.endswith("+0000"):
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed
    raise ValueError(f"Can not convert date: {text}")
|
||
|
|
|
||
|
|
|
||
|
|
def format_iso8824_date(dt: datetime) -> str:
    """
    Render a datetime object as a PDF date string.

    The result has the shape ``D:YYYYMMDDHHmmSS``. For timezone-aware
    values the UTC offset is appended as ``+HH'mm'`` or ``-HH'mm'``.

    Args:
        dt: The datetime object to render.

    Returns:
        The date string in PDF format; naive datetimes get no offset suffix.
    """
    formatted = dt.strftime("D:%Y%m%d%H%M%S")
    if dt.tzinfo is None:
        return formatted

    offset = dt.utcoffset()
    assert offset is not None
    offset_seconds = int(offset.total_seconds())
    sign = "-" if offset_seconds < 0 else "+"
    # Split the absolute offset into whole hours and whole minutes.
    hours, leftover = divmod(abs(offset_seconds), 3600)
    minutes = leftover // 60
    return f"{formatted}{sign}{hours:02d}'{minutes:02d}'"
|
||
|
|
|
||
|
|
|
||
|
|
def _get_max_pdf_version_header(header1: str, header2: str) -> str:
|
||
|
|
versions = (
|
||
|
|
"%PDF-1.3",
|
||
|
|
"%PDF-1.4",
|
||
|
|
"%PDF-1.5",
|
||
|
|
"%PDF-1.6",
|
||
|
|
"%PDF-1.7",
|
||
|
|
"%PDF-2.0",
|
||
|
|
)
|
||
|
|
pdf_header_indices = []
|
||
|
|
if header1 in versions:
|
||
|
|
pdf_header_indices.append(versions.index(header1))
|
||
|
|
if header2 in versions:
|
||
|
|
pdf_header_indices.append(versions.index(header2))
|
||
|
|
if len(pdf_header_indices) == 0:
|
||
|
|
raise ValueError(f"Neither {header1!r} nor {header2!r} are proper headers")
|
||
|
|
return versions[max(pdf_header_indices)]
|
||
|
|
|
||
|
|
|
||
|
|
# Single-byte whitespace tokens: NUL, tab, line feed, form feed,
# carriage return and space.
WHITESPACES = (b"\x00", b"\t", b"\n", b"\f", b"\r", b" ")
# The same bytes concatenated into one bytes object.
WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
# The same bytes wrapped in square brackets, i.e. a regex character class.
WHITESPACES_AS_REGEXP = b"[%b]" % WHITESPACES_AS_BYTES
|
||
|
|
|
||
|
|
|
||
|
|
def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
    """
    Read and return bytes up to the next whitespace byte.

    Reading happens one byte at a time and stops at the first byte for
    which ``isspace()`` is true (that byte is consumed but not returned),
    at end of stream, or once ``maxchars`` bytes have been collected.

    Args:
        stream: The data stream to read from.
        maxchars: The maximum number of bytes returned; by default unlimited.

    Returns:
        The bytes which were read before the stop condition.

    """
    collected = b""
    while True:
        byte = stream.read(1)
        if not byte or byte.isspace():
            return collected
        collected += byte
        if len(collected) == maxchars:
            return collected
|
||
|
|
|
||
|
|
|
||
|
|
def read_non_whitespace(stream: StreamType) -> bytes:
    """
    Skip whitespace and return the next non-whitespace byte.

    Bytes are read one at a time; every byte listed in ``WHITESPACES`` is
    discarded. At end of stream the empty read result is returned.

    Args:
        stream: The data stream to read from.

    Returns:
        The first byte read that is not a whitespace byte.

    """
    while True:
        byte = stream.read(1)
        if byte not in WHITESPACES:
            return byte
|
||
|
|
|
||
|
|
|
||
|
|
def skip_over_whitespace(stream: StreamType) -> bool:
    """
    Consume leading whitespace and report whether any was found.

    Works like read_non_whitespace, except that the result is a flag
    rather than the byte. The first non-whitespace byte is read from the
    stream as well; the stream is not rewound.

    Args:
        stream: The data stream to read from.

    Returns:
        True if one or more whitespace bytes were skipped, otherwise False.

    """
    skipped_any = False
    while stream.read(1) in WHITESPACES:
        skipped_any = True
    return skipped_any
|
||
|
|
|
||
|
|
|
||
|
|
def check_if_whitespace_only(value: bytes) -> bool:
    """
    Tell whether every byte of the given value is a whitespace byte.

    Args:
        value: The bytes to check.

    Returns:
        True if all bytes are contained in ``WHITESPACES_AS_BYTES`` (which
        is trivially the case for an empty value), otherwise False.

    """
    for byte in value:
        if byte not in WHITESPACES_AS_BYTES:
            return False
    return True
|
||
|
|
|
||
|
|
|
||
|
|
def skip_over_comment(stream: StreamType) -> None:
    """
    Skip a comment starting at the current stream position, if there is one.

    One byte is read and the stream is moved back by one byte. If that byte
    is ``%``, bytes are consumed up to and including the first line feed or
    carriage return.

    Args:
        stream: The data stream to read from.

    Raises:
        PdfStreamError: If the stream ends before the comment is terminated.
    """
    current = stream.read(1)
    stream.seek(-1, SEEK_CUR)
    if current != b"%":
        return
    while current not in (b"\n", b"\r"):
        current = stream.read(1)
        if current == b"":
            raise PdfStreamError("File ended unexpectedly.")
|
||
|
|
|
||
|
|
|
||
|
|
def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
    """
    Read from the stream until the regular expression matches.

    The matched text itself is not part of the result; the stream is
    positioned at the start of the match. Reaching EOF on the underlying
    stream counts as the end of the token, so everything read is returned.

    Args:
        stream: The data stream to read from.
        regex: The compiled bytes pattern marking the end of the token.

    Returns:
        The bytes read before the match (or before EOF).

    """
    consumed = b""
    # Read in 16-byte chunks; the search always covers everything read so
    # far, so a match may span a chunk boundary.
    while chunk := stream.read(16):
        candidate = consumed + chunk
        found = regex.search(candidate)
        if found is not None:
            # Step back so the stream sits on the first byte of the match.
            stream.seek(found.start() - len(candidate), SEEK_CUR)
            return candidate[: found.start()]
        consumed = candidate
    return consumed
|
||
|
|
|
||
|
|
|
||
|
|
def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
    """
    Read the block of size to_read that ends at the current stream position.

    Afterwards the stream is positioned at the beginning of the block which
    was read.

    Args:
        stream: The data stream to read from.
        to_read: The number of bytes to read in front of the current position.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If fewer than to_read bytes precede the current
            position.

    """
    if stream.tell() < to_read:
        raise PdfStreamError("Could not read malformed PDF file")
    # Jump to the start of the block, read it, then jump back to its start.
    stream.seek(-to_read, SEEK_CUR)
    block = stream.read(to_read)
    stream.seek(-to_read, SEEK_CUR)
    return block
|
||
|
|
|
||
|
|
|
||
|
|
def read_previous_line(stream: StreamType) -> bytes:
    """
    Given a byte stream with current position X, return the previous line.

    All characters between the first CR/LF byte found before X
    (or, the start of the file, if no such byte is found) and position X
    are returned.
    After this call, the stream will be positioned one byte after the
    first non-CRLF character found beyond the first CR/LF byte before X,
    or, if no such byte is found, at the beginning of the stream.

    Args:
        stream: The byte stream to read from; it is read backwards in
            blocks of at most DEFAULT_BUFFER_SIZE bytes.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If the stream is already at position 0.

    """
    # Blocks are collected back-to-front and reversed when joined.
    line_content = []
    found_crlf = False
    if stream.tell() == 0:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    while True:
        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
        if to_read == 0:
            # Reached the start of the stream.
            break
        # Read the block. After this, the stream is positioned at the
        # start of the block which was just read.
        block = read_block_backwards(stream, to_read)
        idx = len(block) - 1
        if not found_crlf:
            # We haven't found our first CR/LF yet.
            # Read off characters until we hit one.
            while idx >= 0 and block[idx] not in b"\r\n":
                idx -= 1
            if idx >= 0:
                found_crlf = True
        if found_crlf:
            # We found our first CR/LF already (on this block or
            # a previous one).
            # Our combined line is the remainder of the block
            # plus any previously read blocks.
            line_content.append(block[idx + 1 :])
            # Continue to read off any more CRLF characters.
            while idx >= 0 and block[idx] in b"\r\n":
                idx -= 1
        else:
            # Didn't find CR/LF yet - add this block to our
            # previously read blocks and continue.
            line_content.append(block)
        if idx >= 0:
            # We found the next non-CRLF character.
            # Set the stream position correctly (one byte past that
            # character, relative to the block start), then break
            stream.seek(idx + 1, SEEK_CUR)
            break
    # Join all the blocks in the line (which are in reverse order)
    return b"".join(line_content[::-1])
|
||
|
|
|
||
|
|
|
||
|
|
def matrix_multiply(
    a: TransformationMatrixType, b: TransformationMatrixType
) -> TransformationMatrixType:
    """
    Multiply two matrices given as tuples of rows.

    Every entry is converted with ``float`` before multiplying.

    Args:
        a: The left-hand matrix.
        b: The right-hand matrix.

    Returns:
        The product ``a x b`` as a tuple of row tuples.
    """

    def dot(row, column):  # type: ignore  # noqa: ANN001, ANN202
        # Sum of element-wise products of one row of a and one column of b.
        return sum(float(x) * float(y) for x, y in zip(row, column))

    return tuple(  # type: ignore[return-value]
        tuple(dot(row, column) for column in zip(*b)) for row in a
    )
|
||
|
|
|
||
|
|
|
||
|
|
def mark_location(stream: StreamType) -> None:
    """
    Create text file showing current location in context.

    Writes up to 5000 bytes before the current position, the marker
    ``HERE``, and up to 5000 bytes after it to ``pypdf_pdfLocation.txt`` in
    the current working directory. The stream position is restored
    afterwards, also when fewer than 5000 bytes are available on either
    side of the position.

    Args:
        stream: A seekable binary stream.
    """
    # Mainly for debugging
    radius = 5000
    position = stream.tell()
    # Clamp the window start so that positions near the beginning of the
    # stream do not trigger a seek before offset 0.
    start = max(0, position - radius)
    try:
        stream.seek(start)
        with open("pypdf_pdfLocation.txt", "wb") as output_fh:
            output_fh.write(stream.read(position - start))
            output_fh.write(b"HERE")
            output_fh.write(stream.read(radius))
    finally:
        # Restore the absolute position; a relative seek would be wrong
        # whenever a read returned fewer bytes than requested.
        stream.seek(position)
|
||
|
|
|
||
|
|
|
||
|
|
@overload
def ord_(b: str) -> int: ...


@overload
def ord_(b: bytes) -> bytes: ...


@overload
def ord_(b: int) -> int: ...


def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
    """
    Return the code point for a string, and any other value unchanged.

    Args:
        b: A string accepted by ``ord``, or an int or bytes value.

    Returns:
        ``ord(b)`` if ``b`` is a str, otherwise ``b`` itself.
    """
    return ord(b) if isinstance(b, str) else b
|
||
|
|
|
||
|
|
|
||
|
|
def deprecate(msg: str, stacklevel: int = 3) -> None:
    """
    Emit a DeprecationWarning with the given message.

    Args:
        msg: The warning text.
        stacklevel: Passed on to ``warnings.warn`` to select the stack
            frame the warning is attributed to.
    """
    warnings.warn(message=msg, category=DeprecationWarning, stacklevel=stacklevel)
|
||
|
|
|
||
|
|
|
||
|
|
def deprecation(msg: str) -> None:
    """
    Raise a DeprecationError carrying the given message.

    Args:
        msg: The error text.

    Raises:
        DeprecationError: Always.
    """
    raise DeprecationError(msg)
|
||
|
|
|
||
|
|
|
||
|
|
def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """
    Warn that a feature is going to be removed and name its replacement.

    Args:
        old_name: The name of the deprecated feature.
        new_name: The name of the feature to use instead.
        removed_in: The pypdf version in which the feature will be removed.
    """
    message = (
        f"{old_name} is deprecated and will be removed in pypdf {removed_in}. "
        f"Use {new_name} instead."
    )
    # One level more than the default of deprecate, as this helper adds a
    # frame of its own.
    deprecate(message, stacklevel=4)
|
||
|
|
|
||
|
|
|
||
|
|
def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """
    Raise an error that a feature was removed and name its replacement.

    Args:
        old_name: The name of the removed feature.
        new_name: The name of the feature to use instead.
        removed_in: The pypdf version in which the feature was removed.
    """
    message = (
        f"{old_name} is deprecated and was removed in pypdf {removed_in}. "
        f"Use {new_name} instead."
    )
    deprecation(message)
|
||
|
|
|
||
|
|
|
||
|
|
def deprecate_no_replacement(name: str, removed_in: str) -> None:
    """
    Warn that a feature is going to be removed without a replacement.

    Args:
        name: The name of the deprecated feature.
        removed_in: The pypdf version in which the feature will be removed.
    """
    message = f"{name} is deprecated and will be removed in pypdf {removed_in}."
    deprecate(message, stacklevel=4)
|
||
|
|
|
||
|
|
|
||
|
|
def deprecation_no_replacement(name: str, removed_in: str) -> None:
    """
    Raise an error that a feature was removed without a replacement.

    Args:
        name: The name of the removed feature.
        removed_in: The pypdf version in which the feature was removed.
    """
    message = f"{name} is deprecated and was removed in pypdf {removed_in}."
    deprecation(message)
|
||
|
|
|
||
|
|
|
||
|
|
def logger_error(msg: str, src: str) -> None:
    """
    Log an error message; use this instead of logger.error directly.

    Going through this function allows people to overwrite it more easily.

    See the docs on when to use which:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html

    Args:
        msg: The message to log.
        src: The name of the logger to log to.
    """
    logger = logging.getLogger(src)
    logger.error(msg)
|
||
|
|
|
||
|
|
|
||
|
|
def logger_warning(msg: str, src: str) -> None:
    """
    Log a warning message; use this instead of logger.warning directly.

    Going through this function allows people to overwrite it more easily.

    ## Exception, warnings.warn, logger_warning
    - Exceptions should be used if the user should write code that deals with
      an error case, e.g. the PDF being completely broken.
    - warnings.warn should be used if the user needs to fix their code, e.g.
      DeprecationWarnings
    - logger_warning should be used if the user needs to know that an issue was
      handled by pypdf, e.g. a non-compliant PDF being read in a way that
      pypdf could apply a robustness fix to still read it. This applies mainly
      to strict=False mode.

    Args:
        msg: The message to log.
        src: The name of the logger to log to.
    """
    logger = logging.getLogger(src)
    logger.warning(msg)
|
||
|
|
|
||
|
|
|
||
|
|
def rename_kwargs(
    func_name: str, kwargs: dict[str, Any], aliases: dict[str, str], fail: bool = False
) -> None:
    """
    Helper function to deprecate arguments.

    Each deprecated keyword found in ``kwargs`` is moved to its new name in
    place and a DeprecationWarning is emitted.

    Args:
        func_name: Name of the function whose arguments are checked; only
            used in the error message.
        kwargs: The keyword arguments received by that function; modified
            in place.
        aliases: Mapping from deprecated argument name to the new name.
        fail: If True, raise instead of renaming when a deprecated argument
            is used.

    Raises:
        DeprecationError: If ``fail`` is True and a deprecated argument is
            present.
        TypeError: If both the deprecated and the new argument are present.

    """
    for old_term, new_term in aliases.items():
        if old_term not in kwargs:
            continue
        hint = f"{old_term} is deprecated as an argument. Use {new_term} instead"
        if fail:
            raise DeprecationError(hint)
        if new_term in kwargs:
            raise TypeError(
                f"{func_name} received both {old_term} and {new_term} as "
                f"an argument. {old_term} is deprecated. "
                f"Use {new_term} instead."
            )
        kwargs[new_term] = kwargs.pop(old_term)
        warnings.warn(message=hint, category=DeprecationWarning, stacklevel=3)
|
||
|
|
|
||
|
|
|
||
|
|
def _human_readable_bytes(bytes: int) -> str:
|
||
|
|
if bytes < 10**3:
|
||
|
|
return f"{bytes} Byte"
|
||
|
|
if bytes < 10**6:
|
||
|
|
return f"{bytes / 10**3:.1f} kB"
|
||
|
|
if bytes < 10**9:
|
||
|
|
return f"{bytes / 10**6:.1f} MB"
|
||
|
|
return f"{bytes / 10**9:.1f} GB"
|
||
|
|
|
||
|
|
|
||
|
|
# The following class has been copied from Django:
|
||
|
|
# https://github.com/django/django/blob/adae619426b6f50046b3daaa744db52989c9d6db/django/utils/functional.py#L51-L65
|
||
|
|
# It received some modifications to comply with our own coding standards.
|
||
|
|
#
|
||
|
|
# Original license:
|
||
|
|
#
|
||
|
|
# ---------------------------------------------------------------------------------
|
||
|
|
# Copyright (c) Django Software Foundation and individual contributors.
|
||
|
|
# All rights reserved.
|
||
|
|
#
|
||
|
|
# Redistribution and use in source and binary forms, with or without modification,
|
||
|
|
# are permitted provided that the following conditions are met:
|
||
|
|
#
|
||
|
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||
|
|
# this list of conditions and the following disclaimer.
|
||
|
|
#
|
||
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
||
|
|
# notice, this list of conditions and the following disclaimer in the
|
||
|
|
# documentation and/or other materials provided with the distribution.
|
||
|
|
#
|
||
|
|
# 3. Neither the name of Django nor the names of its contributors may be used
|
||
|
|
# to endorse or promote products derived from this software without
|
||
|
|
# specific prior written permission.
|
||
|
|
#
|
||
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||
|
|
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
|
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
|
|
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||
|
|
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||
|
|
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||
|
|
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
# ---------------------------------------------------------------------------------
|
||
|
|
class classproperty:  # noqa: N801
    """
    Decorator turning a method with a single cls argument into a property
    which can be read directly from the class.
    """

    def __init__(self, method=None) -> None:  # type: ignore  # noqa: ANN001
        """Store the wrapped method; it may also be supplied later via getter."""
        self.fget = method

    def __get__(self, instance, cls=None) -> Any:  # type: ignore  # noqa: ANN001
        """Call the wrapped method with the owning class; the instance is ignored."""
        accessor = self.fget
        return accessor(cls)

    def getter(self, method) -> Self:  # type: ignore  # noqa: ANN001
        """Replace the wrapped method and return this descriptor."""
        self.fget = method
        return self
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class File:
    """A named blob of data together with an optional reference to its stream object."""

    from .generic import IndirectObject  # noqa: PLC0415

    name: str = ""
    """Filename as identified within the PDF file."""
    data: bytes = b""
    """Data as bytes."""
    indirect_reference: Optional[IndirectObject] = None
    """Reference to the object storing the stream."""

    def __str__(self) -> str:
        """Return the class name, the file name and the human-readable data size."""
        size = _human_readable_bytes(len(self.data))
        return f"{self.__class__.__name__}(name={self.name}, data: {size})"

    def __repr__(self) -> str:
        """Return the ``__str__`` text extended by the hash of the data."""
        # Drop the closing parenthesis, append the hash, then close again.
        summary = self.__str__()[:-1]
        return f"{summary}, hash: {hash(self.data)})"
|
||
|
|
|
||
|
|
|
||
|
|
@functools.total_ordering
class Version:
    """
    A dot-separated version string which can be compared and hashed.

    Each component is split into a leading integer and the remaining text,
    e.g. "2rc1" becomes ``(2, "rc1")``. A component without leading digits
    becomes ``(0, component)``.
    """

    COMPONENT_PATTERN = re.compile(r"^(\d+)(.*)$")

    def __init__(self, version_str: str) -> None:
        """Keep the raw version string and its parsed components."""
        self.version_str = version_str
        self.components = self._parse_version(version_str)

    def _parse_version(self, version_str: str) -> list[tuple[int, str]]:
        """Split the version string at dots into (integer, suffix) pairs."""
        parsed: list[tuple[int, str]] = []
        for component in version_str.split("."):
            match = Version.COMPONENT_PATTERN.match(component)
            if match is None:
                parsed.append((0, component))
            else:
                # Group 1 is mandatory in the pattern, so it is always set
                # whenever the pattern matched.
                digits, suffix = match.groups()
                parsed.append((int(digits), suffix))
        return parsed

    def __eq__(self, other: object) -> bool:
        """Two versions are equal if their parsed components are equal."""
        return isinstance(other, Version) and self.components == other.components

    def __hash__(self) -> int:
        """Hash the class together with the components."""
        # Convert to tuple as lists cannot be hashed.
        return hash((self.__class__, tuple(self.components)))

    def __lt__(self, other: Any) -> bool:
        """
        Compare component by component, integer part first, then suffix.

        If all shared components are equal, the version with fewer
        components is the smaller one.

        Raises:
            ValueError: If other is not a Version.
        """
        if not isinstance(other, Version):
            raise ValueError(f"Version cannot be compared against {type(other)}")

        for mine, theirs in zip(self.components, other.components):
            if mine != theirs:
                # Tuple comparison looks at the integer first and only
                # falls back to the suffix when the integers are equal.
                return mine < theirs

        return len(self.components) < len(other.components)
|