group-wbl/.venv/lib/python3.13/site-packages/pypdf/_xobj_image_helpers.py

"""Functions to convert an image XObject to an image"""

import sys
from io import BytesIO
from typing import Any, Literal, Optional, Union, cast

from ._utils import check_if_whitespace_only, logger_warning
from .constants import ColorSpaces, StreamAttributes
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .errors import EmptyImageDataError, PdfReadError
from .generic import (
    ArrayObject,
    DecodedStreamObject,
    EncodedStreamObject,
    NullObject,
    TextStringObject,
    is_null_or_none,
)

if sys.version_info[:2] >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias


try:
    from PIL import Image, UnidentifiedImageError
except ImportError:
    raise ImportError(
        "pillow is required to do image extraction. "
        "It can be installed via 'pip install pypdf[image]'"
    )

mode_str_type: TypeAlias = Literal[
    "", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
]

MAX_IMAGE_MODE_NESTING_DEPTH: int = 10


def _get_image_mode(
    color_space: Union[str, list[Any], Any],
    color_components: int,
    prev_mode: mode_str_type,
    depth: int = 0,
) -> tuple[mode_str_type, bool]:
    """
    Determine the Pillow image mode matching a PDF color space.

    Array color spaces (/Indexed, /Separation, /DeviceN) are resolved by
    recursing into their base/alternate color space; the recursion is bounded
    by MAX_IMAGE_MODE_NESTING_DEPTH.

    Args:
        color_space: Color space name, color space array, or null/None.
        color_components: Number of color components. When the color space
            name is not a known one, it is used as a position in the list of
            known modes (0: "1", 1: "L", 2: "P", 3: "RGB", 4: "CMYK", ...).
        prev_mode: Mode returned when neither the color space name nor the
            component count identifies a mode.
        depth: Current nesting depth; only set by the recursive calls.

    Returns:
        Image mode, not taking into account mask (transparency).
        ColorInversion is required (like for some DeviceCMYK).

    Raises:
        PdfReadError: If color spaces are nested too deeply, or if the color
            space is neither a string nor a list.

    """
    if depth > MAX_IMAGE_MODE_NESTING_DEPTH:
        raise PdfReadError(
            "Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."
        )
    if is_null_or_none(color_space):
        return "", False
    color_space_str: str = ""
    if isinstance(color_space, str):
        color_space_str = color_space
    elif not isinstance(color_space, list):
        raise PdfReadError(
            "Cannot interpret color space", color_space
        )  # pragma: no cover
    elif not color_space:
        return "", False
    elif color_space[0].startswith("/Cal"):  # /CalRGB or /CalGray
        # Treat the calibrated spaces like their device counterparts.
        color_space_str = "/Device" + color_space[0][4:]
    elif color_space[0] == "/ICCBased":
        icc_profile = color_space[1].get_object()
        color_components = cast(int, icc_profile["/N"])
        alternate = icc_profile.get("/Alternate", "")
        # Only a plain name can be looked up in the mode map below. For any
        # other kind of alternate (e.g. an array color space, which is not
        # usable as a dictionary key) rely on the component count /N instead.
        color_space_str = alternate if isinstance(alternate, str) else ""
    elif color_space[0] == "/Indexed":
        mode, invert_color = _get_image_mode(
            color_space[1].get_object(), color_components, prev_mode, depth + 1
        )
        # Indexed images over a multi-channel base are palette images.
        if mode in ("RGB", "CMYK"):
            mode = "P"
        return mode, invert_color
    elif color_space[0] == "/Separation":
        mode, _ = _get_image_mode(
            color_space[2].get_object(), color_components, prev_mode, depth + 1
        )
        # Separation images always request color inversion.
        return mode, True
    elif color_space[0] == "/DeviceN":
        color_components = len(color_space[1])
        alternate_space = color_space[2].get_object()
        if alternate_space == "/DeviceCMYK" and color_components == 1:
            if color_space[1][0] != "/Black":
                logger_warning(
                    f"Color {color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",
                    __name__,
                )
            return "L", True
        return _get_image_mode(
            alternate_space, color_components, prev_mode, depth + 1
        )

    mode_map: dict[str, mode_str_type] = {
        "1bit": "1",  # must be zeroth position: color_components may index the values
        "/DeviceGray": "L",  # must be first position: color_components may index the values
        "palette": "P",  # must be second position: color_components may index the values
        "/DeviceRGB": "RGB",  # must be third position: color_components may index the values
        "/DeviceCMYK": "CMYK",  # must be fourth position: color_components may index the values
        "2bit": "2bits",
        "4bit": "4bits",
    }

    known_mode = mode_map.get(color_space_str)
    if known_mode:
        mode = known_mode
    elif 0 <= color_components < len(mode_map):
        mode = list(mode_map.values())[color_components]
    else:
        # A component count outside the table used to raise IndexError (or,
        # when negative, silently index from the end); fall back instead.
        mode = prev_mode

    return mode, mode == "CMYK"


def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:
    """
    Unpack samples stored on ``bits`` bits each into one byte per sample.

    Samples are read from the most significant bits of each byte first.
    Every image row starts on a fresh input byte: the unread bits of a
    partially consumed byte are skipped at the start of the next row.

    Args:
        data: Packed sample data.
        size: Image size as (width, height).
        bits: Number of bits per sample.

    Returns:
        ``width * height`` bytes, one sample value per byte, row by row.

    """
    width = size[0]
    height = size[1]
    top_shift = 8 - bits  # shift that selects the first sample of a byte
    sample_mask = (1 << bits) - 1
    unpacked = bytearray(width * height)

    source_index = 0
    shift = top_shift
    target_index = 0
    for _row in range(height):
        # Row alignment: drop the remainder of a byte left half-read.
        if shift != top_shift:
            source_index += 1
            shift = top_shift
        for _column in range(width):
            unpacked[target_index] = (data[source_index] >> shift) & sample_mask
            target_index += 1
            shift -= bits
            if shift < 0:
                # Current byte exhausted: continue with the next one.
                source_index += 1
                shift = top_shift
    return bytes(unpacked)


def _extended_image_from_bytes(
    mode: str, size: tuple[int, int], data: bytes
) -> Image.Image:
    """
    Build a Pillow image from raw bytes, expanding undersized data if possible.

    When Pillow rejects the data and its length is a multiple of the pixel
    count, every byte is repeated so that the data reaches ``len(mode)`` bytes
    per pixel, and loading is attempted again.

    Args:
        mode: Pillow image mode.
        size: Image size as (width, height).
        data: Raw image data.

    Returns:
        The loaded image.

    Raises:
        EmptyImageDataError: If Pillow rejects the data and it is empty.
        ValueError: If Pillow rejects the data and it cannot be expanded.

    """
    try:
        img = Image.frombytes(mode, size, data)
    except ValueError as exc:
        nb_pix = size[0] * size[1]
        data_length = len(data)
        if data_length == 0:
            raise EmptyImageDataError(
                "Data is 0 bytes, cannot process an image from empty data."
            ) from exc
        # A zero-sized image cannot be repaired; checking it first also avoids
        # a ZeroDivisionError in the modulo below.
        if nb_pix == 0 or data_length % nb_pix != 0:
            raise
        repeat = int(nb_pix * len(mode) / data_length)
        data = b"".join(bytes((x,) * repeat) for x in data)
        img = Image.frombytes(mode, size, data)
    return img


def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, Any]:
    """
    Split an /Indexed color space array into its components.

    Args:
        color_space: The color space array, normally
            ``[/Indexed, base, hival, lookup]``.

    Returns:
        Tuple (color space name, base, hival, lookup), each resolved with
        ``get_object()``.

    Raises:
        PdfReadError: If the array does not have a supported layout.

    """
    count = len(color_space)
    if count == 4:
        name, base, hival, lookup = (value.get_object() for value in color_space)
        return name, base, hival, lookup

    # Deal with strange AutoDesk files where `base` and `hival` look like this:
    #   /DeviceRGB\x00255
    # The length is checked first so that shorter arrays reach the
    # PdfReadError below instead of failing with an IndexError.
    if count == 3:
        element1 = color_space[1]
        element1 = element1 if isinstance(element1, str) else element1.get_object()
        if isinstance(element1, str) and "\x00" in element1:
            name, lookup = color_space[0].get_object(), color_space[2].get_object()
            base, _, hival_text = element1.partition("\x00")
            try:
                hival = int(hival_text)
            except ValueError as exc:
                raise PdfReadError(
                    f"Invalid hival in indexed color space: {color_space}"
                ) from exc
            return name, base, hival, lookup
    raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")


def _handle_flate(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
    obj_as_text: str,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process image data obtained from a FlateDecode (or RunLengthDecode) stream.

    Indexed images get their lookup table applied and are converted to "L"
    or "RGB"; ICCBased images are reloaded when the ICC profile implies a
    different mode.

    Args:
        size: Image size as (width, height).
        data: Decoded image data.
        mode: Image mode determined from the image dictionary.
        color_space: Color space name or color space array.
        colors: Number of color components.
        obj_as_text: Textual representation of the image, for warnings.

    Returns:
        img, image_format, extension, color inversion (always False).

    Raises:
        PdfReadError: If an /Indexed color space lacks its base, hival and
            lookup entries.

    """
    extension = ".png"  # mime_type: "image/png"
    image_format = "PNG"
    lookup: Any
    base: Any
    hival: Any
    if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":
        color_space, base, hival, lookup = __handle_flate__indexed(color_space)
    elif color_space == "/Indexed":
        # A bare /Indexed name carries no base, hival or lookup; without this
        # check the code below would fail on the unassigned `lookup`.
        raise PdfReadError(
            f"Indexed color space without base, hival and lookup in {obj_as_text}"
        )
    # Sub-byte samples are expanded to one byte per pixel before loading.
    if mode == "2bits":
        mode = "P"
        data = bits2byte(data, size, 2)
    elif mode == "4bits":
        mode = "P"
        data = bits2byte(data, size, 4)
    img = _extended_image_from_bytes(mode, size, data)
    if color_space == "/Indexed":
        # Normalize the lookup table to bytes.
        if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):
            lookup = lookup.get_data()
        if isinstance(lookup, TextStringObject):
            lookup = lookup.original_bytes
        if isinstance(lookup, str):
            lookup = lookup.encode()
        try:
            # nb: bytes per lookup entry; conv: mode to convert the image to;
            # mode: raw mode of the lookup table.
            nb, conv, mode = {  # type: ignore
                "1": (0, "", ""),
                "L": (1, "P", "L"),
                "P": (0, "", ""),
                "RGB": (3, "P", "RGB"),
                "CMYK": (4, "P", "CMYK"),
            }[_get_image_mode(base, 0, "")[0]]
        except KeyError:  # pragma: no cover
            logger_warning(
                f"Base {base} not coded please share the pdf file with pypdf dev team",
                __name__,
            )
            lookup = None
        else:
            if img.mode == "1":
                # Two values ("high" and "low").
                expected_count = 2 * nb
                actual_count = len(lookup)
                if actual_count != expected_count:
                    if actual_count < expected_count:
                        logger_warning(
                            f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                        lookup += bytes([0] * (expected_count - actual_count))
                    elif not check_if_whitespace_only(lookup[expected_count:]):
                        logger_warning(
                            f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                    lookup = lookup[:expected_count]
                colors_arr = [lookup[:nb], lookup[nb:]]
                # Replace every pixel by the "low" or "high" lookup color.
                arr = b"".join(
                    b"".join(
                        colors_arr[1 if img.getpixel((x, y)) > 127 else 0]
                        for x in range(img.size[0])
                    )
                    for y in range(img.size[1])
                )
                img = Image.frombytes(mode, img.size, arr)
            else:
                img = img.convert(conv)
                if len(lookup) != (hival + 1) * nb:
                    logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)
                    lookup = None
                elif mode == "L":
                    # gray lookup does not work: it is converted to a similar RGB lookup
                    lookup = b"".join([bytes([b, b, b]) for b in lookup])
                    mode = "RGB"
                # TODO: https://github.com/py-pdf/pypdf/pull/2039
                # this is a work around until PIL is able to process CMYK images
                elif mode == "CMYK":
                    _rgb = []
                    for _c, _m, _y, _k in (
                        lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)
                    ):
                        _r = int(255 * (1 - _c / 255) * (1 - _k / 255))
                        _g = int(255 * (1 - _m / 255) * (1 - _k / 255))
                        _b = int(255 * (1 - _y / 255) * (1 - _k / 255))
                        _rgb.append(bytes((_r, _g, _b)))
                    lookup = b"".join(_rgb)
                    mode = "RGB"
                if lookup is not None:
                    img.putpalette(lookup, rawmode=mode)
            img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")
    elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
        # Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary
        mode2 = _get_image_mode(color_space, colors, mode)[0]
        if mode != mode2:
            img = Image.frombytes(mode2, size, data)  # reloaded as mode may have changed
    if mode == "CMYK":
        extension = ".tif"
        image_format = "TIFF"
    return img, image_format, extension, False


def _handle_jpx(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process image encoded in JPXDecode (JPEG 2000)
    Returns img, image_format, extension, inversion
    """
    decoded = Image.open(BytesIO(data), formats=("JPEG2000",))
    mode, invert_color = _get_image_mode(color_space, colors, mode)
    if mode == "":
        # No mode could be derived from the color space: use Pillow's.
        mode = cast(mode_str_type, decoded.mode)
        invert_color = mode == "CMYK"
    if mode == "RGB" and decoded.mode == "RGBA":
        mode = "RGBA"

    # Bring the decoded image to the expected mode.
    mode_pair = (decoded.mode, mode)
    if decoded.mode == mode or mode_pair in (("L", "P"), ("P", "L")):
        # L and P are indexed modes which should not be changed.
        img = decoded
    elif mode_pair in (("RGBA", "CMYK"), ("CMYK", "RGBA")):
        # RGBA and CMYK both use 4 bytes per pixel: reinterpret the raw
        # bytes under the expected mode instead of converting the colors.
        img = Image.frombytes(mode, decoded.size, decoded.tobytes())
    else:  # pragma: no cover
        img = decoded.convert(mode)

    # CMYK is converted to RGB with Pillow's default conversion;
    # an ICC profile based conversion is not implemented for the moment.
    if img.mode == "CMYK":
        img = img.convert("RGB")
    return img, "JPEG2000", ".jp2", invert_color  # mime_type: "image/x-jp2"


def _apply_decode(
    img: Image.Image,
    x_object_obj: dict[str, Any],
    lfilters: FT,
    color_space: Union[str, list[Any], Any],
    invert_color: bool,
) -> Image.Image:
    """
    Apply the decode array of an image XObject to the image.

    Args:
        img: Image to process.
        x_object_obj: Image XObject dictionary, read for its decode entry.
        lfilters: Last filter applied to the image stream.
        color_space: Color space name or color space array of the image.
        invert_color: Whether color inversion was requested for the image.

    Returns:
        The remapped image, or ``img`` itself when no remapping is needed.

    """
    inverting_decode = [1.0, 0.0] * len(img.getbands())

    # CMYK image and other color spaces without decode
    # requires reverting scale (cf p243,2§ last sentence)
    inverted_by_default = (
        img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
    ) or (invert_color and img.mode == "L")
    decode = x_object_obj.get(
        IA.DECODE, inverting_decode if inverted_by_default else None
    )

    if isinstance(color_space, ArrayObject):
        family = color_space[0].get_object()
        if family == "/Indexed":
            decode = None  # decode is meaningless if Indexed
        elif family == "/Separation":
            decode = inverting_decode

    # Nothing to do without decode array or with the identity [0 1 0 1 ...].
    if decode is None or all(
        value == index % 2 for index, value in enumerate(decode)
    ):
        return img

    # One 256-entry table per (dmin, dmax) pair.
    lut: list[int] = []
    for low_index in range(0, len(decode), 2):
        dmin = decode[low_index]
        dmax = decode[low_index + 1]
        lut.extend(
            round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)
        )
    return img.point(lut)


def _get_mode_and_invert_color(
    x_object_obj: dict[str, Any], colors: int, color_space: Union[str, list[Any], Any]
) -> tuple[mode_str_type, bool]:
    """
    Determine image mode and color inversion for an image XObject.

    Args:
        x_object_obj: Image XObject dictionary, read for /BitsPerComponent.
        colors: Number of color components.
        color_space: Color space name, color space array, or null/None.

    Returns:
        Tuple (image mode, whether color inversion is required), as returned
        by ``_get_image_mode``.

    """
    bits_per_component = x_object_obj.get("/BitsPerComponent", 8)
    if bits_per_component < 8:
        # Sub-byte samples: the mode depends on the bit depth only.
        return _get_image_mode(f"{bits_per_component}bit", 0, "")

    # A single component in a color space that is not a gray one is passed
    # on as 2, the position of the palette mode in _get_image_mode.
    single_non_gray = (
        colors == 1
        and not is_null_or_none(color_space)
        and "Gray" not in color_space
    )
    return _get_image_mode(color_space, 2 if single_non_gray else colors, "")


def _xobj_to_image(
        x_object: dict[str, Any],
        pillow_parameters: Union[dict[str, Any], None] = None
) -> tuple[Optional[str], bytes, Any]:
    """
    Convert an image XObject to an image.

    Users need to have the pillow package installed.

    It's unclear if pypdf will keep this function here, hence it's private.
    It might get removed at any point.

    Args:
        x_object: The image XObject (stream) to convert.
        pillow_parameters: parameters provided to Pillow Image.save() method,
            cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>
            The passed dictionary is not modified.

    Returns:
        Tuple[file extension, bytes, PIL.Image.Image]
        The image is None if the saved bytes could not be loaded again.

    Raises:
        PdfReadError: If the image has no filter and no mode can be
            determined from its color space.

    """
    def _apply_alpha(
        img: Image.Image,
        x_object: dict[str, Any],
        obj_as_text: str,
        image_format: str,
        extension: str,
    ) -> tuple[Image.Image, str, str]:
        # Add the soft mask (if any) as alpha channel and switch to an output
        # format able to store it. Returns img, extension, image_format.
        if IA.S_MASK in x_object:  # add alpha channel
            alpha = _xobj_to_image(x_object[IA.S_MASK])[2]
            if alpha is None:
                # The mask image could not be loaded: keep the image as is.
                logger_warning(
                    f"mask could not be loaded: {obj_as_text}", __name__
                )
            elif img.size != alpha.size:
                logger_warning(
                    f"image and mask size not matching: {obj_as_text}", __name__
                )
            else:
                # TODO: implement mask
                if alpha.mode != "L":
                    alpha = alpha.convert("L")
                if img.mode == "P":
                    img = img.convert("RGB")
                elif img.mode == "1":
                    img = img.convert("L")
                img.putalpha(alpha)
            if "JPEG" in image_format:
                image_format = "JPEG2000"
                extension = ".jp2"
            else:
                image_format = "PNG"
                extension = ".png"
        return img, extension, image_format

    # For error reporting
    # NOTE(review): the first branch only runs when x_object is None, where
    # the attribute access itself would fail - confirm the intended condition.
    obj_as_text = (
        x_object.indirect_reference.__repr__()
        if x_object is None  # pragma: no cover
        else x_object.__repr__()
    )

    # Get size and data
    size = (cast(int, x_object[IA.WIDTH]), cast(int, x_object[IA.HEIGHT]))
    data = x_object.get_data()  # type: ignore
    if isinstance(data, str):  # pragma: no cover
        data = data.encode()
    # Drop a single trailing newline after the pixel data. The pixel count is
    # checked first so that a zero-sized image does not divide by zero.
    nb_pix = size[0] * size[1]
    if nb_pix and len(data) % nb_pix == 1 and data[-1] == 0x0A:  # ie. '\n'
        data = data[:-1]

    # Get color properties
    colors = x_object.get("/Colors", 1)
    color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
    if isinstance(color_space, list) and len(color_space) == 1:
        color_space = color_space[0].get_object()

    mode, invert_color = _get_mode_and_invert_color(x_object, colors, color_space)

    # Get filters; only the last one decides how the data is interpreted
    filters = x_object.get(StreamAttributes.FILTER, NullObject()).get_object()
    lfilters = filters[-1] if isinstance(filters, list) else filters

    extension = None
    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
        img, image_format, extension, _ = _handle_flate(
            size,
            data,
            mode,
            color_space,
            colors,
            obj_as_text,
        )
    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE):
        # I'm not sure if the following logic is correct.
        # There might not be any relationship between the filters and the
        # extension
        if lfilters == FT.LZW_DECODE:
            image_format = "TIFF"
            extension = ".tiff"  # mime_type = "image/tiff"
        else:
            image_format = "PNG"
            extension = ".png"  # mime_type = "image/png"
        try:
            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
        except UnidentifiedImageError:
            img = _extended_image_from_bytes(mode, size, data)
    elif lfilters == FT.DCT_DECODE:
        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
        # invert_color kept unchanged
    elif lfilters == FT.JPX_DECODE:
        img, image_format, extension, invert_color = _handle_jpx(
            size, data, mode, color_space, colors
        )
    elif lfilters == FT.CCITT_FAX_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("TIFF",)),
            "TIFF",
            ".tiff",
            False,
        )
    elif lfilters == FT.JBIG2_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("PNG", "PPM")),
            "PNG",
            ".png",
            False,
        )
    elif mode == "CMYK":
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "TIFF",
            ".tif",
            False,
        )
    elif mode == "":
        raise PdfReadError(f"ColorSpace field not found in {x_object}")
    else:
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "PNG",
            ".png",
            False,
        )

    img = _apply_decode(img, x_object, lfilters, color_space, invert_color)
    img, extension, image_format = _apply_alpha(
        img, x_object, obj_as_text, image_format, extension
    )

    # Work on a copy: the "quality" default added below must not leak into
    # the dictionary owned by the caller.
    pillow_parameters = dict(pillow_parameters) if pillow_parameters else {}
    # Preserve JPEG image quality - see issue #3515.
    if image_format == "JPEG":
        # This prevents: Cannot use 'keep' when original image is not a JPEG:
        # "JPEG" is the value of PIL.JpegImagePlugin.JpegImageFile.format
        img.format = "JPEG"  # type: ignore[misc]
        if "quality" not in pillow_parameters:
            pillow_parameters["quality"] = "keep"

    # Save image to bytes
    img_byte_arr = BytesIO()
    try:
        img.save(img_byte_arr, format=image_format, **pillow_parameters)
    except OSError:  # pragma: no cover  # covered with pillow 10.3
        # in case of we convert to RGBA and then to PNG
        img1 = img.convert("RGBA")
        image_format = "PNG"
        extension = ".png"
        img_byte_arr = BytesIO()
        img1.save(img_byte_arr, format=image_format)
    data = img_byte_arr.getvalue()

    try:  # temporary try/except until other fixes of images
        img = Image.open(BytesIO(data))
    except Exception as exception:
        logger_warning(f"Failed loading image: {exception}", __name__)
        img = None  # type: ignore
    return extension, data, img
Add __pycache__ and .venv directories 2026-01-09 09:48:03 +08:00			`"""Functions to convert an image XObject to an image"""`

			`import sys`
			`from io import BytesIO`
			`from typing import Any, Literal, Optional, Union, cast`

			`from ._utils import check_if_whitespace_only, logger_warning`
			`from .constants import ColorSpaces, StreamAttributes`
			`from .constants import FilterTypes as FT`
			`from .constants import ImageAttributes as IA`
			`from .errors import EmptyImageDataError, PdfReadError`
			`from .generic import (`
			`ArrayObject,`
			`DecodedStreamObject,`
			`EncodedStreamObject,`
			`NullObject,`
			`TextStringObject,`
			`is_null_or_none,`
			`)`

			`if sys.version_info[:2] >= (3, 10):`
			`from typing import TypeAlias`
			`else:`
			`from typing_extensions import TypeAlias`


			`try:`
			`from PIL import Image, UnidentifiedImageError`
			`except ImportError:`
			`raise ImportError(`
			`"pillow is required to do image extraction. "`
			`"It can be installed via 'pip install pypdf[image]'"`
			`)`

			`mode_str_type: TypeAlias = Literal[`
			`"", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"`
			`]`

			`MAX_IMAGE_MODE_NESTING_DEPTH: int = 10`


			`def _get_image_mode(`
			`color_space: Union[str, list[Any], Any],`
			`color_components: int,`
			`prev_mode: mode_str_type,`
			`depth: int = 0,`
			`) -> tuple[mode_str_type, bool]:`
			`"""`
			`Returns:`
			`Image mode, not taking into account mask (transparency).`
			`ColorInversion is required (like for some DeviceCMYK).`

			`"""`
			`if depth > MAX_IMAGE_MODE_NESTING_DEPTH:`
			`raise PdfReadError(`
			`"Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."`
			`)`
			`if is_null_or_none(color_space):`
			`return "", False`
			`color_space_str: str = ""`
			`if isinstance(color_space, str):`
			`color_space_str = color_space`
			`elif not isinstance(color_space, list):`
			`raise PdfReadError(`
			`"Cannot interpret color space", color_space`
			`) # pragma: no cover`
			`elif not color_space:`
			`return "", False`
			`elif color_space[0].startswith("/Cal"): # /CalRGB or /CalGray`
			`color_space_str = "/Device" + color_space[0][4:]`
			`elif color_space[0] == "/ICCBased":`
			`icc_profile = color_space[1].get_object()`
			`color_components = cast(int, icc_profile["/N"])`
			`color_space_str = icc_profile.get("/Alternate", "")`
			`elif color_space[0] == "/Indexed":`
			`color_space_str = color_space[1].get_object()`
			`mode, invert_color = _get_image_mode(`
			`color_space_str, color_components, prev_mode, depth + 1`
			`)`
			`if mode in ("RGB", "CMYK"):`
			`mode = "P"`
			`return mode, invert_color`
			`elif color_space[0] == "/Separation":`
			`color_space_str = color_space[2].get_object()`
			`mode, invert_color = _get_image_mode(`
			`color_space_str, color_components, prev_mode, depth + 1`
			`)`
			`return mode, True`
			`elif color_space[0] == "/DeviceN":`
			`original_color_space = color_space`
			`color_components = len(color_space[1])`
			`color_space_str = color_space[2].get_object()`
			`if color_space_str == "/DeviceCMYK" and color_components == 1:`
			`if original_color_space[1][0] != "/Black":`
			`logger_warning(`
			`f"Color {original_color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",`
			`__name__,`
			`)`
			`return "L", True`
			`mode, invert_color = _get_image_mode(`
			`color_space_str, color_components, prev_mode, depth + 1`
			`)`
			`return mode, invert_color`

			`mode_map: dict[str, mode_str_type] = {`
			`"1bit": "1", # must be zeroth position: color_components may index the values`
			`"/DeviceGray": "L", # must be first position: color_components may index the values`
			`"palette": "P", # must be second position: color_components may index the values`
			`"/DeviceRGB": "RGB", # must be third position: color_components may index the values`
			`"/DeviceCMYK": "CMYK", # must be fourth position: color_components may index the values`
			`"2bit": "2bits",`
			`"4bit": "4bits",`
			`}`

			`mode = (`
			`mode_map.get(color_space_str)`
			`or list(mode_map.values())[color_components]`
			`or prev_mode`
			`)`

			`return mode, mode == "CMYK"`


			`def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:`
			`mask = (1 << bits) - 1`
			`byte_buffer = bytearray(size[0] * size[1])`
			`data_index = 0`
			`bit = 8 - bits`
			`for y in range(size[1]):`
			`if bit != 8 - bits:`
			`data_index += 1`
			`bit = 8 - bits`
			`for x in range(size[0]):`
			`byte_buffer[x + y * size[0]] = (data[data_index] >> bit) & mask`
			`bit -= bits`
			`if bit < 0:`
			`data_index += 1`
			`bit = 8 - bits`
			`return bytes(byte_buffer)`


			`def _extended_image_from_bytes(`
			`mode: str, size: tuple[int, int], data: bytes`
			`) -> Image.Image:`
			`try:`
			`img = Image.frombytes(mode, size, data)`
			`except ValueError as exc:`
			`nb_pix = size[0] * size[1]`
			`data_length = len(data)`
			`if data_length == 0:`
			`raise EmptyImageDataError(`
			`"Data is 0 bytes, cannot process an image from empty data."`
			`) from exc`
			`if data_length % nb_pix != 0:`
			`raise exc`
			`k = nb_pix * len(mode) / data_length`
			`data = b"".join(bytes((x,) * int(k)) for x in data)`
			`img = Image.frombytes(mode, size, data)`
			`return img`


			`def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, Any]:`
			`count = len(color_space)`
			`if count == 4:`
			`color_space, base, hival, lookup = (value.get_object() for value in color_space)`
			`return color_space, base, hival, lookup`

			# Deal with strange AutoDesk files where `base` and `hival` look like this:
			`# /DeviceRGB\x00255`
			`element1 = color_space[1]`
			`element1 = element1 if isinstance(element1, str) else element1.get_object()`
			`if count == 3 and "\x00" in element1:`
			`color_space, lookup = color_space[0].get_object(), color_space[2].get_object()`
			`base, hival = element1.split("\x00")`
			`hival = int(hival)`
			`return color_space, base, hival, lookup`
			`raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")`


			`def _handle_flate(`
			`size: tuple[int, int],`
			`data: bytes,`
			`mode: mode_str_type,`
			`color_space: str,`
			`colors: int,`
			`obj_as_text: str,`
			`) -> tuple[Image.Image, str, str, bool]:`
			`"""`
			`Process image encoded in flateEncode`
			`Returns img, image_format, extension, color inversion`
			`"""`
			`extension = ".png" # mime_type: "image/png"`
			`image_format = "PNG"`
			`lookup: Any`
			`base: Any`
			`hival: Any`
			`if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":`
			`color_space, base, hival, lookup = __handle_flate__indexed(color_space)`
			`if mode == "2bits":`
			`mode = "P"`
			`data = bits2byte(data, size, 2)`
			`elif mode == "4bits":`
			`mode = "P"`
			`data = bits2byte(data, size, 4)`
			`img = _extended_image_from_bytes(mode, size, data)`
			`if color_space == "/Indexed":`
			`if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):`
			`lookup = lookup.get_data()`
			`if isinstance(lookup, TextStringObject):`
			`lookup = lookup.original_bytes`
			`if isinstance(lookup, str):`
			`lookup = lookup.encode()`
			`try:`
			`nb, conv, mode = { # type: ignore`
			`"1": (0, "", ""),`
			`"L": (1, "P", "L"),`
			`"P": (0, "", ""),`
			`"RGB": (3, "P", "RGB"),`
			`"CMYK": (4, "P", "CMYK"),`
			`}[_get_image_mode(base, 0, "")[0]]`
			`except KeyError: # pragma: no cover`
			`logger_warning(`
			`f"Base {base} not coded please share the pdf file with pypdf dev team",`
			`__name__,`
			`)`
			`lookup = None`
			`else:`
			`if img.mode == "1":`
			`# Two values ("high" and "low").`
			`expected_count = 2 * nb`
			`actual_count = len(lookup)`
			`if actual_count != expected_count:`
			`if actual_count < expected_count:`
			`logger_warning(`
			`f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",`
			`__name__`
			`)`
			`lookup += bytes([0] * (expected_count - actual_count))`
			`elif not check_if_whitespace_only(lookup[expected_count:]):`
			`logger_warning(`
			`f"Too many lookup values: Expected {expected_count}, got {actual_count}.",`
			`__name__`
			`)`
			`lookup = lookup[:expected_count]`
			`colors_arr = [lookup[:nb], lookup[nb:]]`
			`arr = b"".join(`
			`b"".join(`
			`colors_arr[1 if img.getpixel((x, y)) > 127 else 0]`
			`for x in range(img.size[0])`
			`)`
			`for y in range(img.size[1])`
			`)`
			`img = Image.frombytes(mode, img.size, arr)`
			`else:`
			`img = img.convert(conv)`
			`if len(lookup) != (hival + 1) * nb:`
			`logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)`
			`lookup = None`
			`elif mode == "L":`
			`# gray lookup does not work: it is converted to a similar RGB lookup`
			`lookup = b"".join([bytes([b, b, b]) for b in lookup])`
			`mode = "RGB"`
			`# TODO: https://github.com/py-pdf/pypdf/pull/2039`
			`# this is a work around until PIL is able to process CMYK images`
			`elif mode == "CMYK":`
			`_rgb = []`
			`for _c, _m, _y, _k in (`
			`lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)`
			`):`
			`_r = int(255 * (1 - _c / 255) * (1 - _k / 255))`
			`_g = int(255 * (1 - _m / 255) * (1 - _k / 255))`
			`_b = int(255 * (1 - _y / 255) * (1 - _k / 255))`
			`_rgb.append(bytes((_r, _g, _b)))`
			`lookup = b"".join(_rgb)`
			`mode = "RGB"`
			`if lookup is not None:`
			`img.putpalette(lookup, rawmode=mode)`
			`img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")`
			`elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":`
			`# Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary`
			`mode2 = _get_image_mode(color_space, colors, mode)[0]`
			`if mode != mode2:`
			`img = Image.frombytes(mode2, size, data) # reloaded as mode may have changed`
			`if mode == "CMYK":`
			`extension = ".tif"`
			`image_format = "TIFF"`
			`return img, image_format, extension, False`


			def _handle_jpx(
			    size: tuple[int, int],
			    data: bytes,
			    mode: mode_str_type,
			    color_space: str,
			    colors: int,
			) -> tuple[Image.Image, str, str, bool]:
			    """
			    Build a Pillow image from a JPEG 2000 (JPXDecode) stream.

			    Args:
			        size: Declared (width, height) of the image; not used by this handler.
			        data: The JPEG 2000 byte stream.
			        mode: Mode guessed by the caller; forwarded to ``_get_image_mode``.
			        color_space: Color space of the image XObject.
			        colors: Number of color components.

			    Returns:
			        Tuple (img, image_format, extension, invert_color).

			    """
			    decoded = Image.open(BytesIO(data), formats=("JPEG2000",))
			    mode, invert_color = _get_image_mode(color_space, colors, mode)
			    if mode == "":
			        # The color space did not yield a mode: rely on what Pillow detected.
			        mode = cast(mode_str_type, decoded.mode)
			        invert_color = mode == "CMYK"
			    if decoded.mode == "RGBA" and mode == "RGB":
			        mode = "RGBA"

			    # Bring the decoded image to the expected mode.
			    mode_pair = {decoded.mode, mode}  # unordered comparison of both modes
			    if decoded.mode == mode or mode_pair == {"L", "P"}:
			        # L and P are indexed modes which should not be changed.
			        img = decoded
			    elif mode_pair == {"RGBA", "CMYK"}:
			        # Both are 4-byte encodings: keep the raw bytes and only
			        # correct the mode they are interpreted with.
			        img = Image.frombytes(mode, decoded.size, decoded.tobytes())
			    else:  # pragma: no cover
			        img = decoded.convert(mode)

			    # CMYK conversion
			    # https://stackoverflow.com/questions/38855022/conversion-from-cmyk-to-rgb-with-pillow-is-different-from-that-of-photoshop
			    # An ICC-aware conversion is not implemented for the moment, as the
			    # ICC profile would need to be retrieved properly first.
			    if img.mode == "CMYK":
			        img = img.convert("RGB")
			    # mime_type of ".jp2": "image/x-jp2"
			    return img, "JPEG2000", ".jp2", invert_color


			def _apply_decode(
			    img: Image.Image,
			    x_object_obj: dict[str, Any],
			    lfilters: FT,
			    color_space: Union[str, list[Any], Any],
			    invert_color: bool,
			) -> Image.Image:
			    """
			    Remap the pixel values of an image according to its Decode array.

			    Args:
			        img: The image to process.
			        x_object_obj: Image XObject dictionary; its ``IA.DECODE`` entry is read.
			        lfilters: Last filter of the image stream.
			        color_space: Resolved color space of the image.
			        invert_color: Whether the color space requested a color inversion.

			    Returns:
			        The remapped image, or ``img`` itself when there is no Decode array
			        to apply or the array is the identity mapping.

			    """
			    def inverted_ranges() -> list[float]:
			        # One [1.0, 0.0] pair per band, i.e. every band gets inverted.
			        return [1.0, 0.0] * len(img.getbands())

			    # CMYK image and other color spaces without decode
			    # requires reverting scale (cf p243,2§ last sentence)
			    needs_inversion = (
			        img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
			    ) or (invert_color and img.mode == "L")
			    decode = x_object_obj.get(
			        IA.DECODE, inverted_ranges() if needs_inversion else None
			    )

			    if isinstance(color_space, ArrayObject):
			        family = color_space[0].get_object()
			        if family == "/Indexed":
			            decode = None  # decode is meaningless if Indexed
			        elif family == "/Separation":
			            decode = inverted_ranges()

			    # Nothing to do without a Decode array or with the identity [0, 1, 0, 1, ...].
			    if decode is None or all(
			        bound == position % 2 for position, bound in enumerate(decode)
			    ):
			        return img

			    # One 256-entry lookup table per (dmin, dmax) pair, concatenated.
			    lut: list[int] = []
			    for start in range(0, len(decode), 2):
			        dmin, dmax = decode[start], decode[start + 1]
			        lut.extend(
			            round(255.0 * (level / 255.0 * (dmax - dmin) + dmin))
			            for level in range(256)
			        )
			    return img.point(lut)


			def _get_mode_and_invert_color(
			    x_object_obj: dict[str, Any], colors: int, color_space: Union[str, list[Any], Any]
			) -> tuple[mode_str_type, bool]:
			    """
			    Determine the image mode and whether a color inversion is required.

			    Args:
			        x_object_obj: Image XObject dictionary; ``/BitsPerComponent`` is read
			            from it (8 is assumed when the entry is missing).
			        colors: Number of color components declared for the image.
			        color_space: Resolved color space of the image.

			    Returns:
			        Tuple (mode, invert_color) as computed by ``_get_image_mode``.

			    """
			    bits_per_component = x_object_obj.get("/BitsPerComponent", 8)
			    if bits_per_component < 8:
			        # Sub-byte samples: the mode is derived from the bit depth alone,
			        # e.g. "1bit", "2bit" or "4bit".
			        return _get_image_mode(f"{bits_per_component}bit", 0, "")

			    # A single declared component combined with a non-gray color space is
			    # looked up with 2 components instead.
			    # NOTE(review): presumably this selects the palette mode - confirm
			    # against _get_image_mode.
			    single_non_gray_component = (
			        colors == 1
			        and not is_null_or_none(color_space)
			        and "Gray" not in color_space
			    )
			    return _get_image_mode(
			        color_space, 2 if single_non_gray_component else colors, ""
			    )


			def _xobj_to_image(
			    x_object: dict[str, Any],
			    pillow_parameters: Union[dict[str, Any], None] = None
			) -> tuple[Optional[str], bytes, Any]:
			    """
			    Users need to have the pillow package installed.

			    It's unclear if pypdf will keep this function here, hence it's private.
			    It might get removed at any point.

			    Args:
			        x_object: The image XObject (stream dictionary) to convert.
			        pillow_parameters: parameters provided to Pillow Image.save() method,
			            cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>
			            The mapping is copied; the caller's object is never modified.

			    Returns:
			        Tuple[file extension, bytes, PIL.Image.Image]
			        The image is None if the saved bytes could not be reloaded.

			    Raises:
			        PdfReadError: If the stream uses no handled filter and no image mode
			            could be determined.

			    """
			    def _apply_alpha(
			        img: Image.Image,
			        x_object: dict[str, Any],
			        obj_as_text: str,
			        image_format: str,
			        extension: str,
			    ) -> tuple[Image.Image, str, str]:
			        """Add the soft mask, if any, as alpha channel; returns (img, extension, image_format)."""
			        if IA.S_MASK not in x_object:
			            return img, extension, image_format

			        # add alpha channel
			        alpha = _xobj_to_image(x_object[IA.S_MASK])[2]
			        if alpha is None:
			            # The recursive call returns None as image when the mask
			            # could not be reloaded: keep the image without alpha.
			            logger_warning(
			                f"mask of image could not be loaded: {obj_as_text}", __name__
			            )
			        elif img.size != alpha.size:
			            logger_warning(
			                f"image and mask size not matching: {obj_as_text}", __name__
			            )
			        else:
			            # TODO: implement mask
			            if alpha.mode != "L":
			                alpha = alpha.convert("L")
			            if img.mode == "P":
			                img = img.convert("RGB")
			            elif img.mode == "1":
			                img = img.convert("L")
			            img.putalpha(alpha)
			        # An image with a soft mask is exported in a format that is able
			        # to carry an alpha channel.
			        if "JPEG" in image_format:
			            image_format = "JPEG2000"
			            extension = ".jp2"
			        else:
			            image_format = "PNG"
			            extension = ".png"
			        return img, extension, image_format

			    # For error reporting
			    obj_as_text = repr(x_object)

			    # Get size and data
			    size = (cast(int, x_object[IA.WIDTH]), cast(int, x_object[IA.HEIGHT]))
			    data = x_object.get_data()  # type: ignore
			    if isinstance(data, str):  # pragma: no cover
			        data = data.encode()
			    # Drop a single surplus trailing '\n'. Zero-area images are skipped
			    # here to avoid a ZeroDivisionError on malformed dimensions.
			    pixel_count = size[0] * size[1]
			    if pixel_count and len(data) % pixel_count == 1 and data[-1] == 0x0A:
			        data = data[:-1]

			    # Get color properties
			    colors = x_object.get("/Colors", 1)
			    color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
			    if isinstance(color_space, list) and len(color_space) == 1:
			        color_space = color_space[0].get_object()

			    mode, invert_color = _get_mode_and_invert_color(x_object, colors, color_space)

			    # Get filters: only the last filter of a filter chain is considered
			    filters = x_object.get(StreamAttributes.FILTER, NullObject()).get_object()
			    lfilters = filters[-1] if isinstance(filters, list) else filters

			    extension = None
			    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
			        img, image_format, extension, _ = _handle_flate(
			            size,
			            data,
			            mode,
			            color_space,
			            colors,
			            obj_as_text,
			        )
			    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE):
			        # I'm not sure if the following logic is correct.
			        # There might not be any relationship between the filters and the
			        # extension
			        if lfilters == FT.LZW_DECODE:
			            image_format = "TIFF"
			            extension = ".tiff"  # mime_type = "image/tiff"
			        else:
			            image_format = "PNG"
			            extension = ".png"  # mime_type = "image/png"
			        try:
			            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
			        except UnidentifiedImageError:
			            img = _extended_image_from_bytes(mode, size, data)
			    elif lfilters == FT.DCT_DECODE:
			        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
			        # invert_color kept unchanged
			    elif lfilters == FT.JPX_DECODE:
			        img, image_format, extension, invert_color = _handle_jpx(
			            size, data, mode, color_space, colors
			        )
			    elif lfilters == FT.CCITT_FAX_DECODE:
			        img, image_format, extension, invert_color = (
			            Image.open(BytesIO(data), formats=("TIFF",)),
			            "TIFF",
			            ".tiff",
			            False,
			        )
			    elif lfilters == FT.JBIG2_DECODE:
			        img, image_format, extension, invert_color = (
			            Image.open(BytesIO(data), formats=("PNG", "PPM")),
			            "PNG",
			            ".png",
			            False,
			        )
			    elif mode == "CMYK":
			        img, image_format, extension, invert_color = (
			            _extended_image_from_bytes(mode, size, data),
			            "TIFF",
			            ".tif",
			            False,
			        )
			    elif mode == "":
			        raise PdfReadError(f"ColorSpace field not found in {x_object}")
			    else:
			        img, image_format, extension, invert_color = (
			            _extended_image_from_bytes(mode, size, data),
			            "PNG",
			            ".png",
			            False,
			        )

			    img = _apply_decode(img, x_object, lfilters, color_space, invert_color)
			    img, extension, image_format = _apply_alpha(
			        img, x_object, obj_as_text, image_format, extension
			    )

			    # Work on a copy: the "quality" default below must not leak into the
			    # mapping owned by the caller.
			    save_parameters = dict(pillow_parameters) if pillow_parameters else {}
			    # Preserve JPEG image quality - see issue #3515.
			    if image_format == "JPEG":
			        # This prevents: Cannot use 'keep' when original image is not a JPEG:
			        # "JPEG" is the value of PIL.JpegImagePlugin.JpegImageFile.format
			        img.format = "JPEG"  # type: ignore[misc]
			        save_parameters.setdefault("quality", "keep")

			    # Save image to bytes
			    img_byte_arr = BytesIO()
			    try:
			        img.save(img_byte_arr, format=image_format, **save_parameters)
			    except OSError:  # pragma: no cover  # covered with pillow 10.3
			        # Saving failed: fall back to converting to RGBA and saving as PNG
			        img1 = img.convert("RGBA")
			        image_format = "PNG"
			        extension = ".png"
			        img_byte_arr = BytesIO()
			        img1.save(img_byte_arr, format=image_format)
			    data = img_byte_arr.getvalue()

			    try:  # temporary try/except until other fixes of images
			        img = Image.open(BytesIO(data))
			    except Exception as exception:
			        logger_warning(f"Failed loading image: {exception}", __name__)
			        img = None  # type: ignore
			    return extension, data, img