The Wayback Machine - https://web.archive.org/web/20250524085040/https://github.com/python/cpython/issues/113895
Skip to content

tarfile: _Stream does not work correctly with a pipe #113895

Closed as not planned
Closed as not planned
@perillo

Description

@perillo

Bug report

Bug description:

The documentation of tarfile._Stream says that "A stream-like object could be for example: sys.stdin, sys.stdout, a socket, a tape device etc.", but when using a pipe it raises tarfile.StreamError: seeking backwards is not allowed when the write end of the pipe is closed.

Not sure if this is a bug in the tarfile implementation or in my code.
The size of the tar file I used is 40M.

#!/usr/bin/env python

import os
import os.path
from pathlib import Path
import sys
import tarfile
from threading import Thread


BUF_SIZE = 8 * 1024


def tar_strip_components(file, n=0):
    for info in file.getmembers():
        components = Path(info.path).parts
        info.path = Path(*components[1:])

        yield info


def tar_unpack_thread(path, size, wr):
    total_size = size
    size = 0
    with open(path, "rb") as f:
        buf = bytearray(BUF_SIZE)
        while True:
            rate = (size / total_size) * 100
            sys.stderr.write(f"{rate:.0f}%\r")
            n = f.readinto(buf)
            if n == 0:
                break

            size += n
            data = buf[:n]
            wr.write(data)

    sys.stderr.write("done\n")
    wr.close()


def tar_unpack(dest, path, size):
    rd, wr = os.pipe()
    rd = os.fdopen(rd, "rb")
    wr = os.fdopen(wr, "wb")
    t = Thread(target=tar_unpack_thread, args=(path, size, wr))
    t.start()

    # TODO: stream is not closed in case TarFile() raises an exception, but we
    # don't care.
    stream = tarfile._Stream(path, "r", "xz", rd, BUF_SIZE)
    with tarfile.TarFile(path, "r", stream) as file:
        members = tar_strip_components(file, 1)
        file.extractall(dest, members)

    rd.close()
    t.join()


def main():
    path = sys.argv[1]
    size = os.stat(path).st_size
    tar_unpack(".", path, size)


if __name__ == "__main__":
    main()
$ ./test.py archive.tar.xz
done
Traceback (most recent call last):
  File "/home/manlio/src/python/pkg/mperillo/zig-installer/test/./test.py", line 67, in <module>
    main()
  File "/home/manlio/src/python/pkg/mperillo/zig-installer/test/./test.py", line 63, in main
    tar_unpack(".", path, size)
  File "/home/manlio/src/python/pkg/mperillo/zig-installer/test/./test.py", line 54, in tar_unpack
    file.extractall(dest, members)
  File "/usr/lib/python3.11/tarfile.py", line 2264, in extractall
    self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
  File "/usr/lib/python3.11/tarfile.py", line 2327, in _extract_one
    self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
  File "/usr/lib/python3.11/tarfile.py", line 2410, in _extract_member
    self.makefile(tarinfo, targetpath)
  File "/usr/lib/python3.11/tarfile.py", line 2453, in makefile
    source.seek(tarinfo.offset_data)
  File "/usr/lib/python3.11/tarfile.py", line 520, in seek
    raise StreamError("seeking backwards is not allowed")
tarfile.StreamError: seeking backwards is not allowed

If I replace the use of tarfile.TarFile() with

while True:
    buf = stream.read(BUF_SIZE)
    if buf == b"":
        sys.stderr.write("ok\n")
        return

the code works correctly.

Thanks.

CPython versions tested on:

3.11

Operating systems tested on:

Linux

Metadata

Metadata

Assignees

No one assigned

    Labels

    type-bugAn unexpected behavior, bug, or error

    Projects

    Status

    Done

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions