Python: mmap and array

我需要在Python程序中存取一个很大的数组,数组的每一项是(int, int, float, int)的记录。如果直接用list来存放,占据的内存巨大(因为不仅所有这些数都是对象,且tuple本身也是对象)。Python提供了一个array模块,以更有效地存取数字值,但是它只支持单一的数据类型,例如你无法创建这样的array对象:a = array.array('2lfl')。


class MMArray:
    __file = __mem = None
    __realsize = __capsize = 0

    def __init__(self, type='B', fname=None, capsize=1024*1024):
        self.__elmsize = struct.calcsize(type)

        if not fname:
            fno, self.__fname = tempfile.mkstemp("-mmarray", "pyslm-")
            self.__file = os.fdopen (fno, "w+")

    def fromfile(self, fname):
        if not os.path.exists(fname):
            raise "The file '%s' does not exist!"

        fsize = os.path.getsize(fname)
        if fsize == 0:
            raise "The size of file '%s' is zero!" % fname

        if self.__mem: self.__mem.close()
        if self.__file: self.__file.close()

        self.__file = open (fname, "r+")
        self.__mem = mmap.mmap(self.__file.fileno(), fsize)
        self.__realsize = self.__capsize = fsize/self.__elmsize

    def tofile(self, fname):
        if fname ==
            raise "Can not dump the array to currently mapping file!"
        tf = open(fname, "w+")
        bsize = self.__realsize * self.__elmsize
        tf.write (self.__mem[:bsize])

    def __enlarge(self, capsize):
        if self.__capsize >= capsize:
        self.__capsize = capsize * self.__capsize - 1)

        if (self.__mem): self.__mem.close()
        self.__mem = mmap.mmap(self.__file.fileno(), self.__file.tell())

    def __del__ (self):
        bsize = self.__realsize * self.__elmsize
        self.__file.truncate (bsize)
        if self.__mem: self.__mem.close()

    def __getitem__(self, idx):
        if idx < 0 or idx >= self.__realsize:
            raise IndexError
        return self.__access(idx)

    def __setitem__(self, idx, buf):
        if idx < 0 or idx >= self.__realsize:
            raise IndexError
        if type(buf) != type("") or len(buf) != self.__elmsize:
            raise "Not a string, or the buffer size is incorrect!"
        self.__access(idx, buf)

    def __access (self, idx, buf=None):
        start = idx * self.__elmsize
        end = start + self.__elmsize
        if not buf: return self.__mem[start:end]
        self.__mem[start:end] = buf

    def size(self):
        return self.__realsize

    def append(self, buf):
        if type(buf) != type("") or len(buf) != self.__elmsize:
            raise "Not a string, or the buffer size is incorrect!"

        if self.__realsize >= self.__capsize:

        self.__access(self.__realsize, buf)
        self.__realsize += 1

    def __iter__(self):
        for i in xrange(0, self.__realsize):
            yield self.__access(i)

    def truncate(self, tsize):
        if self.__realsize >= tsize:
            self.__realsize = tsize


2 thoughts on “Python: mmap and array

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

To submit your comment, click the image below where it asks you to...
Clickcha - The One-Click Captcha