/usr/lib/python3/dist-packages/pyutilib/excel/excel.py

#  _________________________________________________________________________
#
#  PyUtilib: A Python utility library.
#  Copyright (c) 2008 Sandia Corporation.
#  This software is distributed under the BSD License.
#  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
#  the U.S. Government retains certain rights in this software.
#  _________________________________________________________________________

#
# See http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/440661
#

""" This modules provides a lightweight API to access Excel data.
    There are many ways to read Excel data, including ODBC. This module
    uses ADODB and has the advantage of only requiring a file name and a
    sheet name (no setup required).
"""

try:
    import win32com.client
except ImportError:
    pass
try:
    unicode
except:
    def unicode(x):
        return x

class ExcelDocument(object):
    """ Represents an opened Excel document.
    """

    def __init__(self, filename):
        self.connection = win32com.client.Dispatch('ADODB.Connection')
        self.connection.Open(
            'PROVIDER=Microsoft.Jet.OLEDB.4.0;'+
            'DATA SOURCE=%s'%filename+
            ';Extended Properties="Excel 8.0;HDR=1;IMEX=1"'
        )

    def sheets(self):
        """ Returns a list of the name of the sheets found in the document.
        """
        result = []
        recordset = self.connection.OpenSchema(20)
        while not recordset.EOF:
            result.append(recordset.Fields[2].Value)
            recordset.MoveNext()
        recordset.Close()
        del recordset
        return result

    def sheet(self, name, encoding=None, order_by=None):
        """ Returns a sheet object by name. Use sheets() to obtain a list of
            valid names. encoding is a character encoding name which is used
            to encode the unicode strings returned by Excel, so that you get
            plain Python strings.
        """
        return _ExcelSheet(self, name, encoding, order_by)

    def __del__(self):
        self.close()

    def close(self):
        """ Closes the Excel document. It is automatically called when
            the object is deleted.
        """
        try:
            self.connection.Close()
            del self.connection
        except:
            pass

def _strip(value):
    """ Strip the input value if it is a string and returns None
        if it had only whitespaces """
    if isinstance(value, basestring):
        value = value.strip()
        if len(value)==0:
            return None
    return value

class _ExcelSheet(object):
    """ Represents an Excel sheet from a document, gives methods to obtain
        column names and iterate on its content.
    """
    def __init__(self, document, name, encoding, order_by):
        self.document = document
        self.name = name
        self.order_by = order_by
        if encoding:
            def encoder(value):
                if isinstance(value, unicode):
                    value = value.strip()
                    if len(value)==0:
                        return None
                    else:
                        return value.encode(encoding)
                elif isinstance(value, str):
                    value = value.strip()
                    if len(value)==0:
                        return None
                    else:
                        return value
                else:
                    return value
            self.encoding = encoder
        else:
            self.encoding = _strip

    def columns(self):
        """ Returns a list of column names for the sheet.
        """
        recordset = win32com.client.Dispatch('ADODB.Recordset')
        recordset.Open(unicode('SELECT * FROM [%s]'%self.name), self.document.connection, 0, 1)
        try:
            return [self.encoding(field.Name) for field in recordset.Fields]
        finally:
            recordset.Close()
            del recordset

    def __iter__(self):
        """ Returns a paged iterator by default. See paged().
        """
        return self.paged()

    def naive(self):
        """ Returns an iterator on the data contained in the sheet. Each row
            is returned as a dictionary with row headers as keys.
        """
        # SLOW algorithm ! A lot of COM calls are performed.
        recordset = win32com.client.Dispatch('ADODB.Recordset')
        if self.order_by:
            recordset.Open(unicode('SELECT * FROM [%s] ORDER BY %s'%(self.name, self.order_by)), self.document.connection, 0, 1)
        else:
            recordset.Open(unicode('SELECT * FROM [%s]'%self.name), self.document.connection, 0, 1)
        try:
            while not recordset.EOF:
                source = {}
                for field in recordset.Fields:
                    source[self.encoding(field.Name)] = self.encoding(field.Value)
                yield source
                recordset.MoveNext()
            recordset.Close()
            del recordset
        except:
            # cannot use "finally" here because Python doesn't want
            # a "yield" statement inside a "try...finally" block.
            recordset.Close()
            del recordset
            raise

    def paged(self, pagesize=128):
        """ Returns an iterator on the data contained in the sheet. Each row
            is returned as a dictionary with row headers as keys. pagesize is
            the size of the buffer of rows ; it is an implementation detail but
            could have an impact on the speed of the iterator. Use pagesize=-1
            to buffer the whole sheet in memory.
        """
        # FAST algorithm ! It is about 10x faster than the naive algorithm
        # thanks to the use of GetRows, which dramatically decreases the number
        # of COM calls.
        recordset = win32com.client.Dispatch('ADODB.Recordset')
        if self.order_by:
            recordset.Open(unicode('SELECT * FROM [%s] ORDER BY %s'%(self.name, self.order_by)), self.document.connection, 0, 1)
        else:
            recordset.Open(unicode('SELECT * FROM [%s]'%self.name), self.document.connection, 0, 1)
        try:
            fields = [self.encoding(field.Name) for field in recordset.Fields]
            ok = True
            while ok:
                # Thanks to Rogier Steehouder for the transposing tip
                rows = zip(*recordset.GetRows(pagesize))

                if recordset.EOF:
                    # close the recordset as soon as possible
                    recordset.Close()
                    recordset = None
                    ok = False

                for row in rows:
                    yield dict(zip(fields, map(self.encoding, row)))
        except:
            if recordset is not None:
                recordset.Close()
                del recordset
            raise
python3-pyutilib 5.3.5-1 / usr / lib / python3 / dist-packages / pyutilib / excel / excel.py