#!/usr/bin/env python
# @(#) $Id: MultipartHandler.py,v 1.7 2006/03/17 22:05:12 awicenec Exp $
#
#  ALMA - Atacama Large Millimeter Array
#  (c) European Southern Observatory, 2002
#  Copyright by ESO (in the framework of the ALMA collaboration),
#  All rights reserved
#
#  This library is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2.1 of the License, or (at your option) any later version.
#
#  This library is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public
#  License along with this library; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
#  MA 02111-1307  USA
#
"""
Parser for multipart/related messages with one part being a VOTable
which is referring to binary attachments.

SYNOPSIS: python -i MultipartHandler.py -f <MultipartMessage> [-r <ResourceName>] [-c <contentID>] [-v <verbosity>]

where <MultipartMessage> is the name of an ALMA multipart/related mime message file
and <ResourceName> is an optional parameter to select one Resource according to
the name or ID attribute.

$Date: 2006/03/17 22:05:12 $ $Revision: 1.7 $
"""

import sys, types

import email, re
from email.Parser import Parser
import struct

# the following does not work if there is only
# minidom:
#from xml.dom import implementation 

from xml.dom.minidom import getDOMImplementation
implementation = getDOMImplementation()

# adjust and uncomment if necessary
# sys.path.append('/home/awicenec/VO/VOTable')

import VOTable


def usage():
    """
    Stop the program and hand the module documentation string
    (__doc__) to the interpreter as the exit message.

    RETURNS:
        never returns, SystemExit is raised
    """
    raise SystemExit(__doc__)

def getMimeParts(msg, mime):
    """
    Get the parts of the message which have the specified mime-type.

    The type of every part is taken from get_content_type(), i.e. the
    comparison ignores upper/lower case and Content-Type parameters, and
    a part without a Content-Type header counts as 'text/plain' instead
    of aborting the search.

    INPUTS:
        msg, mime-type message object
        mime, string, mime-type to be searched for (e.g. 'text/xml')

    RETURNS:
        list of the matching parts (full message objects, not only the
        payloads); empty list if nothing matches
    """
    wanted = mime.strip().lower()
    parts = []
    for m in msg.walk():
        if m.get_content_type() == wanted:
            parts.append(m)
    return parts


def getPartId(msg, contentId):
    """
    Return the part(s) which belong to the contentId given.
    The contentId can be a regular expression; it always has to match
    the complete Content-ID (without the enclosing '<' and '>').

    INPUTS:
        msg, mime-type message object
        contentId, string, return the part(s) which match the given
            contentId (regexp)

    RETURNS:
        list of the matching parts (never empty)

    RAISES:
        Exception if no part matches the contentId
    """
    # Group the expression so that '^' and '$' apply to all of it, also
    # when it contains a top-level alternation like 'a|b'.
    rcID = re.compile('^(?:' + contentId + ')$')
    parts = []
    for m in msg.walk():
        cID = m.get('Content-ID')
        # cID[1:-1] drops the first and last character, i.e. the '<' and
        # '>' a Content-ID header is expected to be wrapped in.
        if isinstance(cID, str) and rcID.match(cID[1:-1]):
            parts.append(m)
    if not parts:
        errMsg = "Unable to locate part matching cid %s.\n" % contentId
        errMsg += "Check references and IDs in data!!"
        raise Exception(errMsg)
    return parts

def votypToForm(votyp, arraysize):
    """
    Function provides the mapping between the VO data-type strings and the
    characters used by the struct module for the conversion. It takes the
    VO-type string and the arraysize on input and returns a tuple: the format
    code and two values which are used to control the unpacking of the arrays.

    Synopsis: (forms, mult, compl) = votypToForm(votyp, arraysize)

    INPUTS:
        votyp:        VOTable type string
        arraysize:    VOTable arraysize specification, either a plain number
                      'N', a bracketed list '[N,M]' or the form 'NxM'. All
                      dimensions given are multiplied. None or an empty
                      string is taken as a scalar (one element).

    RETURNS:
        (forms, mult, compl):    forms is a list holding one struct format
                                 string (without byte order character), mult
                                 is the number of elements and compl is 2
                                 for complex types and 1 otherwise.
                                 For an unknown type ([-1], -1, -1) is
                                 returned.

    RAISES:
        ValueError if a dimension in arraysize is not an integer
    """
    # Reduce the arraysize specification to the total number of elements.
    spec = ''
    if arraysize is not None:
        spec = str(arraysize).strip()
    nelem = 1
    if spec != '':
        spec = spec.strip('[]').replace('x', ',')
        for dim in spec.split(','):
            nelem *= int(dim)

    # Types which map to the same format character for every array size.
    codes = {
        'short': 'h',
        'int': 'i',
        'long': 'l',
        'unsigned long': 'L',
        'long long': 'q',
        'unsigned long long': 'Q',
        'float': 'f',
        'double': 'd',
    }

    compl = 1
    if votyp == 'char' and nelem == 1:
        form = 'c'
    elif votyp == 'char' and nelem > 1:
        form = 's'
    elif votyp == 'floatComplex':
        # one complex value is stored as two consecutive floats
        form = 'f'
        compl = 2
    elif votyp in codes:
        form = codes[votyp]
    else:
        return ([-1], -1, -1)

    # Sizes below 2 are unpacked as a single element.
    if nelem > 1:
        mult = nelem
    else:
        mult = 1
    forms = [str(compl * mult) + form]

    return (forms, mult, compl)


def unpackBinary(votyp, arraysize, binstr, endian='<'):
    """
    Function provides the mapping between the format codes
    in the VOTable definition and the struct.unpack function.

    It does the unpacking of a binary-string <binstr> according
    to the description contained in the associated FIELD element.

    The value of the parameter endian can be '<' for little-endian
    or '>' for big endian. This refers to the endianess of the
    values in the binstr. The same prefix is used to calculate the
    expected size and to unpack, so the element sizes are the standard
    sizes of the struct module on every platform.

    INPUTS:
        votyp, string containing the VO compatible type spec
        arraysize, VOTable arraysize specification string of form '[N,M]' where
                      N and M are integer values.
        binstr, string containing the binary data.
        endian, string, ['>','<'] denoting the endianess of the binary data.

    RETURNS:
        - if len(binstr) does not match the size required by the format:
          the tuple (expected, actual) holding the two sizes in bytes;
          nothing is decoded in that case
        - for a single value: tuple with one element
        - for a complex type: list of complex numbers
        - for any other array: list holding one tuple with all values

    RAISES:
        Exception if votyp is unknown to votypToForm
    """
    (forms, mult, compl) = votypToForm(votyp, arraysize)
    if forms[0] == -1:
        errMsg = "ERROR: Unable to decode type: %s" % votyp
        raise Exception(errMsg)

    # TODO: solve the issue with multidimensional arrays
    fmt = endian + ''.join(forms)

    # The size has to be calculated with the byte order prefix: without
    # it struct uses the native sizes (e.g. 8 bytes for 'l' on many
    # 64 bit systems) which do not match the data described by <endian>.
    expected = struct.calcsize(fmt)
    if expected != len(binstr):
        return (expected, len(binstr))

    values = struct.unpack(fmt, binstr)
    nvalues = mult * compl
    if nvalues <= 1:
        return values
    if compl == 2:
        # pairs of consecutive values are (real, imaginary)
        return [complex(values[kk], values[kk + 1])
                for kk in range(0, nvalues, 2)]
    return [values[:nvalues]]


def interpretVotable(root, selection='', verbose=0):
    """
    This is the main logic behind a parsed VOTable object (root).
    It first loops on all RESOURCE elements and selects the ones matching
    the selection regexp. Then it loops through all selected resources and
    does the interpretation of the FIELD definitions of the first TABLE,
    including the non-standard LINKs.

    INPUTS:
        root, VOTABLE object
        selection, string, regexp to select certain RESOURCE elements. It
            has to match the complete name or the complete ID of the
            resource. An empty string selects all resources.
        verbose, int, controls the verbosity of the output

    RETURNS: tuple len(3) containing
             - the list of matching resources,
             - a dictionary which maps every reference (the part of the
               LINK href after the first ':') to the list
               [datatype, arraysize, zero, scale] of its FIELD and
             - the reference strings as a list.
    """
    cidI = {}
    cids = []
    res = []

    if verbose == 3:
        print(root)

    pres = None
    if selection != '':
        # Compiled once; the group makes '^' and '$' apply to the whole
        # expression also if it contains a top-level alternation.
        pres = re.compile('^(?:' + selection + ')$')

    for resource in root.RESOURCE:
        if pres is None:
            res.append(resource)
            continue
        # The ID is only looked at if the name does not match.
        if pres.match(str(resource.name)):
            rId = str(resource.name)
        elif pres.match(str(resource.ID)):
            rId = str(resource.ID)
        else:
            continue
        if verbose:
            print("Interpreting resource: %s\n" % rId)
            if verbose == 2:
                print(resource)
        res.append(resource)

    def optionValue(text):
        # An 'e' in the value is read as the power operator, i.e. '2e3'
        # gives 2**3.
        # NOTE(review): eval() executes whatever the document contains;
        # this is unsafe for VOTables from untrusted sources and should be
        # replaced by a real parser once the value syntax is confirmed.
        return eval(text.replace('e', '**'))

    for r in res:
        #
        # TODO: expand for multiple tables.
        #
        for field in r.TABLE[0].FIELD:
            # do it only if the FIELD type == 'reference'
            if field.type != 'reference':
                continue

            # deal with optional SCALE and ZERO OPTIONS
            zero = 0.
            scale = 1.
            for V in field.VALUES:
                for O in V.OPTION:
                    if O.name == 'ZERO':
                        zero = optionValue(O.value)
                    elif O.name == 'SCALE':
                        scale = optionValue(O.value)

            # make sure that there is a LINK element
            try:
                # take everything after the first ':'
                (prefix, ref) = field.LINK.href.split(':', 1)
                cidI[ref] = [field.datatype, field.arraysize, zero, scale]
                cids.append(ref)
            except Exception:
                # A reference FIELD without usable LINK is ignored; it
                # is only reported in verbose mode.
                if verbose:
                    errormsg = "FIELD type is reference but " + \
                        "no valid LINK element found! %s" % \
                        str(sys.exc_info()[1])
                    print(errormsg)
                    print(field)

    return (res, cidI, cids)
    
    


def createVotableDOM(votable=''):
    """
    Return a VOTable DOM document. The votable on input may be a complete
    VOTable document string or a DOM node (VOTABLE element or document)
    which will be imported into the new document.

    INPUTS:
        votable, string containing a votable document, a DOM node or
            nothing. An empty string and any other object without a
            nodeName attribute give an empty VOTABLE document.

    RETURNS:
        DOM document with the document type VOTABLE (VOTABLE.dtd) and a
        VOTABLE root element

    RAISES:
        Exception if the root element of the input is not VOTABLE;
        the errors of xml.dom.minidom.parseString if a string on input
        is not well-formed XML
    """
    impl = getDOMImplementation()
    doctype = impl.createDocumentType('VOTABLE', '', 'VOTABLE.dtd')
    # minidom refuses to create a document with a doctype but without a
    # root element name, thus always start with an empty VOTABLE root.
    newdoc = impl.createDocument(None, "VOTABLE", doctype)

    node = votable
    if isinstance(votable, (type(''), type(u''))):
        if not votable.strip():
            return newdoc
        from xml.dom.minidom import parseString
        node = parseString(votable)

    # for a complete document the root element is the one to import
    if getattr(node, 'nodeType', None) == newdoc.DOCUMENT_NODE:
        node = node.documentElement

    rootname = getattr(node, 'nodeName', None)
    if rootname is None:
        # nothing which could be imported
        return newdoc
    if rootname.upper() != 'VOTABLE':
        raise Exception("Wrong root element: %s " % rootname)

    # replace the empty root element by a deep copy of the input
    voTable = newdoc.importNode(node, 1)
    newdoc.removeChild(newdoc.documentElement)
    newdoc.appendChild(voTable)
    return newdoc



def constructMessage(votable, res, binParts):
    """
    Construct a new multipart/related message from a VOTable and the
    binary attachments.

    NOTE: votable is modified in place, afterwards its RESOURCE list
    holds exactly the resources given in res.

    INPUTS:
        votable, VOTable object.
        res, list of resources to be kept.
        binParts, list of message objects (the binary parts) to be attached

    RETURNS:
        newmsg, mime message object. The first part is of type text/xml
        and carries votable.__repr__() as payload, followed by the
        binParts in the order given.
    """
    # email.message is the name of the module from Python 2.5 on,
    # email.Message the one of older versions.
    try:
        from email.message import Message
    except ImportError:
        from email.Message import Message

    # all resources to be returned have to be in the res list, all
    # additional ones (if any) are deleted.
    del votable.RESOURCE[len(res):]
    for ii, resource in enumerate(res):
        votable.setResource(ii, resource)

    xmlPart = Message()
    # Without explicit type the part would count as text/plain and could
    # not be found again by a search for the text/xml part.
    xmlPart.set_type('text/xml')
    xmlPart.set_payload(votable.__repr__())

    newmsg = Message()
    newmsg.set_type('multipart/related')
    newmsg.attach(xmlPart)
    for b in binParts:
        newmsg.attach(b)

    return newmsg
    
def interpretBinary(part, cidI, endian='<', verbose=0):
    """
    Function takes a message attachment object and returns the decoded
    binary values with scale and zero applied (value*scale + zero).

    INPUTS:
        part, mime-message object of type binary/octet-stream
        cidI, list [datatype, arraysize, zero, scale], i.e. one value of
            the dictionary returned by interpretVotable
        endian, string ['<','>'], denotes the endianess of the data
        verbose, int, controls the verbosity of the output

    RETURNS:
        tuple of the decoded values. The type of the elements depends on
        the datatype; values of datatype 'char' are returned as decoded,
        without scale and zero.

    RAISES:
        Exception if part is an integer (error code instead of a part),
        if the part has no Content-Type header or is not of type
        binary/octet-stream, if the datatype can not be decoded or if the
        size of the attachment does not match the FIELD definition.
    """
    if isinstance(part, int):
        raise Exception("ERROR: cid not found!")

    ctHeader = part.get('Content-Type')
    if ctHeader is None:
        raise Exception("ERROR: MultipartHandler.interpretBinary: "
                        "part has no Content-Type header")
    ct = ctHeader.split(';')[0].strip().lower()
    if ct != 'binary/octet-stream':
        raise Exception("ERROR: MultipartHandler.interpretBinary: "
                        "unable to decode part of type %s" % ct)

    datatype = cidI[0]
    arraysize = cidI[1]
    zero = cidI[2]
    scale = cidI[3]

    if verbose:
        print('Decoding binary attachment:  %s' % part.get('Content-ID'))
    try:
        bin = unpackBinary(datatype, arraysize, part.get_payload(),
                           endian=endian)
    except Exception:
        # sys.exc_info() instead of a named exception keeps this handler
        # valid syntax for old and new interpreter versions.
        raise Exception("ERROR: Decoding of binary attachment failed. %s"
                        % str(sys.exc_info()[1]))
    if verbose:
        print(bin)

    # unpackBinary reports a size mismatch as the tuple (expected, actual).
    # Decoded data comes either as list or as tuple with a single
    # element, thus a tuple of length 2 can only be the mismatch.
    if isinstance(bin, tuple) and len(bin) == 2:
        raise Exception("ERROR: Expected size of binary attachment does "
                        "not match the actual size! (expected, actual) = %s."
                        % str(bin))

    # Plain arrays come as a list wrapping one tuple of values; complex
    # arrays are a flat list of complex numbers and stay as they are.
    if isinstance(bin, list) and len(bin) == 1 and isinstance(bin[0], tuple):
        bin = bin[0]

    # apply scale and zero (not applicable to character data)
    if datatype == 'char':
        sbin = tuple(bin)
    else:
        sbin = tuple([x * scale + zero for x in bin])

    if verbose:
        print(sbin)
        print("")
    return sbin

    
def interpretAllBinaries(msg, cids, cidI, verbose=1, endian='<'):
    """
    Function does the interpretation of the binary attachments according to the
    information stored in the cidI dictionary.

    INPUTS:
        msg, mime-message object
        cids, list of references (LINK)
        cidI, dictionary containing the FIELD definitions of the LINK elements
        verbose, int, if not 0 every decoded attachment is printed
        endian, string ['<','>'], denotes the endianess of the binary data

    RETURNS:
        tuple len(2), containing the list of all parts and a list of all decoded
             binary attachments, both in the order of cids.

    RAISES:
        Exception (from getPartId) if a reference has no matching part,
        and whatever interpretBinary raises for a part
    """
    binParts = []
    binarr = []
    for ref in cids:
        # The references are literal content IDs, while getPartId expects
        # a regular expression: escape the characters special to regexps.
        part = getPartId(msg, re.escape(ref))[0]    # there should be only a single one
        binParts.append(part)
        # interpretBinary itself is called with its default verbosity;
        # the result is reported here.
        sbin = interpretBinary(part, cidI[ref], endian=endian)
        binarr.append(sbin)
        if verbose:
            print("")
            print("Interpreted attachment %s" % ref)
            print(sbin)
            print("")

    return (binParts, binarr)
    


def main(args):
    """
    Interpretation of command line arguments and execution of appropriate
    functions.

    INPUTS:
        args, list of strings in the layout of sys.argv, i.e. the first
            element is the program name. If empty, sys.argv is used.
            Options: -f/--file <message file>, -r/--resource <regexp>,
            -c/--cid <regexp>, -v/--verbosity <int>

    RETURNS:
        - with -c: the list of parts matching the content ID
        - with -r (and without -c): the newly constructed message holding
          only the selected resources and their attachments
        - None otherwise

    RAISES:
        Exception if the file can not be opened or parsed, if no text/xml
        part is found or if the interpretation of an attachment fails.
        Without any option the usage is given and the program exits.
    """
    import getopt

    if args:
        argv = args[1:]
    else:
        argv = sys.argv[1:]
    # the '=' marks long options which take a value
    opts, args = getopt.getopt(argv, "f:r:c:v:",
                               ["file=", "resource=", "cid=", "verbosity="])

    if len(opts) == 0:
        usage()

    mailFile = ''
    ResourceName = ''
    cid = ''
    verbose = 1

    for o, v in opts:
        if o in ('-f', '--file'):
            mailFile = v
        elif o in ('-r', '--resource'):
            ResourceName = v
        elif o in ('-c', '--cid'):
            cid = v
        elif o in ('-v', '--verbosity'):
            verbose = int(v)

    try:
        f = open(mailFile)
    except (IOError, OSError):
        raise Exception("Unable to open %s" % mailFile)

    # parse the file with the email Parser; the file is closed in any case
    try:
        try:
            msg = Parser().parse(f)
        except Exception:
            raise Exception("email parsing failed: " + str(sys.exc_info()[1]))
    finally:
        f.close()

    # get the first xml part of the email...
    xmlParts = getMimeParts(msg, 'text/xml')
    if len(xmlParts) == 0:
        raise Exception("No text/xml part found!")
    xml = xmlParts[0].get_payload()  # There should only be one!

    # ...and parse it
    try:
        root = VOTable.parseString(xml)
    except Exception:
        raise Exception("XML parsing failed! " + str(sys.exc_info()[1]))

    (res, cidI, cids) = interpretVotable(root, selection=ResourceName,
                                         verbose=verbose)

    # if cid is specified just return that part and bail out
    if len(cid) > 0:
        part = getPartId(msg, cid)    # raises if nothing matches
        for p in part:
            pcid = p.get('Content-ID')[1:-1]
            if pcid not in cidI:
                raise Exception("Part with cid %s is not referenced by a "
                                "LINK of the interpreted resources!" % pcid)
            sbin = interpretBinary(p, cidI[pcid])
            print("Interpreting part %s" % pcid)
            print(sbin)
            print("")
        return part

    try:
        (binParts, binarr) = interpretAllBinaries(msg, cids, cidI,
                                                  verbose=verbose)
    except Exception:
        raise Exception("Interpretation of binary attachment failed. %s"
                        % str(sys.exc_info()[1]))

    # If a selection has been requested prepare a new message
    # NOTE: The binary parts are available in the binParts list.
    if ResourceName:
        return constructMessage(root, res, binParts)
    return None
#        print newmsg.as_string()
    
    

if __name__ == '__main__':
    # Defined up front so that an interactive session (python -i) finds
    # the name also after a failure.
    newmsg = None
    try:
        newmsg = main(sys.argv)
    except Exception:
        # Report the error only; the interpreter is kept alive for the
        # interactive use described in the synopsis.
        print(str(sys.exc_info()[1]))
    
#    if type(newmsg) == types.ListType:
#        for msg in newmsg:
#            print msg
#    else:
#        print newmsg



# EOF
