#!/usr/bin/env python

# Filename: sge.py 
# Author: Shreyas Cholia, NERSC-LBL, scholia@lbl.gov
# Modified by Lisa Gerhardt, NERSC-LBL, lgerhardt@lbl.gov
# Description: SGE probe for gratia
#
# Usage: sge.py [options]
# options:
#  -h, --help            show this help message and exit
#  -a, --accounting=ACCOUNTING
#                        SGE accounting file. Default:
#                        $SGE_ROOT/$SGE_CELL/common/accounting
#  -p, --prev-accounting=PACCOUNTING
#                        previous SGE accounting file. Default:
#                        $SGE_ROOT/$SGE_CELL/common/accounting.previous
#  -m, --gridmap=GRIDMAP
#                        grid-mapfile. Default: /etc/grid-security/grid-mapfile
#  -n, --nogridmap       Turn off grid-mapfile account to DN mapping
#  -d, --debug           print debug output
#  -c, --checkpoint      Only reports records past checkpoint. Default is to
#                        report all records.






import gratia.common.Gratia as Gratia
import os, copy, pwd, sys, string

# This module requires python v2.3
from optparse import OptionParser

# Levels passed to Gratia.DebugPrint for informational, verbose and
# debug messages respectively.
info_level = verbose_level = debug_level = 0

# Grid-mapfile settings read by SGE when mapping a job owner to a DN.
gridmapFile = ""
noGridmap = False

# When true, readsgefile also logs the XML form of each usage record.
debug = False

class SGE:
    """One SGE accounting record and its conversion to a Gratia usage record.

    An instance is built from the list of colon-separated values of one
    accounting line; the values are paired positionally with the names in
    ``__sgeFields__`` and stored in ``self.sgeRecord``.

    The methods read the module-level settings ``noGridmap``,
    ``gridmapFile``, ``verbose_level`` and ``debug_level``.
    """

    # Class-level default; every instance replaces it in __init__.
    sgeRecord = {}

    # Names of the accounting fields, in the order they appear on a line.
    __sgeFields__ = [ "qname",
                      "hostname",
                      "group",
                      "owner",
                      "job_name",
                      "job_number",
                      "account",
                      "priority",
                      "submission_time",
                      "start_time",
                      "end_time",
                      "failed",
                      "exit_status",
                      "ru_wallclock",
                      "ru_utime",
                      "ru_stime",
                      "ru_maxrss",
                      "ru_ixrss",
                      "ru_ismrss",
                      "ru_idrss",
                      "ru_isrss",
                      "ru_minflt",
                      "ru_majflt",
                      "ru_nswap",
                      "ru_inblock",
                      "ru_oublock",
                      "ru_msgsnd",
                      "ru_msgrcv",
                      "ru_nsignals",
                      "ru_nvcsw",
                      "ru_nivcsw",
                      "project",
                      "department",
                      "granted_pe",
                      "slots",
                      "task_number",
                      "cpu",
                      "mem",
                      "io",
                      "category",
                      "iow",
                      "pe_taskid",
                      "maxvmem" ]

    def reverseGridMap(self):
        """Return the DN that the grid-mapfile maps to this record's owner.

        The file named by the module-level ``gridmapFile`` is scanned for
        the first entry whose local account field names the owner.  The
        local account field may be a single user name or a comma-separated
        list of user names.

        Returns:
            The distinguished name with surrounding whitespace and double
            quotes removed, or "" when the file cannot be opened or no
            entry matches.
        """
        user = self.sgeRecord['owner']
        try:
            gridmapfile = open(gridmapFile, "r")
        except IOError:
            Gratia.DebugPrint(verbose_level, "Warning: Failed to read " + gridmapFile + ". Continuing without DN")
            return ""

        # try/finally so the handle is released on every exit path; this
        # method runs once per accounting record.
        try:
            for entry in gridmapfile:
                entry = entry.strip()
                if entry == "" or entry.startswith("#"):
                    continue

                # The last whitespace-separated token is the local account
                # field; everything before it is the (possibly quoted) DN.
                localuser = entry.split()[-1]
                if user not in localuser.split(","):
                    continue

                dn = entry[:entry.rfind(localuser)]
                return dn.strip().strip("\"")
        finally:
            gridmapfile.close()

        # if not found return empty string
        return ""

    def __init__(self, valueList):
        """Pair ``valueList`` positionally with the accounting field names.

        zip() stops at the shorter sequence, so a short list yields a
        record with only the leading fields and extra values are ignored.
        """
        self.sgeRecord = dict(zip(self.__sgeFields__, valueList))

    def createUsageRecord(self):
        """Build and return a Gratia "Batch" usage record for this job.

        Section numbers in the comments refer to the Usage Record format
        recommendation:
        http://www.psc.edu/~lfm/PSC/Grid/UR-WG/UR-Spec-gfd.58-ggf18.pdf

        Raises:
            KeyError: if a field used below is missing from the record.
            ValueError: if a numeric field other than ``slots`` cannot be
                converted with float().
        """
        rec = self.sgeRecord
        r = Gratia.UsageRecord("Batch")

        # Look up the owner's DN unless grid-mapfile mapping is disabled.
        if noGridmap:
            dn = ""
        else:
            dn = self.reverseGridMap()

        # 3.1 RecordIdentity is set by Gratia.py on record creation
        # RecordIdentity is a required string which must be unique

        # 3.2 GlobalJobId - optional, string
        # format: SGE:hostname:job_number.index
        # eg. SGE:pc1805.nersc.gov:121443.0
        globalJobId = "SGE:" + rec['hostname'] + ":" + rec['job_number'] + "." + rec['task_number']
        r.GlobalJobId(globalJobId)

        # 3.3 LocalJobId - optional, string
        # use SGE job_number field
        r.LocalJobId(rec['job_number'])

        # 3.4 ProcessId - optional, integer
        # TBD

        # 3.5 LocalUserId - optional, string
        # local username
        r.LocalUserId(rec['owner'])

        # 3.6 GlobalUsername
        # Use the GECOS field of the owner's passwd entry; fall back to the
        # plain account name when the owner is unknown on this host.
        try:
            pwent = pwd.getpwnam(rec['owner'])
            # TBD: If we need to further qualify this use:
            # globalUserName = pwent.pw_gecos + " (" + pwent.pw_name + ":" + str(pwent.pw_uid) + ")"
            globalUserName = pwent.pw_gecos
        except KeyError:
            globalUserName = rec['owner']
        r.GlobalUsername(globalUserName)

        # dn from reverse gridmap lookup
        if dn != "":
            r.DN(dn)

        # 3.7 JobName - optional, string
        # Use SGE job_name field
        r.JobName(rec['job_name'])

        # 3.8 Charge - optional, integer, site dependent
        # TBD

        # 3.9 Status - optional, integer, exit status
        # Use SGE exit_status
        # TBD - May want to do additional error code handling along with "failed"
        r.Status(rec['exit_status'])

        # 3.10 WallDuration - "Wall clock time that elapsed while the job was running."
        # Use SGE ru_wallclock field
        # Use float or int so that Gratia can format it appropriately
        r.WallDuration(float(rec['ru_wallclock']), "was entered in seconds")

        # 3.11 CpuDuration - "CPU time used, summed over all processes in the job"
        # The user share is reported as SGE cpu minus ru_stime (rather than
        # ru_utime); the system share is ru_stime.
        # Use float or int so that Gratia can format it appropriately
        sysCpu = float(rec['ru_stime'])
        r.CpuDuration(float(rec['cpu']) - sysCpu, "user", "was entered in seconds")
        r.CpuDuration(sysCpu, "sys", "was entered in seconds")

        # 3.12 EndTime - "The time at which the job completed"
        # SGE end_time field
        endTime = float(rec['end_time'])
        r.EndTime(endTime, "Was entered in seconds")

        # 3.13 StartTime - "The time at which the job started"
        # SGE start_time field
        startTime = float(rec['start_time'])
        r.StartTime(startTime, "Was entered in seconds")

        # 3.14 MachineName - can be host name or the sites name for a cluster
        # SGE hostname field
        # TBD - May want to use a generic cluster name
        # r.MachineName(rec['hostname'])

        # 3.15 Host - hostname on which job was run
        r.Host(rec['hostname'])

        # 3.16 SubmitHost
        # TBD - May want to read contents of $SGE_ROOT/default/common/act_qmaster
        # Optionally - we may want to use Globus GK hostname instead
        # r.SubmitHost(rec['hostname'])

        # 3.17 Queue - string, name of the queue from which job executed
        # SGE qname field
        r.Queue(rec['qname'])

        # 3.18 ProjectName - optional, effective GID (string)
        # SGE project field
        r.ProjectName(rec['project'])

        # 4 Differentiated UsageRecord Properties

        # 4.1 Network
        # NA

        # 4.2 Disk
        # NA

        # 4.3 Memory - optional, integer, mem use by all concurrent processes
        # SGE maxvmem field
        r.Memory(float(rec['maxvmem']), "B", description = "maxvmem")

        # 4.4 Swap
        # TBD - we could use ru_nswap - but need to verify if SGE actually sets this
        # What about ru_minflt and ru_majflt

        # 4.5 NodeCount

        # 4.6 Processors
        # A slot count that is not an integer is logged and left out of the
        # record instead of aborting the whole record.
        try:
            num_slots = int(rec['slots'])
        except (TypeError, ValueError):
            Gratia.DebugPrint(verbose_level,
                              "Warning: Unable to parse "
                              "number of slots to an integer: %s." % rec['slots'])
        else:
            r.Processors(num_slots)

        # 4.7 TimeDuration

        # 4.8 TimeInstant - a discrete time that is relevant to the reported usage time.
        # Type can be 'submit','connect', or 'other'
        # SGE submission_time field
        r.TimeInstant(float(rec['submission_time']), "submit", "was entered in seconds")

        # 4.9 Service Level - identifies the quality of service associated with
        # the resource consumption. For example, service level may represent a
        # priority associated with the usage.
        # SGE field - priority
        r.ServiceLevel(rec['priority'], serviceLevelType="priority", description="SGE Priority")

        # 4.10 Extension
        # TBD lookup grid VO related information and add it to record

        return r

    def printRecord(self):
        """Log every accounting field of this record at debug level."""
        Gratia.DebugPrint(debug_level, "=================================")
        for key in self.__sgeFields__:
            Gratia.DebugPrint(debug_level, key +  " : " + self.sgeRecord[key])
        Gratia.DebugPrint(debug_level, "=================================")
                

def readsgefile(filename,docheckpoint,checkpoint,checkpointFile):
    """Read an SGE accounting file, send each record to Gratia and
    advance the checkpoint in the checkpoint file.

    Args:
        filename: path of the accounting file to read.
        docheckpoint: when true, lines numbered ``checkpoint`` or lower
            are skipped and the line number of every record that has been
            sent is written to ``checkpointFile``.
        checkpoint: 1-based number of the last line already reported.
        checkpointFile: path of the file that stores the checkpoint.

    The process exits with status 5 when the accounting file cannot be
    opened.  Comment lines are ignored, reading stops at a final line
    that has no trailing newline, and lines with fewer fields than an
    accounting record requires are logged and skipped.
    """
    try:
        accounting = open(filename, "r")
    except IOError:
        Gratia.DebugPrint(info_level, "IOError: Failed to read accounting file " + filename)
        sys.exit(5)

    Gratia.DebugPrint(verbose_level, "Opened file: " + filename)

    # SGE indexes every known field, so a shorter line cannot be converted.
    expectedFields = len(SGE.__sgeFields__)
    linecount = 0

    # try/finally so the accounting file is closed however the loop ends.
    try:
        for line in accounting:
            # keep going until we hit the checkpoint
            linecount = linecount + 1
            if docheckpoint and linecount <= checkpoint:
                continue

            if line.startswith('#'):
                # ignore comments
                continue
            if not line.endswith('\n'):
                # and break on an incomplete line (EOF)
                Gratia.DebugPrint(info_level, "INFO: incomplete line (no trailing LF)")
                break

            # break up line into fields
            sgeList = line.rstrip("\r\n").split(":")
            Gratia.DebugPrint(debug_level, sgeList)

            if len(sgeList) < expectedFields:
                # A blank or truncated line would otherwise raise KeyError
                # while the record is printed or converted.
                Gratia.DebugPrint(info_level,
                                  "Warning: skipping malformed record at line %d of %s"
                                  % (linecount, filename))
                continue

            # create sgeRecord
            rec = SGE(sgeList)
            rec.printRecord()

            # convert sgeRecord into Gratia UsageRecord
            gratiaRec = rec.createUsageRecord()

            if debug:
                # Render a copy so the record that is sent stays untouched.
                xmlRec = copy.deepcopy(gratiaRec)
                xmlRec.XmlCreate()
                Gratia.DebugPrint(debug_level, " ".join(xmlRec.XmlData))

            # send UsageRecord
            Gratia.Send(gratiaRec)

            if docheckpoint:
                # Write our checkpoint to a file
                checkpoint = linecount
                # A failed write is only logged; reporting continues.
                try:
                    CPFILE = open(checkpointFile, "w")
                    try:
                        CPFILE.write(str(checkpoint) + "\n")
                    finally:
                        CPFILE.close()
                except IOError:
                    Gratia.DebugPrint(info_level, "IOError: Failed to write checkpoint file " + checkpointFile)
    finally:
        accounting.close()
            


def main():
    """Parse the command line, initialize Gratia and report accounting records.

    The accounting file comes from ``-a``, else the ProbeConfig attribute
    ``SGEAccountingFile`` (if it names an existing file), else
    ``$SGE_ROOT/$SGE_CELL/common/accounting``.  With ``-c`` only records
    past the stored checkpoint are reported; if a previous accounting file
    is configured and is newer than the checkpoint file, the previous file
    is finished first and the current file is then read from its start.
    """
    # These settings are read by SGE and readsgefile as module globals.
    # Without this declaration the assignments below would only create
    # locals and the command-line options would never reach them.
    global debug, noGridmap, gridmapFile
    global info_level, verbose_level, debug_level

    sgeRoot = os.getenv("SGE_ROOT", "/sge")
    sgeCell = os.getenv("SGE_CELL", "default")

    gridmapFile = os.getenv("GRIDMAP", "/etc/grid-security/grid-mapfile")

    # parse options
    optParser = OptionParser()

    optParser.add_option("-a", "--accounting",
                         action="store", dest="accounting", type="string",
                         help="SGE accounting file. Default: $SGE_ROOT/$SGE_CELL/common/accounting" )
    optParser.add_option("-p", "--prev-accounting",
                         action="store", dest="paccounting", type="string",
                         help="Previous SGE accounting file. Default: $SGE_ROOT/$SGE_CELL/common/accounting.previous" )
    optParser.add_option("-m", "--gridmap",
                         action="store", dest="gridmap", type="string",
                         default=gridmapFile,
                         help="grid-mapfile. Default: /etc/grid-security/grid-mapfile" )
    optParser.add_option("-d", "--debug",
                         action="store_true", dest="debug",
                         default=False,
                         help="print debug output")
    optParser.add_option("-n", "--nogridmap",
                         action="store_true", dest="noGridmap",
                         default=False,
                         help="turn off grid-mapfile account to DN mapping")
    optParser.add_option("-c", "--checkpoint",
                         action="store_true", dest="checkpoint",
                         default=False,
                         help="Only reports records past checkpoint. Default is to report all records.")

    (opts, args) = optParser.parse_args()

    debug = opts.debug
    noGridmap = opts.noGridmap

    if noGridmap:
        gridmapFile = ""
    else:
        gridmapFile = opts.gridmap

    # Initialize Gratia
    Gratia.Initialize()

    # Set the default accounting file per the ProbeConfig file if attribute exists
    defaultAccountingFile = Gratia.Config.getConfigAttribute("SGEAccountingFile")
    if not defaultAccountingFile or not os.path.exists(defaultAccountingFile):
        defaultAccountingFile = sgeRoot+"/"+sgeCell+"/common/accounting"

    # get the accounting file name
    if opts.accounting:
        accFileName = opts.accounting
    else:
        accFileName = defaultAccountingFile

    # Set the default previous accounting file per the ProbeConfig file if
    # attribute exists.  An unset attribute is checked before it reaches
    # os.path.exists(), the same way the accounting file is handled above.
    defaultPrevAccountingFile = Gratia.Config.getConfigAttribute("SGEPrevAccountingFile")
    if not defaultPrevAccountingFile or not os.path.exists(defaultPrevAccountingFile):
        defaultPrevAccountingFile = ""

    # get the previous accounting file name; a -p file that does not exist
    # falls back to the configured default
    if opts.paccounting and os.path.exists(opts.paccounting):
        prevAccFileName = opts.paccounting
    else:
        prevAccFileName = defaultPrevAccountingFile

    checkpointFile = os.path.join(Gratia.Config.get_WorkingFolder(), "checkpoint")

    if debug:
        info_level = 0
        verbose_level = 0
        debug_level = 0
    else:
        info_level = 1
        verbose_level = 1
        debug_level = 1

    Gratia.DebugPrint(info_level, "Using " + checkpointFile)

    checkpoint = 0
    haveCheckpointFile = os.path.isfile(checkpointFile)
    if opts.checkpoint and haveCheckpointFile:
        # An unreadable or corrupt checkpoint restarts from the beginning
        # instead of aborting the probe.
        try:
            CPFILE = open(checkpointFile, "r")
            try:
                checkpoint = int(CPFILE.readline())
            finally:
                CPFILE.close()
        except IOError:
            Gratia.DebugPrint(info_level, "IOError: Failed to read checkpoint file " + checkpointFile)
            checkpoint = 0
        except ValueError:
            Gratia.DebugPrint(info_level, "Warning: Invalid checkpoint in " + checkpointFile + ". Starting from the beginning")
            checkpoint = 0

    Gratia.DebugPrint(verbose_level, "Using Accounting file: " + accFileName)
    if prevAccFileName:
        Gratia.DebugPrint(verbose_level, "Using previous Accounting file: " + prevAccFileName)
        Gratia.DebugPrint(verbose_level, "Expecting accounting files to rotate at some point.")
    if noGridmap:
        Gratia.DebugPrint(verbose_level, "Not using grid-mapfile")
    else:
        Gratia.DebugPrint(verbose_level, "Using grid-mapfile: " + gridmapFile)

    if opts.checkpoint and prevAccFileName:
        if haveCheckpointFile:
            # check if rotation has happened since the last time
            chkage = os.stat(checkpointFile).st_mtime
            sgeage = os.stat(prevAccFileName).st_mtime
            Gratia.DebugPrint(verbose_level, "Checking file ages: checkpoint file: " + str(chkage) + " previous file " + str(sgeage))
            if chkage < sgeage:
                # accounting file has rotated, need to look at previous file first
                Gratia.DebugPrint(verbose_level, "Account file has rotated, looking at previous file first. Checkpoint value at " + str(checkpoint))
                readsgefile(prevAccFileName,opts.checkpoint,checkpoint,checkpointFile)
                checkpoint = 0
                # Persist the reset: the stored value now refers to the
                # rotated file, and if the new file has no records yet the
                # next run would skip that many lines of it.
                try:
                    CPFILE = open(checkpointFile, "w")
                    try:
                        CPFILE.write(str(checkpoint) + "\n")
                    finally:
                        CPFILE.close()
                except IOError:
                    Gratia.DebugPrint(info_level, "IOError: Failed to write checkpoint file " + checkpointFile)
        else:
            # First checkpointed run: there is no checkpoint file to
            # compare ages with, so only the current file is read.
            Gratia.DebugPrint(verbose_level, "No checkpoint file yet, skipping rotation check.")

    Gratia.DebugPrint(verbose_level, "Looking at regular file. Checkpoint value at " + str(checkpoint))
    readsgefile(accFileName,opts.checkpoint,checkpoint,checkpointFile)
            

    # Clean things up
    # Do we need to define a clean exit function?

# Run the probe only when this file is executed as a script.
if __name__ == "__main__":
    main()


