#!/usr/bin/env python

from __future__ import print_function
import os, sys, getopt, re 
import datetime
import urllib

# sys.path[0] is the first entry of the module search path (the directory of
# the script when it is run directly).  Appending the directory three levels
# above it lets the "glideinwms" imports below resolve -- presumably so the
# tool can be run straight from a source checkout (TODO confirm layout).
STARTUP_DIR=sys.path[0]
sys.path.append(os.path.join(STARTUP_DIR,"../../.."))

from glideinwms.lib import xmlParse
from glideinwms.factory.tools.lib import analyze


def frontend_print(frontend, fe_mode, d, div, to_be_printed, fe_sorting, fe_attr_list, fe_sort_attribute):
    """Format one frontend summary row and append it to ``to_be_printed``.

    Arguments:
        frontend: frontend name; a leading "frontend_" prefix is dropped
            for display.
        fe_mode: unused here, kept for interface compatibility.
        d: dict with the keys 'total_time', 'used', 'validation', 'idle',
            'waste' and 'badput' for this frontend.
        div: divisor applied to every time value before printing
            (selects the display unit).
        to_be_printed: list receiving ``(sort_key, text)`` tuples.
        fe_sorting: unused here, kept for interface compatibility.
        fe_attr_list: key of ``d`` backing each sortable column.
        fe_sort_attribute: index of the column to sort by.  The index
            layout is the caller's: 2, 4, 6, 8, 10 are the percentage
            columns and 11 is the efficiency column.

    Returns:
        ``to_be_printed`` (the same list object that was passed in).

    Raises:
        ZeroDivisionError: if ``d['total_time']`` or
            ``d['used'] + d['waste']`` is zero.
    """
    div = float(div)
    total = float(d['total_time'])/div
    sort_attr = fe_attr_list[fe_sort_attribute]

    # The sort key is 100 * value / fdiv, truncated to an int.
    fdiv = 1
    # Absolute columns are scaled to the display unit.  The original list
    # spelled 'validation' as 'val', which never matched any attribute.
    if sort_attr in ['total_time', 'validation', 'used', 'idle', 'waste', 'badput']:
        fdiv = div
    if fe_sort_attribute in [2, 4, 6, 8, 10]:  #['u%','v%','i%','w%','b%']:
        # Percentage columns are relative to the total time.
        fdiv = div*total
    elif fe_sort_attribute == 11:  # 'eff'
        # Efficiency is used/(used+waste).  The original tested the backing
        # attribute name ('used') against 'eff', so this branch never ran.
        fdiv = float(d['waste']+d['used'])

    sorter = int(100.0*float(d[sort_attr])/fdiv)

    # str.lstrip() strips a *set of characters*, not a prefix, and would eat
    # the start of names such as "frontend_fermilab"; drop only the prefix.
    label = frontend
    if label.startswith("frontend_"):
        label = label[len("frontend_"):]

    columns = [label, analyze.km(total)]
    for key in ('used', 'validation', 'idle', 'waste', 'badput'):
        scaled = float(d[key])/div
        columns.append(analyze.km(scaled))
        columns.append(scaled/total*100.0)
    columns.append(float(d['used'])/float(d['used']+d['waste']))

    to_be_printed.append((sorter,
      "%-25s %6s|%6s %3d%%|%6s %3d%%|%6s %3d%%|%6s %3d%%|%6s %3d%%|%5.2f" %
      tuple(columns)))
    return to_be_printed

def blockprint(title,d,p,pd):
    """Print the multi-line summary block for one set of aggregated counters.

    Arguments:
        title: unused; the caller prints its own heading.
        d: dict with the keys 'glideins', 'jobs', 'used', 'total_time',
            'validation', 'idle', 'waste' and 'badput'.
        p: passed through as the third argument of analyze.printline
            (the callers pass the period length in seconds).
        pd: 0 for a grand-total block; otherwise a dict whose 'glideins'
            value is the total that ``d`` is reported as a share of.

    Raises:
        ZeroDivisionError: if 'glideins', 'waste', 'total_time' or
            'waste' + 'used' is zero (callers are expected to guard this).
    """
    if pd != 0:
        share = " - %.1f%% of total" % ((float(d['glideins'])/float(pd['glideins'])*100.0))
        prefix = ""
    else:
        share = ""
        prefix = "Total "

    # Average job length is undefined when no job ran at all.
    avg_len = float(d['used'])/d['jobs'] if d['jobs'] > 0 else float('NaN')

    template = "\n".join([
        "",
        "%sGlideins: %s%s",
        "%sJobs: %d (Avg jobs/glidein: %.2f, avg job len: %.2fh) ",
        "",
        "%stime:            %s",
        "%stime used:       %s",
        "%stime validating: %s",
        "%stime idle:       %s",
        "%stime wasted:     %s",
        "%sbadput:          %s",
        "Time used/time wasted: %.1f",
        "Time efficiency: %.2f  Goodput fraction: %.2f",
        "",
    ])

    fields = [prefix, d['glideins'], share,
              prefix, d['jobs'],
              float(d['jobs'])/float(d['glideins']),
              avg_len/3600.0]
    # Total time is reported against 1, every component against the total.
    fields += [prefix, analyze.printline(d['total_time'], 1, p)]
    for key in ('used', 'validation', 'idle', 'waste', 'badput'):
        fields += [prefix, analyze.printline(d[key], d['total_time'], p)]
    fields += [float(d['used'])/float(d['waste']),
               float(d['used'])/float(d['waste']+d['used']),
               float(d['total_time']-d['badput'])/float(d['total_time'])]

    print(template % tuple(fields))

# Column layout of the header rows printed above the per-entry tables:
# one 40-character name column followed by ten right-aligned columns.
LINEPRINT_HEADER_FMT = "%-40s%5s %4s %4s | %4s %4s %4s %4s | %6s %7s %7s"
# Divider row: dashes under the name column and under each data column.
LINEPRINT_HEADER_DIV = LINEPRINT_HEADER_FMT % (("-" * 10,) + ("-" * 3,) * 10)
# Title row: empty name column followed by the column titles.
LINEPRINT_HEADER = LINEPRINT_HEADER_FMT % tuple(
    [""] + "strt fval 0job val idle wst badp waste time total".split())
def lineprint(entry_name, entry, g_entry,g,to_be_printed, attr_list, sort_attribute,min_limit,totals=None):
    """Format one per-entry table row and append it to ``to_be_printed``.

    Arguments:
        entry_name: row label; a leading "entry_" prefix is dropped.
        entry: dict with the raw counters 'FailedNr', 'validation_All',
            'JobsNr_None' and the total-time key (see ``g``).
        g_entry: dict with the aggregated values 'glideins', 'validation',
            'idle', 'waste' and 'badput'.
        g: 1 if the total time is stored in ``entry`` under 'Lasted',
            anything else if it is stored under 'total_time'.
        to_be_printed: list receiving the ``(sort_key, text)`` tuple.
        attr_list: dict key backing each sortable column.
        sort_attribute: index into ``attr_list`` of the sort column.
        min_limit: dict mapping printed column names to a minimum value.
            When non-empty the row is kept only if at least one listed
            column reaches its minimum.
        totals: optional dict accumulating raw sums over all rows under
            the keys 'entry' and 'g_entry'; an empty dict is initialised
            on first use.  Rows dropped by ``min_limit`` are still counted.

    Returns:
        None.

    Raises:
        ZeroDivisionError: if the glidein count or the total time is zero.
    """
    if g == 1:
        s = 'Lasted'
    else:
        s = 'total_time'

    # --- sort key ---------------------------------------------------------
    combined = dict(entry, **g_entry)  # combine the two dictionaries
    sort_key = attr_list[sort_attribute]
    div = 1
    if sort_key in ("FailedNr", 'JobsNr_None'):
        div = float(g_entry['glideins'])
    elif sort_key in ('validation_All', 'validation', 'idle', 'wst', 'badput'):
        div = float(entry[s])

    if sort_key == 'wst':
        # 'wst' is the percentage view of the 'waste' value.
        lookup = 'waste'
    elif sort_key == 'Lasted':
        # The time column is whatever key holds the total time for this
        # row; the totals row built with g != 1 has no 'Lasted' key, so a
        # literal lookup raised KeyError when sorting by time.
        lookup = s
    else:
        lookup = sort_key
    sorter = int((float(combined[lookup])/div)*100)

    # --- running totals ---------------------------------------------------
    if totals is not None:
        ekeys = ('FailedNr', 'validation_All', 'JobsNr_None', s)
        gkeys = ('glideins', 'validation', 'idle', 'waste', 'badput')

        if len(totals) == 0:
            # still needs to be initialized... do it
            totals['entry'] = dict((k, 0.0) for k in ekeys)
            totals['g_entry'] = dict((k, 0.0) for k in gkeys)
        for k in ekeys:
            totals['entry'][k] += entry[k]
        for k in gkeys:
            totals['g_entry'][k] += g_entry[k]

    # --- values to print --------------------------------------------------
    glideins = float(g_entry['glideins'])
    total_time = float(entry[s])
    p = {}
    p['strt'] = (float(entry['FailedNr'])/glideins)*100
    p['fval'] = (float(entry['validation_All'])/total_time)*100
    p['0job'] = (float(entry['JobsNr_None'])/glideins)*100
    p['val'] = (float(g_entry['validation'])/total_time)*100
    p['idle'] = (float(g_entry['idle'])/total_time)*100
    p['wst'] = (float(g_entry['waste'])/total_time)*100
    p['badp'] = (float(g_entry['badput'])/total_time)*100
    p['waste'] = g_entry['waste']/3600.0
    p['time'] = entry[s]/3600.0
    p['total'] = g_entry['glideins']

    # Drop the row only if it fails *all* the requested minimum limits.
    if min_limit:
        if not any(k in min_limit and p[k] >= min_limit[k] for k in p):
            return

    # str.lstrip() strips a *set of characters*, not a prefix, and would
    # turn "entry_test_site" into "st_site"; drop only the literal prefix.
    label = entry_name
    if label.startswith("entry_"):
        label = label[len("entry_"):]

    to_be_printed.append((sorter, "%-40s %3d%% %3d%% %3d%% | %3d%% %3d%% %3d%% %3d%% | %6d  %6d  | %4d"
                          % (label,
                             p['strt'], p['fval'], p['0job'], p['val'], p['idle'], p['wst'], p['badp'], p['waste'], p['time'], p['total'])))

def _mill_weight(name, prefix):
    """Return the weight encoded in a '<prefix>_<bucket>' counter name.

    The bucket is either a number of thousandths, optionally followed by
    'm' (e.g. '250m' -> 0.25), or one of 'All' (1.0), 'Most' (0.75, an
    approximation) and 'None' (0.0).
    """
    bucket = re.findall(prefix + '_(.*)', name)[0].rstrip('m')
    if bucket == "All":
        bucket = 1000
    elif bucket == "Most":
        bucket = 750  # approximation
    elif bucket == "None":
        bucket = 0
    return int(bucket)*1.0e-3


def _print_rows(rows, sort_rows):
    """Print the text of ``(sort_key, text)`` rows.

    When ``sort_rows`` is true the rows are sorted in place, highest key
    first, and a closing divider line is printed after them.
    """
    if sort_rows:
        rows.sort(reverse=True)
    for row in rows:
        print(row[1])
    if sort_rows:
        print("----------------------------------")


def main():
    """Command-line entry point: print glidein usage and waste statistics.

    Reads the options from ``sys.argv`` (see the usage text below), loads
    the three "rrd_Log_Completed*.xml" summaries from the factory monitor
    directory or from a URL, regroups the numbers by period, frontend and
    entry, and prints:

      * a summary block per period (unless --nb is given),
      * a per-entry table over all frontends,
      * a per-entry table for every frontend, or in frontend mode a single
        per-frontend table,
      * a legend.

    Exits with status 1 if the source cannot be read or the requested
    frontend does not exist, and with status 0 on malformed options.
    Returns early (after listing the valid choices) on an unknown sort
    attribute or interval.
    """

    usage="""
USAGE: 
    -x [#] : interval to do verbose stats (default 24)
    --source [ABSPATH or http addr] : factory base (default current directory)
    -s [attribute]: sort by attribute 
              (-s x to see list of choices)
    -f [frontend] : filter by a single frontend
         (can omit "frontend_" before name)
    -p : show all periods (default off - only show 24 hours)
    -m : frontend mode - emphasize frontend data, no entries shown.
               (default data - slots).
    --ms : frontend mode, showing seconds instead of slots.
    --mh : frontend mode, showing hours instead of slots.
    --msort [attribute]: frontend mode, sort by attribute.
           (--msort x to see list of choices)
    --nb : Do not print out the top block
    -l [field,nr] : do not print entries below the limit
    -h : this usage message
"""

    # flags
    x = 24
    source_dir = os.getcwd()
    sorting = 0
    fe_sorting = 0
    sort_attribute = 0
    fe_sort_attribute = 0
    filter_frontend = 0   # 0 means "no filter", otherwise the frontend name
    show_all_periods = 0
    frontend_mode = 0     # 0: per entry, 1: slots, 2: seconds, 3: hours
    show_block = True
    min_limit = {}

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'x:hwf:l:pms:', ['source=','ms','mh','msort=','nb'])
    except getopt.GetoptError:
        print("\n Option not recognized or missing parameters.")
        print(" Use -h for usage.\n")
        sys.exit(0)
    for o, a in opts:
        if o == "-x":
            x = a
        elif o == "--source":
            source_dir = a
        elif o in ("-h", "-help"):
            print(usage)
            return
        elif o == "-s":
            sorting = 1
            sort_attribute = a.lower()
        elif o == "-p":
            show_all_periods = 1
        elif o == "-m":
            frontend_mode = 1
        elif o == "--ms":
            frontend_mode = 2
        elif o == "--mh":
            frontend_mode = 3
        elif o == "--msort":
            fe_sorting = 1
            fe_sort_attribute = a.lower()
        elif o == "-f":
            filter_frontend = a
            # Only a leading "frontend_" counts as the prefix being present.
            if not filter_frontend.startswith('frontend_'):
                filter_frontend = 'frontend_' + filter_frontend
        elif o == "--nb":
            show_block = False
        elif o == "-l":
            # Expect exactly "field,number"; treat anything else like any
            # other malformed option instead of dying with a traceback.
            try:
                min_a, min_v = a.split(',')
                min_limit[min_a] = float(min_v)
            except ValueError:
                print("\n Option not recognized or missing parameters.")
                print(" Use -h for usage.\n")
                sys.exit(0)

    # Names of the values aggregated per period / frontend / entry.
    attributes = ('glideins', 'jobs', 'used', 'waste',
                  'validation', 'idle', 'badput', 'total_time')
    # Column names accepted by -s, and the dict key backing each of them.
    sort_attributes = ['strt','fval','0job','val','idle','wst','badp','waste','time','total']
    attr_list = ['FailedNr','validation_All','JobsNr_None','validation',
                  'idle','wst','badput','waste','Lasted','glideins']

    if sorting != 0:
        if sort_attribute not in sort_attributes:
            print("%s not in list of attributes. Choices are:\n" % sort_attribute)
            for a in sort_attributes:
                print(a)
            print()
            return
        sort_attribute = sort_attributes.index(sort_attribute)

    # Frontend sorting: column names accepted by --msort and their keys.
    fe_sort_attributes = ['total','used','u%','val','v%','idle','i%','waste','w%','badput','b%','eff']
    fe_attr_list = ['total_time','used','used','validation','validation','idle','idle','waste','waste','badput','badput','used']
    if fe_sorting != 0:
        if fe_sort_attribute not in fe_sort_attributes:
            print("%s not in list of attributes. Choices are:\n" % fe_sort_attribute)
            for a in fe_sort_attributes:
                print(a)
            print()
            return
        fe_sort_attribute = fe_sort_attributes.index(fe_sort_attribute)

    ###########################################################################
    #   Locate and load the three XML summaries
    ###########################################################################

    data = {}
    rrd_list = {"completed_data": "rrd_Log_Completed.xml",
                "job_dur_data": "rrd_Log_Completed_Stats.xml",
                "wastemill_data": "rrd_Log_Completed_WasteTime.xml"}

    # Only a real URL scheme selects remote mode; a local path that merely
    # contains "http" somewhere is still a local path.
    if source_dir.startswith(("http://", "https://")):
        file_dir = source_dir
    else:
        file_dir = os.path.join(source_dir, "monitor")
        if not os.path.isdir(file_dir):
            # Try RPM location
            rpmdir = "/var/lib/gwms-factory/work-dir"
            rpmfile_dir = os.path.join(rpmdir, "monitor")
            if not os.path.isdir(rpmfile_dir):
                print("\nCannot open", file_dir)
                print("Please set --source to the factory work dir and try again.")
                sys.exit(1)
            if source_dir != os.getcwd():
                # Directory explicitly set, print warning
                print("\nWARNING: Cannot open", file_dir)
                print("Using RPM default %s instead" % rpmfile_dir)
            file_dir = rpmfile_dir

    for name, xml in rrd_list.items():
        file_loc = os.path.join(file_dir, xml)
        try:
            # Python 2 urllib.urlopen handles local paths as well as URLs.
            u = urllib.urlopen(file_loc)
            try:
                data[name] = xmlParse.xmlfile2dict(u)
            finally:
                u.close()
        except Exception:
            print("\nCannot open", file_loc)
            print("Please set --source to the factory work dir and try again.")
            sys.exit(1)

    c_data = data["completed_data"]
    j_data = data["job_dur_data"]
    w_data = data["wastemill_data"]

    ###########################################################################
    #   Rearranges rrd_data into data = [periods][frontends][entries][elements]
    #      (periods are integers and in seconds)
    ###########################################################################

    data = {}
    frontend_list = []

    for entry in c_data['entries']:
        for frontend in c_data['entries'][entry]['frontends']:
            if frontend not in frontend_list:
                frontend_list.append(frontend)

    if filter_frontend != 0:
        if filter_frontend not in frontend_list:
            print("\nFrontend", filter_frontend, "not found at source.\n")
            print("Choices are:\n ")
            for frontend in frontend_list:
                print(frontend)
            print()
            sys.exit(1)

    xml_sources = (("Log_Completed.xml", c_data),
                   ("Log_Completed_Stats.xml", j_data),
                   ("Log_Completed_Wastetime.xml", w_data))

    for entry in c_data['entries']:
        for frontend in c_data['entries'][entry]['frontends']:

            if filter_frontend != 0 and frontend != filter_frontend:
                continue

            for period in c_data['entries'][entry]['frontends'][frontend]['periods']:
                elements = data.setdefault(int(period), {}).setdefault(frontend, {}).setdefault(entry, {})

                for rrd_name, x_data in xml_sources:
                    try:
                        period_values = x_data['entries'][entry]['frontends'][frontend]['periods'][period]
                        for a, b in period_values.items():
                            # Each value is multiplied by the period length
                            # in seconds and truncated to an integer.
                            elements[a] = int(float(b)*int(period))
                    except Exception:
                        # Best effort: one file lacking this entry/frontend
                        # must not abort the whole report.
                        print("\nEntry %s in %s is missing %s; continuing\n" % (entry, rrd_name, frontend))

    ######################################################################
    # Organize totals/stats for each period, frontend, and entry independantly
    ######################################################################

    now_str = datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
    if filter_frontend == 0:
        print("""
Glidein log analysis for All Entries - %s
""" % now_str)
    else:
        print("""
Glidein log analysis for frontend %s - %s
""" % (filter_frontend, now_str))

    period_data = {}
    frontend_data = {}
    entry_data = {}
    entry_data_all_frontends = {}

    for period, frontends in data.items():
        period = int(period)
        period_data[period] = dict.fromkeys(attributes, 0)
        frontend_data[period] = {}
        entry_data[period] = {}
        entry_data_all_frontends[period] = {}

        for frontend, entries in frontends.items():
            frontend_data[period][frontend] = dict.fromkeys(attributes, 0)
            entry_data[period][frontend] = {}

            for entry, elements in entries.items():
                entry_data[period][frontend][entry] = dict.fromkeys(attributes, 0)

                # may have gotten entry from another frontend
                if entry not in entry_data_all_frontends[period]:
                    entry_data_all_frontends[period][entry] = dict.fromkeys(attributes, 0)
                all_fe_entry = entry_data_all_frontends[period][entry]

                # Default every attribute to 0 because the xml file
                #     sometimes loses them at random. Example: <period name="7200"/>
                sample = dict.fromkeys(attributes, 0)
                sample['glideins'] = elements.get("Glideins", 0)
                sample['jobs'] = elements.get("JobsNr", 0)
                sample['used'] = elements.get("JobsLasted", 0)
                sample['total_time'] = elements.get("Lasted", 0)
                if "Lasted" in elements and "JobsLasted" in elements:
                    sample['waste'] = elements["Lasted"] - elements["JobsLasted"]
                if "Lasted" in elements and "JobsGoodput" in elements:
                    sample['badput'] = elements["Lasted"] - elements["JobsGoodput"]

                # Validation and idle are sums of bucket counters, each
                # weighted by the fraction encoded in its name.
                for name, value in elements.items():
                    if 'validation' in name:
                        sample['validation'] += value*_mill_weight(name, 'validation')
                    if 'idle' in name:
                        sample['idle'] += value*_mill_weight(name, 'idle')

                for a in attributes:
                    entry_data[period][frontend][entry][a] += sample[a]
                    frontend_data[period][frontend][a] += sample[a]
                    period_data[period][a] += sample[a]
                    all_fe_entry[a] += sample[a]

                # Raw counters needed by the all-frontends table.  A counter
                # missing for this frontend contributes 0; it must not wipe
                # out what other frontends already added (the original
                # reset the running sum to 0 in that case).
                for a in ['FailedNr','validation_All','JobsNr_None','badput_All','Lasted']:
                    all_fe_entry[a] = all_fe_entry.get(a, 0) + elements.get(a, 0)

    ######################################################################
    #   Print top block
    ######################################################################

    # sort periods from least to greatest, with 24 hours at the top
    period_list = sorted(period_data)
    if 86400 in period_list:  # the data may not contain a 24 hour period
        period_list.remove(86400)
        period_list.insert(0, 86400)

    try:
        period = int(x)*3600
    except ValueError:
        # Not a whole number of hours: it cannot match any period, so fall
        # into the "does not exist" branch below and list the choices.
        period = None

    # if filtering by period, make sure it's in the data
    if period not in period_list:
        print("Interval",x,"does not exist in data.\n Choices are:")
        for a in period_list:
            print(a // 3600)
        print()
        return

    # default - show only one period, either user-chosen or default(24)
    if show_all_periods == 0:
        period_list = [period]

    for p in period_list:
        title = ("Past %.1f hours" % (float(p)/3600))

        # if no glideins, omit this entry
        if period_data[p]['glideins'] == 0:
            continue
        # to avoid division by zero
        if period_data[p]['waste'] == 0:
            period_data[p]['waste'] = 1

        if show_block:
            print("----------------------------------------\n%s:\n"%title)
            blockprint(title, period_data[p], p, 0)

    ################################################################################
    #    Print per entry stats (all frontends)
    ################################################################################

    if filter_frontend == 0 and frontend_mode == 0:

        print("""
---------------------------------------
---------------------------------------
Per Entry (all frontends) stats for the past %s hours.\n""" % x)

        print(LINEPRINT_HEADER)

        to_be_printed = []  # (sort key, text) pairs, one per entry
        totals = {}
        totals_to_be_printed = []

        for entry_name, entry in entry_data_all_frontends[period].items():

            if entry['glideins'] == 0 or entry['total_time'] == 0:
                continue
            if entry['validation'] == 0:
                entry['validation'] = 1

            lineprint(entry_name,entry,entry,0,to_be_printed,attr_list,sort_attribute,min_limit,totals)

        # totals stays empty when every entry was skipped above; there is
        # then no Total/Average row to print.
        if totals:
            lineprint('Total/Average',totals['entry'],totals['g_entry'],0,totals_to_be_printed,attr_list,sort_attribute,{})
            print(totals_to_be_printed[0][1])
        print(LINEPRINT_HEADER_DIV)

        _print_rows(to_be_printed, sorting == 1)

    ################################################################################
    #    Print per frontend per entry stats
    ################################################################################

    if frontend_mode == 0:
        print("""
---------------------------------------
---------------------------------------
Per Entry (per frontend) stats for the past %s hours.\n""" % (x))
    else:
        units = ["Slots","Seconds","Hours"]
        print("""
---------------------------------------
---------------------------------------
Frontend stats for the past %s hours.\n""" % (x))
        print("%-25s%7s %7s %3s %7s %3s %7s %3s %7s %3s %7s %3s %4s\n" % (
              "Units: "+units[frontend_mode-1], "total","used","u%","val","v%","idle","i%","waste","w%","badput","b%","eff"))

    fe_to_be_printed = []

    for frontend, entries in data[period].items():
        d = frontend_data[period][frontend]

        if period_data[period]['glideins'] == 0 or d['glideins'] == 0:
            continue
        # to avoid divisions by zero
        if d['waste'] == 0:
            d['waste'] = 1

        if frontend_mode == 0:
            print("----------------------------------------\n%s:\n"%frontend)

            if show_block:
                blockprint(frontend,d,period,period_data[period])

            print(LINEPRINT_HEADER)

            to_be_printed = []  # (sort key, text) pairs, one per entry
            totals = {}
            totals_to_be_printed = []

            for entry, e in entries.items():
                en = entry_data[period][frontend][entry]

                # 'Lasted' can be missing when the xml lost the attribute.
                if en['glideins'] == 0 or e.get('Lasted', 0) == 0:
                    continue
                if en['validation'] == 0:
                    en['validation'] = 1

                lineprint(entry, e, en, 1, to_be_printed, attr_list, sort_attribute, min_limit, totals)

            # No Total/Average row when every entry was skipped above.
            if totals:
                lineprint('Total/Average', totals['entry'], totals['g_entry'], 1, totals_to_be_printed, attr_list, sort_attribute, {})
                print(totals_to_be_printed[0][1])
            print(LINEPRINT_HEADER_DIV)

            _print_rows(to_be_printed, sorting == 1)

        else:  # print frontend like entries, and omit entries
            # Divisor per frontend mode: slots (period length), seconds, hours.
            divs = [period, 1.0, 3600.0]
            fe_to_be_printed = frontend_print(frontend, frontend_mode, d, divs[frontend_mode-1], fe_to_be_printed, fe_sorting, fe_attr_list, fe_sort_attribute)

    _print_rows(fe_to_be_printed, fe_sorting == 1)

    ################################################################################
    #    Print Key
    ################################################################################

    if frontend_mode == 0:
        print("""\n-----------------------------------
LEGEND:

K - kilseconds (*1,000 seconds)
M - megaseconds (*1,000,000 seconds)

strt - % of jobs where condor failed to start
fval - % of glideins that failed to validate (hit 1000s limit)
0job - %  0 jobs/glidein
----------
val - % of time used for validation
idle - % of time spend idle
wst - % of time wasted (Lasted - JobsLasted) 
badp - % of badput (Lasted - JobsGoodput)
----------
waste - wallclock time wasted (hours) (Lasted - JobsLasted)
time - total wallclock time (hours) (Lasted)
total - total number of glideins
-------------------------------------
Lasted      - total wallclock time
JobsLasted  - wallclock time used to run jobs
JobsGoodput - wallclock time used by jobs terminatig with exit code 0 
\n""")

    else:  #frontend_mode legend
        print("""\n-----------------------------------
LEGEND:

K - kilseconds (*1,000 seconds)
M - megaseconds (*1,000,000 seconds)

total - total number of units (slots, seconds or hours,
        see units in upper left hand corner of frontend stats.
----------
used - time used 
val  - time spent validating
idle - time spent idle
wst  - time wasted (Lasted - JobsLasted) 
badp - badput (Lasted - JobsGoodput)

u,v,i,w,b% - percentage of previous corresponding attribute
             (used, val, idle, waste, badput) over total time
----------
eff     - efficiency - time used over total time 
-------------------------------------
Lasted      - total wallclock time
JobsLasted  - wallclock time used to run jobs
JobsGoodput - wallclock time used by jobs terminatig with exit code 0 
-------------------------------------
        \n""")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
