#!/bin/ksh
#
# $Id: pndchk.sh,v 1.1 2023/03/27 16:07:05 root Exp $
#
# The following code is Confidential and is covered by the installation license
# (c) Copyright Fortra, LLC. and its group of companies.
#
#&& Performance Navigator Data CHecK (pndchk.sh) 
#& First removes PCPU# PCPU_ALL SCPU# SCPU_ALL lines from the mpgdata2.hostname
#& Performs data validation of mpgdata2 file data, and creates resequenced file
#& if required.
#
#&@ pndcutils.sh is also required
#
#&% Run on the second of the month from crontab
#
# Initial Created 06/23/09
#
#

# Scratch directory; the time-reference check further down writes its
# result file here.
mkdir -p /tmp/helpsystems_tmp

# Directory this script was started from.  The helper script and the config
# file are looked up there.  "$0" is quoted so a path with blanks survives.
invocdir=`dirname "$0"`

# Source the shared helper script when it exists and is not empty.
# NOTE(review): presumably this is where sysname/def_datafile are set -- confirm.
if [ -s "$invocdir/pndcutils.sh" ]; then
   . "$invocdir/pndcutils.sh"
fi

# Optional trace of the data file variables (any non-empty $debug enables it).
if [ ! -z "$debug" ] ; then
   echo "### Debug ###  def_datafile is ($def_datafile)"
   echo "### Debug ###  jdatafile is ($jdatafile)"
   echo "### Debug ###  ext_data_jdatafile is ($ext_data_jdatafile)"
   echo ""
fi

# Source the site configuration.  The message reports the directory that was
# actually searched ($invocdir), which need not be the current directory.
if [ -s "$invocdir/pn.config" ] ; then
   . "$invocdir/pn.config"
else
   echo "pn.config file does not exist at $invocdir"
fi

# Same trace again, so values changed by pn.config are visible.
if [ ! -z "$debug" ] ; then
   echo "### Debug ###  def_datafile is ($def_datafile)"
   echo "### Debug ###  jdatafile is ($jdatafile)"
   echo "### Debug ###  ext_data_jdatafile is ($ext_data_jdatafile)"
   echo ""
fi

# Three new environmental variables
# installdir
# mpgdatadir
# nmondatadir

# ~-~-~-~-~-~-~-~-~-~-~-~-~-
# Running automatic trimming (mpgd2_arcgztrim.sh, mpgd3_arcgztrim.sh and
# rdhist_trim.sh, each with -a).
# The helpers are started as ./name, so each one is run from the script
# directory inside a subshell.  That way they are found even when this script
# is started from another directory (e.g. by cron), and the working directory
# of this script itself is left unchanged.
# ~-~-~-~-~-~-~-~-~-~-~-~-~-
( cd "${invocdir:-.}" && ./mpgd2_arcgztrim.sh -a )
( cd "${invocdir:-.}" && ./mpgd3_arcgztrim.sh -a )
( cd "${invocdir:-.}" && ./rdhist_trim.sh -a )
# Completed running automatic trimming
# ~-~-~-~-~-~-~-~-~-~-~-~-~-

# installdir, mpgdatadir and nmondatadir should already be defined.  Any of
# them that is unset or empty falls back to the invocation directory.
: "${installdir:=$invocdir}"
: "${mpgdatadir:=$invocdir}"
: "${nmondatadir:=$invocdir}"

# We have to define the external datafile name and the datafile name

# pndchk_sysname PATH
#   Prints the text between the first and the second "." of the file name
#   part of PATH (for mpgdata2.hostname that is "hostname"), or an empty
#   line when the file name contains no ".".
#   The directory part is removed first, so a "." inside it (./mpgdata2.host,
#   /opt/pn.d/mpgdata2.host) can no longer be mistaken for the separator.
#   The positional parameters are reused as scratch space because they are
#   private to the function.
pndchk_sysname() {
   set -- "${1##*/}"
   case "$1" in
      *.*) set -- "${1#*.}"
           echo "${1%%.*}" ;;
      *)   echo "" ;;
   esac
}

# set > /tmp/helpsystems_tmp/set.out
if [ "x${1}x" != "xx" ]; then
    # A data file was named on the command line: derive sysname from it.
    # NOTE(review): this branch builds the mpgdata3 name under nmondatadir
    # while the default branch uses mpgdatadir -- confirm which is intended.
    jdatafile=$1
    sysname=`pndchk_sysname "${jdatafile}"`
    ext_data_jdatafile="${nmondatadir}/mpgdata3.$sysname"
else
   # No argument: use the sysname that is already set.
   ext_data_jdatafile="${mpgdatadir}/mpgdata3.$sysname"
   jdatafile="${mpgdatadir}/mpgdata2.$sysname"
fi

if [ ! -z "$debug" ] ; then
   echo "### Debug ###  def_datafile is ($def_datafile)"
   echo "### Debug ###  jdatafile is ($jdatafile)"
   echo "### Debug ###  ext_data_jdatafile is ($ext_data_jdatafile)"
   echo ""
fi

# Stop early when the data file is missing or empty.
if [ ! -s "${jdatafile}" ] ; then
   echo "### Error ###  Cannot find file specified ($jdatafile)"
   echo ""
   exit 1
fi

# First automatically remove any PCPU# PCPU_ALL SCPU# SCPU_ALL lines from the
# mpgdata2.hostname file.
#
# The filtered copy is written gzipped next to the data file and afterwards
# expanded over the original.  The status of a pipeline is the status of its
# last command (gzip), so egrep's own status is passed out on descriptor 3
# and captured in jchk; gzip's status is what $? holds after the assignment.
# egrep -v exits 0 when lines were kept, 1 when none were kept and a larger
# value on a real error.  Only that last case is treated as a failure here,
# which keeps the data file from being replaced by an empty result when
# egrep could not read it.
jchk=`{ { egrep -v "^PCPU[0-9]|^PCPU_ALL|^SCPU[0-9]|^SCPU_ALL" "${jdatafile}"; echo $? >&3; } | gzip -c > "${jdatafile}.gz"; } 3>&1`
jgzchk=$?
if [ "${jchk:-2}" -le 1 ] && [ $jgzchk -eq 0 ] ; then
   echo "Successfully removed unwanted items from mpgdata2.hostname file."
   # Keeping timestamp: stamp the .gz with the data file's time before
   # expanding it over the original.
   touch -r "${jdatafile}" "${jdatafile}.gz"
   gunzip -f "${jdatafile}.gz"
else
   # Leave the original data untouched and drop the unusable copy.
   rm -f "${jdatafile}.gz"
   echo "Error removing unwanted items from mpgdata2.hostname file."
   exit 1
fi

# echo "Remove Later"; exit 1

# Start from a clean slate: drop the result files of an earlier run.
rm -f "${jdatafile}.data_errors" "${jdatafile}.out.of.sequence"

echo "Validating data in $jdatafile into report."
echo "Review details in: ${jdatafile}.dv_rpt"
# A later step counts mpg_*nmon* files relative to the current directory, so
# a failed cd is reported instead of passing silently.
# NOTE(review): a relative data file path is resolved against the new
# directory from here on -- confirm callers pass absolute paths.
cd "$invocdir" || echo "### Warning ###  Cannot change directory to ($invocdir)"

# Added a date reference to ensure the ${jdatafile}'s date/time does not change.
rm -f "${jdatafile}.timeref"
touch -r "${jdatafile}" "${jdatafile}.timeref"

# Total number of lines in the data file; handed to the report awk as jrecs.
jreccnt=`wc -l "${jdatafile}" | awk '{print $1}'`

# Find dates on which the ZZZZ time stamps go backwards.
# Stage 1 selects well-formed ZZZZ lines.  Stage 2 reduces each to
# "snapshot-number hhmmss date".  Stage 3 prints the date whenever a snapshot
# other than 0001 carries the same date as the previous line but an earlier
# time.  uniq collapses adjacent repeats of a date.
egrep "^ZZZZ,T[0-9][0-9][0-9][0-9],[0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9][0-9]-[JFMASOND][AEPUCO][NBRYLGPTVC]-[0-9][0-9][0-9][0-9]" $jdatafile \
   | awk -F, '{ print substr($2,2,5), substr($3,1,2) substr($3,4,2) substr($3,7,2), $4 }' \
   | awk '
      BEGIN { prev_time = 000000 ; prev_date = "" }
      $1 != "0001" && prev_date == $3 && prev_time > $2 { print $3 }
      { prev_time = $2 ; prev_date = $3 }
   ' \
   | uniq > ${jdatafile}.data_errors.tmp

# Append an explanatory section to the error report when any date was flagged.
if [ -s ${jdatafile}.data_errors.tmp ]; then
   {
      echo  '---------------------------'
      echo  "Errors were found with the data in $jdatafile."
      echo  "Times were decreasing in the data.  This can happen once"
      echo  "during daylight savings in early November, otherwise "
      echo  "it may be caused by nmon runnning multiple times simultaneously."
      echo  "The following dates are affected:"
      echo  '---------------------------'
      cat ${jdatafile}.data_errors.tmp
      echo  ''
      echo  ''
   } >> ${jdatafile}.data_errors
fi

# Find header text (ZZZZ,T / AAA,... / DISKBSIZE) that does not start its
# line.  The first egrep numbers every line containing such text and the
# second drops those where the text directly follows the "N:" prefix, i.e.
# where it is at the start of the line.
egrep -n "ZZZZ,T|AAA\,(progname|date|time|note|command),|DISKBSIZE" ${jdatafile} \
   | egrep -v ":ZZZZ,T|:AAA\,(progname|date|time|note|command),|:DISKBSIZE" \
   > ${jdatafile}.data_errors.tmp1

# Append the findings, and the command that found them, to the error report.
if [ -s ${jdatafile}.data_errors.tmp1 ]; then
   {
      echo  '---------------------------'
      echo  "Errors were found with the data in $jdatafile."
      echo  "Header info placed mid line."
      echo  "Most likley cause is nmon runnning multiple times simultaneously."
      echo  "The following command detected the problem:"
      echo  '---------------------------'
      echo 'egrep -n "ZZZZ,T|AAA\,(progname|date|time|note|command),|DISKBSIZE" mpgdata2.hostname | egrep -v ":ZZZZ,T|:AAA\,(progname|date|time|note|command),|:DISKBSIZE"'
      echo  '---------------------------'
      echo  'Details:'
      echo  '---------------------------'
      cat ${jdatafile}.data_errors.tmp1
      echo  ''
      echo  ''
   } >> ${jdatafile}.data_errors
fi


# Build the record report ${jdatafile}.dv_rpt.
#
# egrep -n selects the well-formed ZZZZ time stamp lines, the AAA
# progname/date/time/note/command header lines and the DISKBSIZE, TOP and
# NPIV lines, each prefixed with its line number.  sed turns the first ":"
# into ",", so with FS="," awk sees the line number as $1 and the record tag
# as $2.
#
# The awk program only has rules for AAA and ZZZZ lines; the other selected
# lines just count towards NR.  It prints three kinds of report lines:
#    HDR_REC  first-line last-line header-stamp   status
#    SUB_REC  first-line last-line snapshot-stamp status count
#    DBF_REC  first-line last-line header-stamp   status count
# Stamps are yyyymmdd (built by dtconv) followed by hhmmss.  status is "Ok"
# or an error value: Error_subrec_sequence (SUB_REC) and Error (DBF_REC) when
# the snapshot and header stamps disagree (the exact test differs per rule),
# Error_NoRecords when the count is 0 and Error_RecCntHi when it is above 288.
# jrecs, the total line count of the data file, closes the last DBF_REC in END.
egrep -n "^ZZZZ,T[0-9][0-9][0-9][0-9],[0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9][0-9]-[JFMASOND][AEPUCO][NBRYLGPTVC]-[0-9][0-9][0-9][0-9]|^AAA\,(progname|date|time|note|command)|^DISKBSIZE|^TOP|^NPIV"  $jdatafile \
   | sed s/\:/\,/ \
   | awk -v "jrecs=$jreccnt" '

function dtconv (date_in) {
   split("JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC", month, " ")
   for (i=1; i<=12; i++) mdigit[month[i]]=i
      m=toupper(substr(date_in,4,3))
   out_date=substr(date_in,8,4) sprintf("%02d",mdigit[m]) substr(date_in,1,2)
   # Date goes out yyyymmdd
   return out_date
}

BEGIN { 
   FS=","
   errhdrrec = "Ok"  
   errsubrec = "Ok"  
   errdbfrec = "Ok"  
   bdbfrec = 1 
   bhdrrec = 1 
   findhdrrec = "F"  
   findsubrec = "F"
   cntsubrec = 0
   # dtconv(datein) 12-AUG-2009 returns 20090812
}
 
{if($2 == "AAA" && findhdrrec != "T" && bdbfrec == 1 && index($0,"nmon.") == 0) 
# {if($2 == "AAA" && findhdrrec != "T" && NR == 1 && index($0,"nmon.") == 0) 
   {  # print $0
      edbfrec = $1-1 
      esubrec = edbfrec 
      findhdrrec = "T"
      findsubrec = "F"
      # print $1,$2,$3
      {  if(NR != 1) 
         {  esubrec = edbfrec
            { if(subrecdate != newhdrdate && subrecdate != newhdrdate + 1){
                errsubrec = "Error_subrec_sequence"
                errdbfrec = "Error" }
         }
            {  if(cntsubrec != 0) 
               {  print "SUB_REC",bsubrec, esubrec, subrecboth, errsubrec, cntsubrec }
            } 
            {  if(cntsubrec == 0) 
               { errdbfrec = "Error_NoRecords"}
            } 
            {  if(cntsubrec > 288) 
               { errdbfrec = "Error_RecCntHi"}
            } 
            # Testing only ## print NR, "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec
            print "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec
            cntsubrec = 0
            errsubrec = "Ok"
            errdbfrec = "Ok"
         }
      } 
      bdbfrec = $1 
      bhdrrec = $1
   } 
}  

{if($2 == "AAA" && findhdrrec != "T" && bdbfrec != 1 && index($0,"nmon.") == 0 )
   {  # print $0
      edbfrec = $1-1 
      esubrec = edbfrec 
      findhdrrec = "T"
      findsubrec = "F"
      # print $1,$2,$3
      { if(subrecboth < newhdrboth){
           errsubrec = "Error_subrec_sequence"
           errdbfrec = "Error"}}
      {  if(cntsubrec != 0) 
         {  print "SUB_REC",bsubrec, esubrec, subrecboth, errsubrec, cntsubrec }
      } 
      {  if(cntsubrec == 0) 
         { errdbfrec = "Error_NoRecords"}
      } 
      {  if(cntsubrec > 288) 
         { errdbfrec = "Error_RecCntHi"}
      } 
      # Testing only ## print NR, "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec 
      print "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec 
      cntsubrec = 0
      errsubrec = "Ok"
      errdbfrec = "Ok"
      bdbfrec = $1 
      bhdrrec = $1
   } 
}  

# {if($2 == "AAA" && $3 == "progname")
#    {  # print $0
#       hdrdate = $5
#       # print "dtconv", $4
#       newhdrdate = dtconv($4) 
#       newhdrboth = newhdrdate newhdrtime
#       findhdrrec = "T"
#       findsubrec = "F"
#       cntsubrec = 0
#       # print $0, newhdrdate 
#       # print $1,$2,$3,$4,$5
#    }
# }  

{if($2 == "AAA" && $3 == "date")
   {  # print $0
      hdrdate = $5
      # print "dtconv", $4
      newhdrdate = dtconv($4) 
      newhdrboth = newhdrdate newhdrtime
      findhdrrec = "T"
      findsubrec = "F"
      cntsubrec = 0
      # print $0, newhdrdate 
      # print $1,$2,$3,$4,$5
   }
}  

{if($2 == "AAA" && $3 == "time")
   {  # print $0
      hdrtime = $4
      # print "hdrtime", $4
      newhdrtime = substr($4,1,2)substr($4,4,2)substr($4,7,2)
      # print "newhdrtime", newhdrtime
      # print "newhdrboth", newhdrboth
      # print $0, newhdrtime 
      # print $1,$2,$3,$4,$5
   }
}  

{if($2 == "ZZZZ" && $3 == "T0001")
   {  # print $0
      # Determine subrecord date on each subrecord
      subrecdate = dtconv($5) 
      subrectime = substr($4,1,2)substr($4,4,2)substr($4,7,2)
      subrecboth = subrecdate subrectime
      ehdrrec = $1-1 
      findhdrrec = "F" 
      bsubrec = $1 
      # print "HDR_REC", bhdrrec, ehdrrec, newhdrdate, errhdrrec 
      print "HDR_REC", bhdrrec, ehdrrec, newhdrboth, errhdrrec 
      errhdrrec = "Ok"
      cntsubrec = cntsubrec + 1 
   }  
} 
 
{if($2 == "ZZZZ" && $3 != "T0001")
   {  # print $0
      # Determine subrecord date on each subrecord
      findhdrrec = "F" 
      esubrec = $1-1 
      { if( subrecboth < newhdrboth ){
           errsubrec = "Error_subrec_sequence"
           errdbfrec = "Error"}}
      {  if(cntsubrec != 0) 
         {  print "SUB_REC",bsubrec, esubrec, subrecboth, errsubrec, cntsubrec }
      } 
      subrecdate = dtconv($5) 
      subrectime = substr($4,1,2)substr($4,4,2)substr($4,7,2)
      subrecboth = subrecdate subrectime
      errsubrec = "Ok"
      cntsubrec = cntsubrec + 1 
      bsubrec = $1 
   }
}  

# { if(substr($2,1,9) == "DISKBSIZE" || substr($2,1,3) == "TOP" )
#    {  # print $0
#      edbfrec = $1 
#      findhdrrec = "F" 
#      findsubrec = "F"
#   }
# }  

{if($2 == "ZZZZ" && findsubrec != "T")
   {  # print $0
      bsubrec = $1 
      findhdrrec = "F" 
      findsubrec = "T"
      # print $1,$2,$3
   }
}  

{if($2 == "AAA" && findsubrec != "T" && index($0,"nmon.") != 0)
   {  # print $0
      bsubrec = $1 
      findhdrrec = "F" 
      findsubrec = "T"
      cntsubrec = 0
      # print $1,$2,$3
   }
}

END { 
    # jrecs is the record count of the mpgdata2.filename.
    # It is set at the end of the awk statement just before it goes into the output file
    edbfrec = jrecs
    esubrec = edbfrec 
    { if( subrecboth < newhdrboth ){
         errsubrec = "Error_subrec_sequence"
         errdbfrec = "Error"}}
    {  if(cntsubrec != 0) 
       {  print "SUB_REC",bsubrec, esubrec, subrecboth, errsubrec, cntsubrec }
    } 
    {  if(cntsubrec == 0) 
       { errdbfrec = "Error_NoRecords"}
    } 
    {  if(cntsubrec > 288) 
       { errdbfrec = "Error_RecCntHi"}
    } 
    # Testing only ## print NR, "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec
    print "DBF_REC",bdbfrec, edbfrec, newhdrboth, errdbfrec, cntsubrec
    errsubrec = "Ok"
    errdbfrec = "Ok"
    }
    ### Output to datafilename+.dv_rpt ### 
'  >  ${jdatafile}.dv_rpt  


# Collect report stamps that occur an unexpected number of times.
# A SUB_REC stamp should be seen once; a header stamp should be seen twice
# (once on its HDR_REC line and once on its DBF_REC line).  Every other
# count is written as "count stamp" to the .dup_recs file.
{
   egrep "SUB"  ${jdatafile}.dv_rpt  | awk '{print $4}' | sort -n | uniq -c | grep -v " 1 "  | awk '{print $1,$2}'
   egrep "HDR|DBF"  ${jdatafile}.dv_rpt  | awk '{print $4}' | sort -n | uniq -c | grep -v " 2 " | awk '{print $1,$2}'
} > ${jdatafile}.dup_recs

if [ ! -s ${jdatafile}.dup_recs ]; then
   # Nothing suspicious: leave no (stale) duplicate files behind.
   rm -f ${jdatafile}.dup_recs  ${jdatafile}.dup_recs_details
else
   # For every suspicious stamp, list the report lines that mention it.
   rm -f ${jdatafile}.dup_recs_details
   while read dup_count dup_stamp
   do
      {
         echo "(${dup_stamp})"
         grep ${dup_stamp}  ${jdatafile}.dv_rpt
         echo ""
      } >> ${jdatafile}.dup_recs_details
   done < ${jdatafile}.dup_recs
   echo "There may be duplicate, or problem records."
   echo "Please check ${jdatafile}.dup_recs and  ${jdatafile}.dup_recs_details"
fi


# List DBF records
grep DBF "${jdatafile}.dv_rpt" > "${jdatafile}.dv_rpt_DBF"

# pndchk_sort_by_stamp
#   Filter: sorts the lines on stdin numerically on the key that starts at
#   the 4th blank-separated field (the date-time stamp of a DBF_REC line).
#   "-k 4" is the POSIX spelling of that key.  It replaces the per-OS choice
#   between "-k +4" and the obsolescent "+3" form.
pndchk_sort_by_stamp() {
   sort -n -k 4
}

# List DBF records sorted by date.  The listing written above is reused
# instead of running grep over the report a second time.
pndchk_sort_by_stamp < "${jdatafile}.dv_rpt_DBF" > "${jdatafile}.dv_rpt_DBF_sort"

# Every report line that does not contain "Ok" is a problem record.
# Collect them and, when there are any, append an explanatory section to
# the error report.
grep -v Ok ${jdatafile}.dv_rpt > ${jdatafile}.data_errors.tmp2
if [ -s ${jdatafile}.data_errors.tmp2 ]; then
   {
      echo  '---------------------------'
      echo  "Errors were found with the data in the report file $jdatafile.dv_rpt"
      echo  "Most likley cause is nmon runnning multiple times simultaneously."
      echo  "Normal counts are 288(12*24). During daylight savings in early "
      echo  "November they could be 300, for one day when time rolls back"
      echo  "Or this could be from causes unknown. "
      echo  '---------------------------'
      echo  'Details:'
      echo  '---------------------------'
      cat ${jdatafile}.data_errors.tmp2
      echo  ''
      echo  ''
   } >> ${jdatafile}.data_errors
fi

# Drop the .sorted file left behind by an earlier run.
rm -f ${jdatafile}.sorted

# The DBF listing in file order and the one sorted by stamp are compared.
# Identical listings mean the data file is already in date sequence.
# Otherwise a resequenced copy is assembled in ${jdatafile}.sorted.
diff ${jdatafile}.dv_rpt_DBF ${jdatafile}.dv_rpt_DBF_sort > /dev/null
jdiffchk=$?
if [[ ${jdiffchk} -eq 0 ]] ; then
   echo "${jdatafile} is sequenced correctly."
   jsequence="Ok"
else
   echo "${jdatafile} is out of date sequence."
   touch ${jdatafile}.out.of.sequence
   jsequence="OutOfSequence"
   echo "Creating a correctly indexed file ${jdatafile}.sorted "
   rm -f ${jdatafile}.sorted
   touch ${jdatafile}.sorted

   # Reduction: merge DBF ranges that directly follow each other (next
   # begin = previous end + 1) into a single "begin end" pair, so the copy
   # loop below needs fewer passes over the data file.
   awk '
      NR == 1            { first = $2 ; last = $3 ; next }
      ($2 - last) == 1   { last = $3 ; next }
                         { print first, last ; first = $2 ; last = $3 }
      END                { print first, last }
   ' ${jdatafile}.dv_rpt_DBF_sort > ${jdatafile}.dv_rpt_DBF_sort_reduced

   # Copy each line range, in sorted order, to the end of the .sorted file.
   # awk stops reading as soon as it is past the end of the range.
   while read seg_first seg_last
   do
      awk -v first=${seg_first} -v last=${seg_last} 'NR > last { exit } NR >= first { print $0 }' ${jdatafile} >> ${jdatafile}.sorted
   done < ${jdatafile}.dv_rpt_DBF_sort_reduced

   # The data file must not have been modified since the time reference was
   # taken; find prints its name only when it is newer than the reference.
   find ${jdatafile} -newer ${jdatafile}.timeref > /tmp/helpsystems_tmp/pndchk.timeref
   if [ ! -s /tmp/helpsystems_tmp/pndchk.timeref ]; then
      # Unchanged: give the sorted copy the time stamp of the original and
      # tell the operator how to put it in place.
      touch -r ${jdatafile} ${jdatafile}.sorted
      echo "If the size is slightly smaller, there may have been invalid data,"
      echo "please check the file ${jdatafile}.dv_rpt for lines without OK"
      echo "Please check the date and size below."
      ls -al ${jdatafile}.sorted ${jdatafile}
      today_text=`date '+%A %B %d, %Y'`
      echo "The sorted data should be copied over the actual data once confirmed,"
      echo "providing the date is still ${today_text}."
      echo "Do not do the mv command below unless it is still ${today_text}."
      echo "Command required:"
      echo "   mv ${jdatafile}.sorted ${jdatafile}"
      echo "If it is not ${today_text}, please run pndchk.sh again."
   else
      echo "The data has changed while it was being processed."
      echo "Results are not correct, do not use the ${jdatafile}.sorted file."
   fi
   # Do we want to fix automatically?
   # How do we want to handle totally invalid data that may be in the datafile?
fi

# Console summary of what the checks above found, one line per non-empty
# intermediate file.  The intermediate files are removed afterwards.
if [ -s ${jdatafile}.data_errors ] ; then
   echo "***   There are errors in ${jdatafile}"
   echo "***   Please review ${jdatafile}.data_errors for details"
fi

if [ -s ${jdatafile}.data_errors.tmp ]; then echo  "Times were decreasing in the data." ; fi
if [ -s ${jdatafile}.data_errors.tmp1 ]; then echo  "Header info placed mid line." ; fi
if [ -s ${jdatafile}.data_errors.tmp2 ]; then echo  "Assorted errors in the data." ; fi

rm -f ${jdatafile}.data_errors.tmp ${jdatafile}.data_errors.tmp1 ${jdatafile}.data_errors.tmp2

# Retention settings for the mpgdata2 file, in days.
truncate_minimum=30
truncate_default=366
if [ -z "${truncate_days}" ]; then
   echo "truncate_days not specified (${truncate_days}); setting value to (${truncate_default})."
   truncate_days=${truncate_default}
fi

# jdbf_cnt is the number of distinct adjacent stamps in the DBF listing,
# reported as the number of days of data.
jdbf_cnt=`awk '{print $4}' ${jdatafile}.dv_rpt_DBF | uniq | wc -l | awk '{print $1}'`
echo "There are (${jdbf_cnt}) days of data in ${jdatafile}"

# Refuse to keep less than the minimum retention.
if [ ${truncate_days} -lt ${truncate_minimum} ] ; then
   echo "Truncate days (${truncate_days}) is less than min retention of (${truncate_minimum}).  Exiting!"
   echo ""
   exit 1
fi

# This next section does not impact external data truncation.
# Never keep fewer days than there are mpg_*nmon* files in the current
# directory.
jnmonfiles=`ls -1tr mpg_*nmon* 2>/dev/null | wc -l | awk '{print $1}'`
if [ ${truncate_days} -lt ${jnmonfiles} ] ; then
   echo "Truncate days (${truncate_days}) is less than the number of nmon files."
   echo "Setting truncate days to (${jnmonfiles}) and continuing."
   truncate_days=${jnmonfiles}
fi

# Everything that follows works on line ranges and therefore needs the data
# to be in sequence.
if [ "${jsequence}" != "Ok" ] ; then
   echo "Cannot truncate data that is out of sequence."
   echo "Please run the move (mv) specified above to properly order the data,"
   echo "then rerun this script.  Exiting!"
   echo ""
   exit 1
fi

# set > /tmp/helpsystems_tmp/jset.tmp
# echo "remove later" ; exit 1

# Removal of TOP data starts here and must be done prior to truncation process.

# Set purge_top_days
# NOTE(review): purge_top_minimum is defined but never compared against
# purge_top_days anywhere in this script -- confirm whether a minimum check
# was intended.
purge_top_minimum=30
purge_top_default=90  # days of DISK/FC/IO/NPIV/TOP detail kept by default
if [ "x${purge_top_days}x" = "xx" ]; then
   echo "purge_top_days not specified (${purge_top_days}); setting value to (${purge_top_default})."
   purge_top_days=${purge_top_default}
elif [ "${purge_top_days}" -gt "${truncate_days}" ]; then
   # A purge window longer than the retention window falls back to the default.
   purge_top_days=${purge_top_default}
fi

if [ "${purge_top_days}" -lt "${jdbf_cnt}" ] ; then
   echo "(${jdbf_cnt}) is greater than the specified purge_top_days value of (${purge_top_days})."

   #
   # Beginning of values to place marker for use in later truncation process
   #
   # jdbf_date_select is the stamp of the first record the later truncation
   # will keep; everything above it is archived later.
   jdbf_date_select=`awk '{print $4}' "${jdatafile}.dv_rpt_DBF" | uniq | tail -n -${truncate_days} | head -1`

   # jdbf_date_line is the full text of the DBF date line.
   jdbf_date_line=`grep "${jdbf_date_select}" "${jdatafile}.dv_rpt_DBF" | head -1`

   # jdbf_tail_lineno is the starting line number of the data to be retained.
   jdbf_tail_lineno=`echo ${jdbf_date_line} | awk '{print $2}'`

   # jdbf_head_lineno is the number of lines that are being archived.
   jdbf_head_lineno=`echo ${jdbf_date_line} | awk '{print ($2 - 1)}'`
   #
   # End of data for use in placing marker for later truncation process
   #

   # jdbf_date_select_top is the first date of the TOP data retained.
   # Any TOP data before this date will be purged.
   jdbf_date_select_top=`awk '{print $4}' "${jdatafile}.dv_rpt_DBF" | uniq | tail -n -${purge_top_days} | head -1`

   # jdbf_date_line_top is the full text of the DBF date line for TOP purging.
   jdbf_date_line_top=`grep "${jdbf_date_select_top}" "${jdatafile}.dv_rpt_DBF" | head -1`

   # jdbf_tail_lineno_top is the first line from which all data is kept as is.
   jdbf_tail_lineno_top=`echo ${jdbf_date_line_top} | awk '{print $2}'`

   # jdbf_head_lineno_top is the number of lines in front of that line.
   jdbf_head_lineno_top=`echo ${jdbf_date_line_top} | awk '{print ($2 - 1)}'`

   # If there is a datafile and a gzipped datafile, report it and exit.
   # The message names the two files that are actually tested.
   if [ -s "${jdatafile}" ] && [ -s "${jdatafile}.gz" ] ; then
      echo "### error ###"
      echo "Both files ${jdatafile} and ${jdatafile}.gz exist,"
      echo "exiting.  Please investigate the problem."
      echo ""
      exit 1
   fi

   # If someone has gzipped the datafile, report it and exit.
   if [ -s "${jdatafile}.gz" ] && [ ! -s "${jdatafile}" ] ; then
      echo "Archive file ${jdatafile} has been gzipped and needs to be corrected"
      echo "Recommend running - gunzip ${jdatafile}.gz"
      echo ""
      exit 1
   fi

   # Start by gzipping the datafile with -f (force)
   # This file will be used to build a new version without top data
   gzip -f "${jdatafile}"

   # Start removing the DISK, FC, IO, NPIV, TOP data prior to archiving data
   echo "Purging DISK, FC, IO, NPIV, and TOP data that is older than "
   echo "(${purge_top_days}) days into file ${jdatafile}_top_temp.gz"
   # Lines from jbegin onwards are copied unchanged.  Earlier lines are copied
   # only when their first field does not start with DISK, FC, IO, TOP or
   # NPIV.  A "marker4truncate" line is written after original line jmarker,
   # so the truncation step can find its cut position again once the line
   # numbers have shifted.
   gunzip -c "${jdatafile}.gz" | awk -F, -v jbegin="${jdbf_tail_lineno_top}" -v jmarker="${jdbf_head_lineno}"  '{if(NR >= jbegin || ($1 !~ "^DISK"  && $1 !~ "^FC" && $1 !~ "^IO" && $1 !~ "^TOP" && $1 !~ "^NPIV" ) ) print $0}  {if(NR == jmarker ) print "marker4truncate"}' | gzip -c > "${jdatafile}_top_temp.gz"

   # Confirm that the new datafile is not missing record starts.
   jcnt_after=`gunzip -c "${jdatafile}_top_temp.gz" | grep '^AAA,progname' | wc -l | awk '{print $1}'`

   # Added check to ensure record count is as expected.
   if [ "${jcnt_after}" -ge "${jdbf_cnt}" ] ; then
      echo "Confirmed record count (${jcnt_after}) in modified version is greater than or "
      echo "equal to (${jdbf_cnt}).  Building new (${jdatafile})."
      truncate_marker=`gunzip -c "${jdatafile}_top_temp.gz" | grep -n marker4truncate | cut -d: -f1`
      echo "truncate_marker location (${truncate_marker})"
      mv "${jdatafile}_top_temp.gz" "${jdatafile}.gz"
   else
      echo "### Error ### Unexpected result, returning to original datafile"
      echo "${jdatafile}"
      # The rejected copy is of no further use; do not leave it behind.
      rm -f "${jdatafile}_top_temp.gz"
   fi
   gunzip "${jdatafile}.gz"
else
   echo "(${jdbf_cnt}) days is not greater than purge_top_days value (${purge_top_days})."
   echo "No purging of TOP took place."
fi

# Removal of TOP data ends here.

# Start of truncation process
#
# Data older than truncate_days is moved to ${jdatafile}_archive.gz and the
# data file is cut down to the retained part.  The truncated copy is built
# and verified first; only then is the old data appended to the archive and
# the copy put in place.  A failed verification therefore leaves both the
# data file and the archive as they were, and the same lines cannot end up
# in the archive twice on the next run.
if [ "${truncate_days}" -lt "${jdbf_cnt}" ] ; then
   echo "(${jdbf_cnt}) is greater than the specified truncate value of (${truncate_days})."

   # jdbf_date_select is the first date of the data retained.
   # Any data before this date will be archived.
   jdbf_date_select=`awk '{print $4}' "${jdatafile}.dv_rpt_DBF" | uniq | tail -n -${truncate_days} | head -1`

   # jdbf_date_line is the full text of the DBF date line.
   jdbf_date_line=`grep "${jdbf_date_select}" "${jdatafile}.dv_rpt_DBF" | head -1`

   if [ "x${truncate_marker}x" = "xx" ] ; then
      # No marker was placed, so the line numbers of the DBF listing are
      # used directly.

      # jdbf_tail_lineno is the starting line number of the data to be retained.
      jdbf_tail_lineno=`echo ${jdbf_date_line} | awk '{print $2}'`

      # jdbf_head_lineno is the number of lines that are being archived.
      jdbf_head_lineno=`echo ${jdbf_date_line} | awk '{print ($2 - 1)}'`
   else
      # The TOP purge shifted the line numbers; cut around the marker line
      # instead (the marker itself is in neither part).

      # jdbf_tail_lineno is the starting line number of the data to be retained.
      jdbf_tail_lineno=`echo ${truncate_marker} | awk '{print ($1 + 1)}'`

      # jdbf_head_lineno is the number of lines that are being archived.
      jdbf_head_lineno=`echo ${truncate_marker} | awk '{print ($1 - 1)}'`
   fi

   if [ -s "${jdatafile}_archive" ] && [ -s "${jdatafile}_archive.gz" ] ; then
      echo "### error ###"
      echo "Both files ${jdatafile}_archive and ${jdatafile}_archive.gz exist,"
      echo "exiting.  Please investigate the problem."
      echo ""
      exit 1
   fi
   # If someone has gunzipped the archive file, gzip it back.
   if [ -s "${jdatafile}_archive" ] && [ ! -s "${jdatafile}_archive.gz" ] ; then
      echo "Archive file ${jdatafile}_archive is not gzipped"
      echo "Running - gzip ${jdatafile}_archive"
      gzip -f "${jdatafile}_archive"
   fi

   # Start by gzipping the datafile with -f (force)
   gzip -f "${jdatafile}"

   echo "Creating a truncated version of ${jdatafile} retaining (${truncate_days}) days."
   gunzip -c "${jdatafile}.gz" | tail -n +${jdbf_tail_lineno} | gzip -c > "${jdatafile}_tmp.gz"
   jtailcnt=`gunzip -c "${jdatafile}_tmp.gz" | grep '^AAA,progname' | wc -l | awk '{print $1}'`
   # Added check to ensure record count is as expected.
   if [ "${jtailcnt}" -ge "${truncate_days}" ] ; then
      echo "Confirmed record count (${jtailcnt}) in modified version is greater than or "
      echo "equal to (${truncate_days}).  Building new (${jdatafile})."
      # Add to the archive file.  The pipeline status is that of the final
      # gzip, so a failed write to the archive keeps the original data file.
      echo "Archiving data that is older than (${truncate_days}) days into file ${jdatafile}_archive.gz"
      if gunzip -c "${jdatafile}.gz" | head -${jdbf_head_lineno} | gzip -c >> "${jdatafile}_archive.gz" ; then
         mv "${jdatafile}_tmp.gz" "${jdatafile}.gz"
      else
         echo "### Error ### Could not append to ${jdatafile}_archive.gz, returning to original datafile"
         echo "${jdatafile}"
      fi
   else
      echo "### Error ### Unexpected result, returning to original datafile"
      echo "${jdatafile}"
   fi
   gunzip "${jdatafile}.gz"
else
   echo "(${jdbf_cnt}) days is not greater than truncate value (${truncate_days})."
   echo "No truncation took place."
fi
### End of truncation process

### Start of external data truncation ###
#
# Same procedure as for the mpgdata2 file, applied to the external data file.
# The truncated copy is built and verified before anything is appended to
# the archive, so a failed verification cannot leave the archived lines in
# both places.
if [ -s "${ext_data_jdatafile}" ]; then

   ext_data_truncate_minimum=30
   ext_data_truncate_default=90
   if [ "x${ext_data_truncate_days}x" = "xx" ]; then
      echo "ext_data_truncate_days not specified (${ext_data_truncate_days}); setting value to (${ext_data_truncate_default})."
      ext_data_truncate_days=${ext_data_truncate_default}
   fi

   # ${ext_data_jdatafile}_DBF_info holds the first ZZZZ line of each run of
   # equal dates as "lineno ZZZZ Tnnnn time date" (blank separated).
   grep -n '^ZZZZ,' "${ext_data_jdatafile}" | sed s/:/,/ | uniq | awk -F, 'BEGIN {{lastdate="x"}}{if( lastdate != $5 ) {{print $1,$2,$3,$4,$5} } {lastdate=$5}}' > "${ext_data_jdatafile}_DBF_info"

   # ext_data_jdbf_cnt is the number of lines in that file, reported as the
   # number of days of data.
   ext_data_jdbf_cnt=`wc -l "${ext_data_jdatafile}_DBF_info" | awk '{print $1}'`
   echo "There are (${ext_data_jdbf_cnt}) days of data in ${ext_data_jdatafile}"

   if [ "${ext_data_truncate_days}" -lt "${ext_data_truncate_minimum}" ] ; then
      echo "External data truncate days (${ext_data_truncate_days}) is less than min retention of (${ext_data_truncate_minimum}).  Exiting!"
      echo ""
      exit 1
   fi

   if [ "${ext_data_truncate_days}" -lt "${ext_data_jdbf_cnt}" ] ; then
      echo "(${ext_data_jdbf_cnt}) is greater than the specified truncate value of (${ext_data_truncate_days})."

      # ext_data_jdbf_date_select is the first date of the data retained.
      # Any data before this date will be archived.
      ext_data_jdbf_date_select=`awk '{print $5}' "${ext_data_jdatafile}_DBF_info" | uniq | tail -n -${ext_data_truncate_days} | head -1`

      # ext_data_jdbf_date_line is the full text of the DBF date line.
      ext_data_jdbf_date_line=`grep "${ext_data_jdbf_date_select}" "${ext_data_jdatafile}_DBF_info" | head -1`

      # ext_data_jdbf_tail_lineno is the starting line number of the data to be retained.
      ext_data_jdbf_tail_lineno=`echo ${ext_data_jdbf_date_line} | awk '{print $1}'`

      # ext_data_jdbf_head_lineno is the number of lines that are being archived.
      # The info line is blank separated, so the default field splitting is
      # used here as well.
      ext_data_jdbf_head_lineno=`echo ${ext_data_jdbf_date_line} | awk '{print ($1 - 1)}'`

      if [ -s "${ext_data_jdatafile}_archive" ] && [ -s "${ext_data_jdatafile}_archive.gz" ] ; then
         echo "### error ###"
         echo "Both files ${ext_data_jdatafile}_archive and ${ext_data_jdatafile}_archive.gz exist,"
         echo "exiting.  Please investigate the problem."
         echo ""
         exit 1
      fi
      # If someone has gunzipped the archive file, gzip it back.
      if [ -s "${ext_data_jdatafile}_archive" ] && [ ! -s "${ext_data_jdatafile}_archive.gz" ] ; then
         echo "Archive file ${ext_data_jdatafile}_archive is not gzipped"
         echo "Running - gzip ${ext_data_jdatafile}_archive"
         gzip -f "${ext_data_jdatafile}_archive"
      fi

      # Start by gzipping the datafile with -f (force)
      gzip -f "${ext_data_jdatafile}"

      echo "Creating a truncated version of ${ext_data_jdatafile} retaining (${ext_data_truncate_days}) days."
      gunzip -c "${ext_data_jdatafile}.gz" | tail -n +${ext_data_jdbf_tail_lineno} | gzip -c > "${ext_data_jdatafile}_tmp.gz"
      # Number of runs of equal dates among the ZZZZ lines of the copy.
      ext_data_jtailcnt=`gunzip -c "${ext_data_jdatafile}_tmp.gz" | grep '^ZZZZ' | awk -F, '{print $4}' | uniq -c | wc -l | awk '{print $1}'`
      # Added check to ensure record count is as expected.
      if [ "${ext_data_jtailcnt}" -ge "${ext_data_truncate_days}" ] ; then
         echo "Confirmed record count (${ext_data_jtailcnt}) in modified version is greater than or "
         echo "equal to (${ext_data_truncate_days}).  Building new (${ext_data_jdatafile})."
         # Add to the archive file.  The pipeline status is that of the final
         # gzip, so a failed write to the archive keeps the original data file.
         echo "Archiving data that is older than (${ext_data_truncate_days}) days into file ${ext_data_jdatafile}_archive.gz"
         if gunzip -c "${ext_data_jdatafile}.gz" | head -${ext_data_jdbf_head_lineno} | gzip -c >> "${ext_data_jdatafile}_archive.gz" ; then
            mv "${ext_data_jdatafile}_tmp.gz" "${ext_data_jdatafile}.gz"
         else
            echo "### Error ### Could not append to ${ext_data_jdatafile}_archive.gz, returning to original datafile"
            echo "${ext_data_jdatafile}"
         fi
      else
         echo "### Error ### Unexpected result, returning to original datafile"
         echo "${ext_data_jdatafile}"
      fi
      gunzip "${ext_data_jdatafile}.gz"
   else
      echo "(${ext_data_jdbf_cnt}) days is not greater than truncate value (${ext_data_truncate_days})."
      echo "No truncation took place."
   fi
else
   echo "ext_data_jdatafile (${ext_data_jdatafile}) was NOT found. "
fi


# The two DBF listings were needed by the steps above, so they can only be
# removed now.  They are kept when the data was not in sequence.
case "${jsequence}" in
   Ok) rm -f ${jdatafile}.dv_rpt_DBF_sort ${jdatafile}.dv_rpt_DBF ;;
esac

echo "Done!"
echo ""
