cs_create_daily_archive

one of the documented procedures in this installation of the ACS
Usage:
cs_create_daily_archive   { start_time "" }
What it does:
Creates a gzip archive of a single day's clickstream logs and then deletes the original log files.
Defined in: /web/philip/tcl/1-cs-defs.tcl

Source code:



    # Body of cs_create_daily_archive; the only argument is start_time,
    # which defaults to the empty string.
    #
    # Concatenates 25 consecutive log chunks (3600 seconds apart, starting at
    # start_time) into one gzip archive named by cs_archive_file.  The first
    # and last chunks are filtered so that only lines whose leading
    # tab-terminated timestamp lies in the window from start_time (inclusive)
    # to start_time + 86400 (exclusive) are kept; the 23 chunks in between are
    # used whole.  If any chunk is missing, a Notice is logged and nothing is
    # written.  Original logs are removed only when the archive was created
    # successfully and start_time is not later than yesterday_time.
    #
    # NOTE(review): cs_log_file is presumably the path of the hourly log that
    # covers the given time, and cs_archive_file the destination archive path;
    # both are defined outside this block -- confirm against their definitions.

    set now [ns_time]

    # Strip one leading zero so expr does not try to read "08" or "09" as an
    # (invalid) octal number.
    regsub {^0} [ns_fmttime $now "%S"] "" seconds
    regsub {^0} [ns_fmttime $now "%M"] "" minutes
    regsub {^0} [ns_fmttime $now "%H"] "" hours

    # Current time minus the elapsed part of today, minus one more day.
    set yesterday_time [expr { $now - $seconds - 60 * $minutes - 3600 * $hours - 86400 }]

    if { [empty_string_p $start_time] } {
        set start_time $yesterday_time
    }
    set end_time [expr { $start_time + 86400 }]

    # Resolve and verify all 25 chunk files before creating any temporary
    # file, so that a missing chunk cannot leave temporaries behind.
    set chunk_files [list]
    set chunk_time $start_time
    for { set i 0 } { $i <= 24 } { incr i } {
        set filename [cs_log_file $chunk_time]
        if { ![file exists $filename] } {
            if { $i == 0 } {
                ns_log Notice "cs: log file $filename does not exist (first chunk; $start_time)"
            } elseif { $i < 24 } {
                ns_log Notice "cs: log file $filename does not exist (${i}th chunk; $chunk_time)"
            } else {
                ns_log "Notice" "cs: log file $filename does not exist, 25th chunk"
            }
            return
        }
        lappend chunk_files $filename
        incr chunk_time 3600
    }

    set first_log [lindex $chunk_files 0]
    set last_log [lindex $chunk_files 24]

    # Copy only the in-window lines of the first and last chunks into
    # temporary files.  Each kept line is written exactly once followed by a
    # single newline; the channel does its own buffering, so no blank lines
    # are introduced and an empty selection yields an empty file.
    set tmp_files [list]
    foreach boundary_log [list $first_log $last_log] {
        set tmp_filename [ns_tmpnam]
        lappend tmp_files $tmp_filename

        set in [open $boundary_log "r"]
        set out [open $tmp_filename "w"]
        while { [gets $in line] >= 0 } {
            # The timestamp is everything before the first tab.
            set curtime [string range $line 0 [expr { [string first "\t" $line] - 1 }]]
            if { $curtime >= $start_time && $curtime < $end_time } {
                puts $out $line
            }
        }
        close $in
        close $out
    }

    # Archive input order: filtered first chunk, 23 whole chunks, filtered
    # last chunk.
    set middle_logs [lrange $chunk_files 1 23]
    set list_of_files [list [lindex $tmp_files 0]]
    foreach middle_log $middle_logs {
        lappend list_of_files $middle_log
    }
    lappend list_of_files [lindex $tmp_files 1]

    # write out the file
    set gzip_filename [cs_archive_file $start_time]
    ns_log Notice "cs: condensing [join $list_of_files ","] to $gzip_filename"

    set archive_failed [catch { eval exec cat $list_of_files [list "|" "/usr/local/bin/gzip" "-c" > $gzip_filename] } errmsg]
    if { $archive_failed } {
        ns_log Error "cs: could not create archive $gzip_filename: $errmsg"
    }

    # The temporary files are ours and are never needed again, whatever the
    # outcome of the archive step.
    foreach tmp_filename $tmp_files {
        if {[catch {exec /bin/rm -f $tmp_filename} errmsg]} {
            ns_log Notice "Error: could not delete file: $errmsg"
        }
    }

    # Never delete the source logs when the archive could not be written.
    if { $archive_failed } {
        return
    }

    # delete the old logs IF the start time was before yesterday.  The last
    # chunk's original file is kept: only part of it belongs to this window.
    if { $start_time <= $yesterday_time } {
        ns_log Notice "Deleting old clickstream logs..."
        foreach cs_log $middle_logs {
            if {[catch {exec /bin/rm -f $cs_log} errmsg]} {
                ns_log Notice "Error: could not delete file: $errmsg"
            }
        }

        # delete that pesky first file
        if {[catch {exec /bin/rm -f $first_log} errmsg]} {
            ns_log Notice "Error: could not delete file: $errmsg"
        }
    }



philg@mit.edu