cs_create_daily_archive { start_time " " }
What it does:
Creates a gzip archive of a single day's logs and deletes the original clickstream logs.
Defined in: /web/philip/tcl/1-cs-defs.tcl
Source code:
# Body of cs_create_daily_archive.
#
# Concatenates the 25 log files that cover the 24-hour window beginning at
# start_time into one gzip archive, then removes the source logs when the
# window is old enough.
#
# Input:
#   start_time  Start of the window, in the same units as [ns_time].
#               When empty, the window starts at yesterday_time (computed
#               below from the current time).
#
# The first and the 25th file are filtered through scratch files so that only
# lines whose leading tab-delimited field falls inside
# [start_time, start_time + 86400) are archived.  The files in between are
# archived whole.
# NOTE(review): this presumes cs_log_file maps a timestamp to one log file per
# 3600-second step -- confirm against its definition.
#
# Returns early (after a Notice in the server log) if any of the 25 files is
# missing, and (after an Error) if the archive cannot be written.  In the
# latter case no source log is deleted.

set now [ns_time]

# Strip one leading zero so that values such as "08" and "09" are not parsed
# as (invalid) octal numbers by expr.
regsub {^0} [ns_fmttime $now "%S"] "" seconds
regsub {^0} [ns_fmttime $now "%M"] "" minutes
regsub {^0} [ns_fmttime $now "%H"] "" hours

# Current time rolled back to hour 0, minute 0, second 0, minus one more day.
set yesterday_time [expr { $now - $seconds - 60 * $minutes - 3600 * $hours - 86400 }]

# Tolerate a whitespace-only argument the same way as an empty one.
set start_time [string trim $start_time]
if { [empty_string_p $start_time] } {
    set start_time $yesterday_time
}
set end_time [expr { $start_time + 86400 }]

set list_of_files [list]   ;# gzip inputs, in chronological order
set tmp_files [list]       ;# scratch files created here; always removed
set logs_to_delete [list]  ;# source logs that are safe to remove afterwards

set chunk_time $start_time
for { set i 0 } { $i <= 24 } { incr i } {
    set filename [cs_log_file $chunk_time]

    if { ![file exists $filename] } {
        if { $i == 0 } {
            ns_log Notice "cs: log file $filename does not exist (first chunk; $start_time)"
        } elseif { $i == 24 } {
            ns_log "Notice" "cs: log file $filename does not exist, 25th chunk"
        } else {
            ns_log Notice "cs: log file $filename does not exist (${i}th chunk; $chunk_time)"
        }
        # Do not leave scratch files behind when giving up.
        foreach tmp_filename $tmp_files {
            catch { exec /bin/rm -f $tmp_filename }
        }
        return
    }

    if { $i == 0 || $i == 24 } {
        # Boundary chunk: copy only the lines that belong to this window.
        set tmp_filename [ns_tmpnam]
        set in_file [open $filename "r"]
        set out_file [open $tmp_filename "w"]
        lappend tmp_files $tmp_filename

        while { [gets $in_file line] >= 0 } {
            # The timestamp is everything before the first tab.
            set curtime [string range $line 0 [expr { [string first "\t" $line] - 1 }]]
            if { $curtime >= $start_time && $curtime < $end_time } {
                # The channel does its own buffering; writing line by line
                # avoids the stray blank lines that manual buffering plus
                # puts used to insert.
                puts $out_file $line
            }
        }
        close $in_file
        close $out_file
        lappend list_of_files $tmp_filename

        # The first source log is removable afterwards.  The 25th is not
        # scheduled for removal here, matching the original behavior.
        if { $i == 0 } {
            lappend logs_to_delete $filename
        }
    } else {
        lappend list_of_files $filename
        lappend logs_to_delete $filename
    }

    incr chunk_time 3600
}

# Write out the archive.
set gzip_filename [cs_archive_file $start_time]
ns_log Notice "cs: condensing [join $list_of_files ","] to $gzip_filename"
set gzip_failed [catch { eval exec cat $list_of_files [list "|" "/usr/local/bin/gzip" "-c" > $gzip_filename] } gzip_errmsg]

# The scratch files have served their purpose whether or not gzip succeeded.
foreach tmp_filename $tmp_files {
    if { [catch { exec /bin/rm -f $tmp_filename } errmsg] } {
        ns_log Notice "Error: could not delete file: $errmsg"
    }
}

if { $gzip_failed } {
    # Never delete the source logs unless the archive was written.
    ns_log Error "cs: could not create archive $gzip_filename; original logs kept: $gzip_errmsg"
    return
}

# Delete the old logs only if the window started no later than yesterday_time.
if { $start_time <= $yesterday_time } {
    ns_log Notice "Deleting old clickstream logs..."
    foreach cs_log $logs_to_delete {
        if { [catch { exec /bin/rm -f $cs_log } errmsg] } {
            ns_log Notice "Error: could not delete file: $errmsg"
        }
    }
}