ad_general_link_check

one of the documented procedures in this installation of the ACS
Usage:
ad_general_link_check   db   link_id
What it does:
checks a link and steals meta tags
Defined in: /web/philip/tcl/ad-general-links.tcl

Source code:



    set url [database_to_tcl_string $db "select url from general_links where link_id = $link_id"]

    ns_db dml $db "begin transaction"
    ns_db dml $db "update general_links set last_checked_date=sysdate where link_id = $link_id"

    # strip off any trailing #foo section directives to browsers
    set complete_url $url
    regexp {^(.*/?[^/]+)\#[^/]+$} $complete_url match complete_url
    if [catch { set response [get_http_status $complete_url 0] } errmsg ] {
	# we got an error (probably a dead server)
	ns_db dml $db "end transaction"
	return $errmsg
    } elseif {$response == 404 || $response == 405 || $response == 500 } {
	# we should try again with a full GET 
	# because a lot of program-backed servers return 404 for HEAD
	# when a GET works fine
	if [catch { set response [get_http_status $complete_url 1] } errmsg] {
	    # probably the foreign server isn't responding
	    ns_db dml $db "end transaction"
	    return "server not responding"
	} 
    }

    if { $response != 200 && $response != 302 } {
	ns_db dml $db "end transaction"
	return "error in reaching server"
    } else {
	if {![catch {ns_httpget $complete_url 3 1} url_content]} {
	    
	    set meta_description ""
	    set meta_keywords ""
	    
	    regexp -nocase {<meta name="description" content="([^"]*)">} $url_content match meta_description
	    regexp -nocase {<meta name="keywords" content="([^"]*)">} $url_content match meta_keywords
	    
	    # process and truncate outrageously long meta tags

	    set QQmeta_description [DoubleApos $meta_description]
	    set QQmeta_keywords [DoubleApos $meta_keywords]

	    if {[string length $QQmeta_keywords]>4000} {
		set QQmeta_keywords "[string range $QQmeta_keywords 0 3996]..."
	    }
	    if {[string length $QQmeta_description]>4000} {
		set QQmeta_description "[string range $QQmeta_description 0 3996]..."
	    }
	} else {
	    return $url_content
	}
    }

    ns_db dml $db "update general_links
    set meta_description = '$QQmeta_description',
    meta_keywords = '$QQmeta_keywords',
    last_live_date = sysdate
    where link_id = $link_id
    "

    ns_db dml $db "end transaction"

    return 1



philg@mit.edu