ns_log Notice "Load turl procs"
package require dns
package require uri
package require log
proc base62_func {} {
return [list 0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
}
proc get_base62 { n } {
set base62 [base62_func]
return [lsearch -exact $base62 $n]
}
proc print_base62 { n } {
set base62 [base62_func]
return [lindex $base62 $n]
}
proc decode { num } {
set length [max_encode_length]
if { $num == "random" } {
set num [random_turl]
}
if { ![regexp {([0-9a-zA-Z]+)} $num] } {
return -1
}
if { [string length $num] != $length } {
return -1
}
set max [expr $length - 1 ]
set output 0
foreach c [split $num {}] {
set output [expr $output + [expr [get_base62 $c] * pow(62,$max)]]
incr max -1
}
return [expr int($output)]
}
proc convert { m } {
if { $m < 62 } {
return [print_base62 $m]
} else {
set line "[convert [expr $m / 62]][print_base62 [expr $m % 62]]"
return $line
}
}
proc encode { num } {
set tmp [convert $num]
set diff [expr [max_encode_length] - [string length $tmp]]
for {set i 0} { $i < $diff } {incr i} {
set tmp "0$tmp"
}
return $tmp
}
proc get_page { conn } {
set url_list [ns_conn urlv $conn]
set url_list_size [ns_conn urlc $conn]
set offset [turl_url_offset]
set index [expr $url_list_size - $offset ]
if { $index > 0 } {
# we have /xxxxx which may or may not be a redirect
set monitor_id [ decode [ lindex $url_list $offset ] ]
# if the page does decode it is a redirect attempt
if { $monitor_id != -1 } {
# Okay, lets see if it is a valid redirect
set db [ns_db gethandle]
set url [database_to_tcl_string_or_null $db "select url from turl where monitor_id=$monitor_id and enabled='t'"]
if {[empty_string_p $url]} {
# not a valid redirect, so we'll return an error
ns_log Notice "Error, id $monitor_id not valid"
} else {
#set remote_addr [ns_conn peeraddr]
set remote_addr [ns_set get [ns_conn headers $conn] X-Forwarded-For]
set referrer [get_referrer]
set sql "insert into turl_click_log
(monitor_id,remote_addr,click_time,referrer)
values
($monitor_id,'[DoubleApos $remote_addr]',now(),'[DoubleApos $referrer]')"
if [catch {ns_db dml $db $sql} errmsg] {
ns_log Error "Could not log click for monitor $monitor_id, from $remote_addr/$referrer"
}
set rest [lindex $url_list [expr $offset + 1] end ]
if { ![empty_string_p $rest] } {
append url "/$rest"
}
set query [ns_conn query $conn]
if {![empty_string_p $query]} {
append url "?$query"
}
ns_log Notice "redirecting to $url"
if { [regexp {([^:]+)://([^:/]+)(:([0-9]+))?(/.*)?} $url \
match protocol server x port path] } {
dns::configure -nameserver 127.0.0.1 -loglevel critical
set tok [dns::resolve $server]
if { [string compare [dns::status $tok] "ok"] == 0 } {
set host [dns::address $tok]
catch {dns::clean $tok}
if {[rbl_blocked_p $host "sbl-xbl.spamhaus.org"]} {
ns_log Notice "$url: blocked"
ns_returnnotfound
catch {ns_conn close $conn}
return -1
}
} else {
# dns lookup failed, reject for now
ns_returnnotfound
catch {ns_conn close $conn}
return -1
}
}
ns_returnredirect $url
catch {ns_conn close $conn}
return -1
}
}
}
# if we get this far, we are not redirecting, therefore lets
# try to server the page
if { [empty_string_p [lindex $url_list [turl_url_offset]]] } {
set url "/index"
} else {
set url [ns_conn url $conn]
}
return "page_$url"
}
proc turl {conn ignored} {
#set ip [ns_conn peeraddr]
set ip [ns_set get [ns_conn headers $conn] X-Forwarded-For]
ns_log Notice "ip $ip"
set blocked_p [rbl_blocked_p $ip "sbl-xbl.spamhaus.org"]
#set blocked_p 0
if $blocked_p {
ns_returnnotfound
return
} else {
set page [get_page $conn]
}
if { $page != -1 } {
if { [regexp {^page_([^\.]+)(\.)?(.+)?} $page dummy file dot ext]} {
set pagefile "[doc_root]${file}"
if { [regexp {^/archive/?(.*)?} $file dummy archive_request]} {
archive_page $archive_request
return
}
# If we have an empty extention, add .adp to it (assuming the
# file does not exists
if {[empty_string_p $ext]} {
if {![valid_pagefile_p $pagefile]} {
foreach page_ext [turl_page_ext] {
if {[valid_pagefile_p "${pagefile}.${page_ext}"]} {
append pagefile ".$page_ext"
break
}
}
} else {
# Okay, it is not a valid page request, lets check for a directory.
if { [string compare [string range [ns_conn url $conn] end end] /] != 0 } {
set url "[turl_system_url][ns_conn url $conn]/"
ns_returnredirect $url
return 0
}
# if ext is empty, but is a valid maybe it is a directory?
if { [file isdirectory $pagefile] } {
set show_list_p 1
foreach server_ext [get_directory_files] {
if { [valid_pagefile_p "${pagefile}/$server_ext"] } {
append pagefile "/$server_ext"
set show_list_p 0
break
}
}
if { $show_list_p } {
# not index.adp or index.html, show a directory
# listing and return
ns_return 200 text/html "[turl_header [turl_system_name]]\n[directory_listing $pagefile]\n
\n[turl_footer]\n"
catch {ns_close $conn}
return 0
}
} else {
ns_returnnotfound
return
}
}
} else {
append pagefile ".${ext}"
}
if {![valid_pagefile_p $pagefile]} {
ns_returnnotfound
return
}
set type [ns_guesstype $pagefile]
if {[string match "*[ns_config ns/server/[ns_info server]/adp map]" $pagefile]} {
set page [catch {ns_adp_parse -file $pagefile} error]
ns_return 200 text/html $error
} else {
switch $type {
"*/*" {
}
default {
ns_returnfile 200 $type $pagefile
}
}
}
}
} else {
ns_returnnotfound
}
}
proc get_directory_files {} {
return [split [ns_config "ns/server/[ns_info server]" directoryfile] ,]
}
proc valid_pagefile_p { pagefile } {
if {[catch {set fp [open $pagefile]}]} {
return 0
} else {
return 1
}
}
proc directory_listing { dir } {
# taken directlry from OpenACS's reqeuest process
# ad_proc -private rp_html_directory_listing
set list "
| File | Size | Date |
| .. |
"
# Loop through the files, adding a row to the table for each.
foreach file [lsort [glob -nocomplain $dir/*]] {
set tail [file tail $file]
set link "$tail"
# Build the stat array containing information about the file.
file stat $file stat
set size [expr $stat(size) / 1000 + 1]K
set mtime $stat(mtime)
set time [clock format $mtime -format "%d-%h-%Y %H:%M"]
# Write out the row.
append list "| $link | $size | $time |
\n"
}
append list "
"
return $list
}
proc valid_page_p { url } {
if {[util_link_responding_p $url]} {
if { [catch { set status [util_get_http_status $url] } ] } {
set status "500"
}
} else {
set status "500"
}
if {$status == 500 || $status == 404} {
return 0
} else {
return 1
}
}
proc get_title { url } {
if { [catch {set page [ns_httpget $url]}] } {
set title ""
} else {
if {![regexp -nocase {(.*)?} $page match title]} {
set title ""
}
}
return $title
}
proc print_turl_link { monitor_id title } {
return " $title - [turl_system_url]/[encode $monitor_id][turl_system_url]/[encode $monitor_id]
"
}
proc turl_link_to_clipboard { monitor_id } {
return "
"
}
proc url_count { db url } {
return [database_to_tcl_string $db "select count(*) from turl where url='$url'"]
}
proc get_monitor_id_from_url { db url } {
return [database_to_tcl_string $db "select monitor_id from turl where url='$url'"]
}
proc get_monitor_id { db } {
return [database_to_tcl_string $db "select nextval('turl_monitor_id_seq')"]
}
proc add_url { url } {
if {![regexp {([^:]+)://([^:/]+)(:([0-9]+))?(/.*)?} $url match protocol host x serverport path]} {
return "You entered a bad url, make sure you have started the url with http://"
} else {
if {![valid_page_p $url]} {
return "Error getting url$url"
} else {
ns_log Notice "url to add: $url"
set title [get_title $url]
set db [ns_db gethandle]
set QQurl [DoubleApos $url]
dns::configure -nameserver 127.0.0.1 -loglevel critical
set tok [dns::resolve $host]
if { [string compare [dns::status $tok] "ok"] == 0 } {
set addr [dns::address $tok]
catch {dns::clean $tok}
if {[rbl_blocked_p $addr "sbl-xbl.spamhaus.org"]} {
return "Unknown Error"
}
} else {
# dns lookup failed, reject for now
return "Unknown Error"
}
if { $host == "uk.geocities.com" } {
return "Unknown Error"
}
if { [url_count $db $QQurl] > 0 } {
set monitor_id [get_monitor_id_from_url $db $QQurl]
set sql "update turl set checked_date = now(),working_date = now(),enabled='t' where monitor_id = $monitor_id"
catch {ns_db dml $db $sql}
} else {
set monitor_id [get_monitor_id $db]
while { [url_count $db $QQurl] > 0 } {
set monitor_id [get_monitor_id $db]
}
set sql "insert into turl (monitor_id,url,enabled,title,entered_date,checked_date,working_date) values ($monitor_id,'$QQurl','t','[DoubleApos $title]',now(),now(),now())"
if { [catch {ns_db dml $db $sql} errmsg] } {
return "Database error"
}
}
return "[print_turl_link $monitor_id $title]
[ turl_link_to_clipboard $monitor_id]"
}
}
}
proc list_of_urls { } {
set db [ns_db gethandle]
set urls ""
set selection [ns_db select $db "select monitor_id,url,title from turl order by monitor_id"]
while { [ns_db getrow $db $selection] } {
set_variables_after_query
set turl "[turl_system_url]/[encode $monitor_id]"
set link "$turl"
if {![empty_string_p $title]} {
#append urls "
$title - $link\n"
append urls "$link - $url\n"
} else {
append urls "$link - $url\n"
}
}
return $urls
}
proc click_count { {limit ""} } {
set db [ns_db gethandle]
set output ""
set sql "select t.monitor_id as monitor_id, t.title as title, count(*) as clicks from turl as t, turl_click_log as c
where c.monitor_id = t.monitor_id
group by t.monitor_id,t.title
order by clicks desc"
if {![empty_string_p $limit] } {
append sql " limit $limit"
}
set selection [ns_db select $db $sql]
while { [ns_db getrow $db $selection] } {
set_variables_after_query
set turl "[turl_system_url]/[encode $monitor_id]"
set link "$turl"
append output "| $link | $title | $clicks |
\n"
}
return $output
}
proc clicks_for_one_url { monitor_id } {
set db [ns_db gethandle]
set output ""
append output "[get_url_info $db $monitor_id ]
\n"
append output "\n"
append output "| Remote Address | Referrer | Clicks |
\n"
set sql "select remote_addr as addr ,referrer,count(*) as clicks
from turl_click_log
where monitor_id = $monitor_id
group by referrer,addr
order by clicks desc"
set selection [ns_db select $db $sql]
while { [ns_db getrow $db $selection] } {
set_variables_after_query
append output "| $addr | $referrer | $clicks |
\n"
}
append output "
\n"
return $output
}
proc get_url_info { db monitor_id } {
set output ""
set selection [ns_db 0or1row $db "select title,url,entered_date from turl where monitor_id = $monitor_id"]
set_variables_after_query
if {![empty_string_p $title]} {
append output "$title -"
}
append output "$url
entered:$entered_date"
return $output
}
proc check_urls {} {
set db [ns_db gethandle]
set monitor_ids [database_to_tcl_list $db "select monitor_id from turl"]
set n_urls [llength $monitor_ids]
set start_time [ns_time]
if { $n_urls == 0 } {
ns_log Notice "Found no urls to monitor"
} else {
ns_log Notice "Starting to check $n_urls urls"
foreach monitor_id $monitor_ids {
set selection [ns_db 0or1row $db "select url,date_part('day',now() - working_date) as non_working_days from turl where monitor_id = $monitor_id "]
if { $selection ==""} {
#this should never happen
continue
}
set_variables_after_query
ns_log Notice "$url/$non_working_days"
set sql "update turl set checked_date = now() where monitor_id = $monitor_id"
if { [catch {ns_db dml $db $sql} errmsg] } {
ns_log Error "Database error: $errmsg"
}
if {[valid_page_p $url]} {
set sql "update turl set working_date = now(),enabled='t' where monitor_id = $monitor_id"
if { [catch {ns_db dml $db $sql} errmsg] } {
ns_log Error "Database error: $errmsg"
} else {
ns_log Notice "monitor $monitor_id working"
}
} else {
if {$non_working_days > [disable_days]} {
set sql "update turl set enabled = 'f' where monitor_id = $monitor_id"
if { [catch {ns_db dml $db $sql} errmsg] } {
ns_log Error "Database error: $errmsg"
} else {
ns_log Notice "monitor $monitor_id disabled"
}
}
}
}
}
ns_log Notice "Took [expr [ns_time] - $start_time] seconds to check $n_urls urls"
}
proc counts {} {
set db [ns_db gethandle]
array set counts [list 1d day 1w week 1m month 1y year]
foreach count [array names counts] {
set where_clause "where click_time >= now() - interval \'1 $counts($count)\'"
set sql "select count(*) as clicks from turl_click_log ${where_clause};"
set selection [ns_db 0or1row $db $sql]
set_variables_after_query
set sql "select count(distinct monitor_id) as urls from turl_click_log ${where_clause};"
set selection [ns_db 0or1row $db $sql]
set_variables_after_query
ns_cache set counts_cache clicks_${count} $clicks
ns_cache set counts_cache urls_${count} $urls
ns_log Notice "Updated counts_cache for $counts($count) stats."
}
}
proc rbl_blocked_p { host domain } {
set str $host
set rc 0
set status "passed"
dns::configure -nameserver 127.0.0.1
catch {regexp "(\[0-9]{1,3})\.(\[0-9]{1,3})\.(\[0-9]{1,3})\.(\[0-9]{1,3})" $str all first second third fourth}
set address "$fourth.$third.$second.$first"
set tok [dns::resolve $address.$domain]
if { $first >= 210 && $first <= 211 } {
ns_log Notice "Asia Pacific ... no go"
set rc 1
}
if { [string compare [dns::status $tok] "ok"] == 0 } {
ns_log Notice "Address $str blocked by $domain"
set rc 1
}
catch {dns::clean $tok}
if {$rc} { set status "blocked" }
ns_log Notice "Host: $host $status"
return $rc
}
proc random_list_memeber { list } {
return [lindex $list [expr {int(rand()*[llength $list])}]]
}
proc random_turl {} {
set db [ns_db gethandle]
# get a list of all enabled turls
set sql "select monitor_id from turl where enabled='t'"
# convert enabled turls to a tcl list
set monitor_list [database_to_tcl_list $db $sql]
# Pick up a random turl
set monitor_id [random_list_memeber $monitor_list]
# Log it 'cause .. well, do I need a reason?
set sql "insert into turl_random_log (monitor_id,click_time) values ($monitor_id,now())"
if [catch {ns_db dml $db $sql} errmsg] {
ns_log Error "Could not log random click for monitor $monitor_id, from $remote_addr/$referrer"
}
# release the db handle
catch {ns_db releasehandle $db}
# Since we store decoded monitor_ids in the db, encoded and return
return [encode $monitor_id]
}
proc archive_page { request } {
set month 0; set year 0; set day 0;
set year_p 0; set month_p 0; set day_p 0
set urls {} ; set count 0; set page_title "MyTurl Achive for [turl_system_name]"
set desc "Myturls for "
if { [regexp {([0-9][0-9][0-9][0-9])?/?([0-9][0-9])?/?([0-9][0-9])?} $request dummy year month day] } {
set year [format "%04s" $year]
set month [format "%02s" $month]
set day [format "%02s" $day]
if { $year > 0 } { set year_p 1 }
if { $month > 0 } { set month_p 1 }
if { $day > 0 } { set day_p 1 }
if { $year_p } {
ns_log Notice "archive request for $year / $month / $day"
set db [ns_db gethandle]
set sql "select monitor_id,url,title,date(entered_date) as entered_date from turl where "
if { $year_p } {
append sql " date_part('year',entered_date) = $year "
append desc "Year: $year "
}
if { $month_p } {
append sql " and date_part('month',entered_date) = $month"
append desc "Month: $month "
}
if { $day_p } {
append sql " and date_part('day',entered_date) = $day"
append desc "Day: $day"
}
append sql " order by entered_date"
set selection [ns_db select $db $sql]
while { [ns_db getrow $db $selection] } {
set_variables_after_query
set turl "[turl_system_url]/[encode $monitor_id]"
set link "$entered_date: "
if {![empty_string_p $title]} {
append urls "${link}${title}\n"
} else {
append urls "${link}${url}\n"
}
incr count
}
catch {ns_db releasehandle $db}
}
if { $count > 0 } {
append page_title " ($count turls)"
} else {
append page_title ": no turls found"
set urls "No turls found."
}
set output "[turl_header $page_title]\n"
append output "$desc
"
append output "$urls\n"
append output "[turl_footer]\n"
ns_return 200 text/html $output
} else {
set output "[turl_header $page_title]\n"
append output "Error, incorrecty format\n"
append output "[turl_footer]\n"
ns_return 200 text/html $output
}
}
ns_register_proc GET [turl_url] turl
ns_register_proc POST [turl_url] turl
ns_register_proc HEAD [turl_url] turl
ns_log Notice "Done loading turl procs"
ns_share -init {set schedule_check_urls 0} schedule_check_urls
if {!$schedule_check_urls} {
ns_schedule_daily -thread 0 0 check_urls
ns_log Notice "URL check has been scheduled."
set schedule_check_urls 1
}
ns_share -init {set schedule_counts 0} schedule_counts
if {!$schedule_counts} {
ns_cache create counts_cache -size 100000
counts
ns_schedule_proc -thread 3600 counts
ns_log Notice "Counts update has been scheduled"
set schedule_counts 1
}