ns_log Notice "Load turl procs" package require dns package require uri package require log proc base62_func {} { return [list 0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] } proc get_base62 { n } { set base62 [base62_func] return [lsearch -exact $base62 $n] } proc print_base62 { n } { set base62 [base62_func] return [lindex $base62 $n] } proc decode { num } { set length [max_encode_length] if { $num == "random" } { set num [random_turl] } if { ![regexp {([0-9a-zA-Z]+)} $num] } { return -1 } if { [string length $num] != $length } { return -1 } set max [expr $length - 1 ] set output 0 foreach c [split $num {}] { set output [expr $output + [expr [get_base62 $c] * pow(62,$max)]] incr max -1 } return [expr int($output)] } proc convert { m } { if { $m < 62 } { return [print_base62 $m] } else { set line "[convert [expr $m / 62]][print_base62 [expr $m % 62]]" return $line } } proc encode { num } { set tmp [convert $num] set diff [expr [max_encode_length] - [string length $tmp]] for {set i 0} { $i < $diff } {incr i} { set tmp "0$tmp" } return $tmp } proc get_page { conn } { set url_list [ns_conn urlv $conn] set url_list_size [ns_conn urlc $conn] set offset [turl_url_offset] set index [expr $url_list_size - $offset ] if { $index > 0 } { # we have /xxxxx which may or may not be a redirect set monitor_id [ decode [ lindex $url_list $offset ] ] # if the page does decode it is a redirect attempt if { $monitor_id != -1 } { # Okay, lets see if it is a valid redirect set db [ns_db gethandle] set url [database_to_tcl_string_or_null $db "select url from turl where monitor_id=$monitor_id and enabled='t'"] if {[empty_string_p $url]} { # not a valid redirect, so we'll return an error ns_log Notice "Error, id $monitor_id not valid" } else { #set remote_addr [ns_conn peeraddr] set remote_addr [ns_set get [ns_conn headers $conn] X-Forwarded-For] set referrer [get_referrer] set sql "insert into turl_click_log (monitor_id,remote_addr,click_time,referrer) values ($monitor_id,'[DoubleApos $remote_addr]',now(),'[DoubleApos $referrer]')" if [catch {ns_db dml $db $sql} errmsg] { ns_log Error "Could not log click for monitor $monitor_id, from $remote_addr/$referrer" } set rest [lindex $url_list [expr $offset + 1] end ] if { ![empty_string_p $rest] } { append url "/$rest" } set query [ns_conn query $conn] if {![empty_string_p $query]} { append url "?$query" } ns_log Notice "redirecting to $url" if { [regexp {([^:]+)://([^:/]+)(:([0-9]+))?(/.*)?} $url \ match protocol server x port path] } { dns::configure -nameserver 127.0.0.1 -loglevel critical set tok [dns::resolve $server] if { [string compare [dns::status $tok] "ok"] == 0 } { set host [dns::address $tok] catch {dns::clean $tok} if {[rbl_blocked_p $host "sbl-xbl.spamhaus.org"]} { ns_log Notice "$url: blocked" ns_returnnotfound catch {ns_conn close $conn} return -1 } } else { # dns lookup failed, reject for now ns_returnnotfound catch {ns_conn close $conn} return -1 } } ns_returnredirect $url catch {ns_conn close $conn} return -1 } } } # if we get this far, we are not redirecting, therefore lets # try to server the page if { [empty_string_p [lindex $url_list [turl_url_offset]]] } { set url "/index" } else { set url [ns_conn url $conn] } return "page_$url" } proc turl {conn ignored} { #set ip [ns_conn peeraddr] set ip [ns_set get [ns_conn headers $conn] X-Forwarded-For] ns_log Notice "ip $ip" set blocked_p [rbl_blocked_p $ip "sbl-xbl.spamhaus.org"] #set blocked_p 0 if $blocked_p { ns_returnnotfound return } else { set page [get_page $conn] } if { $page != -1 } { if { [regexp {^page_([^\.]+)(\.)?(.+)?} $page dummy file dot ext]} { set pagefile "[doc_root]${file}" if { [regexp {^/archive/?(.*)?} $file dummy archive_request]} { archive_page $archive_request return } # If we have an empty extention, add .adp to it (assuming the # file does not exists if {[empty_string_p $ext]} { if {![valid_pagefile_p $pagefile]} { foreach page_ext [turl_page_ext] { if {[valid_pagefile_p "${pagefile}.${page_ext}"]} { append pagefile ".$page_ext" break } } } else { # Okay, it is not a valid page request, lets check for a directory. if { [string compare [string range [ns_conn url $conn] end end] /] != 0 } { set url "[turl_system_url][ns_conn url $conn]/" ns_returnredirect $url return 0 } # if ext is empty, but is a valid maybe it is a directory? if { [file isdirectory $pagefile] } { set show_list_p 1 foreach server_ext [get_directory_files] { if { [valid_pagefile_p "${pagefile}/$server_ext"] } { append pagefile "/$server_ext" set show_list_p 0 break } } if { $show_list_p } { # not index.adp or index.html, show a directory # listing and return ns_return 200 text/html "[turl_header [turl_system_name]]\n[directory_listing $pagefile]\n

\n[turl_footer]\n" catch {ns_close $conn} return 0 } } else { ns_returnnotfound return } } } else { append pagefile ".${ext}" } if {![valid_pagefile_p $pagefile]} { ns_returnnotfound return } set type [ns_guesstype $pagefile] if {[string match "*[ns_config ns/server/[ns_info server]/adp map]" $pagefile]} { set page [catch {ns_adp_parse -file $pagefile} error] ns_return 200 text/html $error } else { switch $type { "*/*" { } default { ns_returnfile 200 $type $pagefile } } } } } else { ns_returnnotfound } } proc get_directory_files {} { return [split [ns_config "ns/server/[ns_info server]" directoryfile] ,] } proc valid_pagefile_p { pagefile } { if {[catch {set fp [open $pagefile]}]} { return 0 } else { return 1 } } proc directory_listing { dir } { # taken directlry from OpenACS's reqeuest process # ad_proc -private rp_html_directory_listing set list " " # Loop through the files, adding a row to the table for each. foreach file [lsort [glob -nocomplain $dir/*]] { set tail [file tail $file] set link "$tail" # Build the stat array containing information about the file. file stat $file stat set size [expr $stat(size) / 1000 + 1]K set mtime $stat(mtime) set time [clock format $mtime -format "%d-%h-%Y %H:%M"] # Write out the row. append list "\n" } append list "
FileSizeDate
..
$link$size$time
" return $list } proc valid_page_p { url } { if {[util_link_responding_p $url]} { if { [catch { set status [util_get_http_status $url] } ] } { set status "500" } } else { set status "500" } if {$status == 500 || $status == 404} { return 0 } else { return 1 } } proc get_title { url } { if { [catch {set page [ns_httpget $url]}] } { set title "" } else { if {![regexp -nocase {(.*)?} $page match title]} { set title "" } } return $title } proc print_turl_link { monitor_id title } { return " $title - [turl_system_url]/[encode $monitor_id]

[turl_system_url]/[encode $monitor_id]
" } proc turl_link_to_clipboard { monitor_id } { return "

" } proc url_count { db url } { return [database_to_tcl_string $db "select count(*) from turl where url='$url'"] } proc get_monitor_id_from_url { db url } { return [database_to_tcl_string $db "select monitor_id from turl where url='$url'"] } proc get_monitor_id { db } { return [database_to_tcl_string $db "select nextval('turl_monitor_id_seq')"] } proc add_url { url } { if {![regexp {([^:]+)://([^:/]+)(:([0-9]+))?(/.*)?} $url match protocol host x serverport path]} { return "You entered a bad url, make sure you have started the url with http://" } else { if {![valid_page_p $url]} { return "Error getting url

$url" } else { ns_log Notice "url to add: $url" set title [get_title $url] set db [ns_db gethandle] set QQurl [DoubleApos $url] dns::configure -nameserver 127.0.0.1 -loglevel critical set tok [dns::resolve $host] if { [string compare [dns::status $tok] "ok"] == 0 } { set addr [dns::address $tok] catch {dns::clean $tok} if {[rbl_blocked_p $addr "sbl-xbl.spamhaus.org"]} { return "Unknown Error" } } else { # dns lookup failed, reject for now return "Unknown Error" } if { $host == "uk.geocities.com" } { return "Unknown Error" } if { [url_count $db $QQurl] > 0 } { set monitor_id [get_monitor_id_from_url $db $QQurl] set sql "update turl set checked_date = now(),working_date = now(),enabled='t' where monitor_id = $monitor_id" catch {ns_db dml $db $sql} } else { set monitor_id [get_monitor_id $db] while { [url_count $db $QQurl] > 0 } { set monitor_id [get_monitor_id $db] } set sql "insert into turl (monitor_id,url,enabled,title,entered_date,checked_date,working_date) values ($monitor_id,'$QQurl','t','[DoubleApos $title]',now(),now(),now())" if { [catch {ns_db dml $db $sql} errmsg] } { return "Database error" } } return "[print_turl_link $monitor_id $title]

[ turl_link_to_clipboard $monitor_id]" } } } proc list_of_urls { } { set db [ns_db gethandle] set urls "" set selection [ns_db select $db "select monitor_id,url,title from turl order by monitor_id"] while { [ns_db getrow $db $selection] } { set_variables_after_query set turl "[turl_system_url]/[encode $monitor_id]" set link "$turl" if {![empty_string_p $title]} { #append urls "

  • $title - $link
  • \n" append urls "
  • $link - $url
  • \n" } else { append urls "
  • $link - $url
  • \n" } } return $urls } proc click_count { {limit ""} } { set db [ns_db gethandle] set output "" set sql "select t.monitor_id as monitor_id, t.title as title, count(*) as clicks from turl as t, turl_click_log as c where c.monitor_id = t.monitor_id group by t.monitor_id,t.title order by clicks desc" if {![empty_string_p $limit] } { append sql " limit $limit" } set selection [ns_db select $db $sql] while { [ns_db getrow $db $selection] } { set_variables_after_query set turl "[turl_system_url]/[encode $monitor_id]" set link "$turl" append output "$link$title$clicks\n" } return $output } proc clicks_for_one_url { monitor_id } { set db [ns_db gethandle] set output "" append output "[get_url_info $db $monitor_id ]
    \n" append output "\n" append output "\n" set sql "select remote_addr as addr ,referrer,count(*) as clicks from turl_click_log where monitor_id = $monitor_id group by referrer,addr order by clicks desc" set selection [ns_db select $db $sql] while { [ns_db getrow $db $selection] } { set_variables_after_query append output "\n" } append output "
    Remote AddressReferrerClicks
    $addr$referrer$clicks
    \n" return $output } proc get_url_info { db monitor_id } { set output "" set selection [ns_db 0or1row $db "select title,url,entered_date from turl where monitor_id = $monitor_id"] set_variables_after_query if {![empty_string_p $title]} { append output "$title -" } append output "$url
    entered:$entered_date" return $output } proc check_urls {} { set db [ns_db gethandle] set monitor_ids [database_to_tcl_list $db "select monitor_id from turl"] set n_urls [llength $monitor_ids] set start_time [ns_time] if { $n_urls == 0 } { ns_log Notice "Found no urls to monitor" } else { ns_log Notice "Starting to check $n_urls urls" foreach monitor_id $monitor_ids { set selection [ns_db 0or1row $db "select url,date_part('day',now() - working_date) as non_working_days from turl where monitor_id = $monitor_id "] if { $selection ==""} { #this should never happen continue } set_variables_after_query ns_log Notice "$url/$non_working_days" set sql "update turl set checked_date = now() where monitor_id = $monitor_id" if { [catch {ns_db dml $db $sql} errmsg] } { ns_log Error "Database error: $errmsg" } if {[valid_page_p $url]} { set sql "update turl set working_date = now(),enabled='t' where monitor_id = $monitor_id" if { [catch {ns_db dml $db $sql} errmsg] } { ns_log Error "Database error: $errmsg" } else { ns_log Notice "monitor $monitor_id working" } } else { if {$non_working_days > [disable_days]} { set sql "update turl set enabled = 'f' where monitor_id = $monitor_id" if { [catch {ns_db dml $db $sql} errmsg] } { ns_log Error "Database error: $errmsg" } else { ns_log Notice "monitor $monitor_id disabled" } } } } } ns_log Notice "Took [expr [ns_time] - $start_time] seconds to check $n_urls urls" } proc counts {} { set db [ns_db gethandle] array set counts [list 1d day 1w week 1m month 1y year] foreach count [array names counts] { set where_clause "where click_time >= now() - interval \'1 $counts($count)\'" set sql "select count(*) as clicks from turl_click_log ${where_clause};" set selection [ns_db 0or1row $db $sql] set_variables_after_query set sql "select count(distinct monitor_id) as urls from turl_click_log ${where_clause};" set selection [ns_db 0or1row $db $sql] set_variables_after_query ns_cache set counts_cache clicks_${count} $clicks ns_cache set counts_cache urls_${count} $urls ns_log Notice "Updated counts_cache for $counts($count) stats." } } proc rbl_blocked_p { host domain } { set str $host set rc 0 set status "passed" dns::configure -nameserver 127.0.0.1 catch {regexp "(\[0-9]{1,3})\.(\[0-9]{1,3})\.(\[0-9]{1,3})\.(\[0-9]{1,3})" $str all first second third fourth} set address "$fourth.$third.$second.$first" set tok [dns::resolve $address.$domain] if { $first >= 210 && $first <= 211 } { ns_log Notice "Asia Pacific ... no go" set rc 1 } if { [string compare [dns::status $tok] "ok"] == 0 } { ns_log Notice "Address $str blocked by $domain" set rc 1 } catch {dns::clean $tok} if {$rc} { set status "blocked" } ns_log Notice "Host: $host $status" return $rc } proc random_list_memeber { list } { return [lindex $list [expr {int(rand()*[llength $list])}]] } proc random_turl {} { set db [ns_db gethandle] # get a list of all enabled turls set sql "select monitor_id from turl where enabled='t'" # convert enabled turls to a tcl list set monitor_list [database_to_tcl_list $db $sql] # Pick up a random turl set monitor_id [random_list_memeber $monitor_list] # Log it 'cause .. well, do I need a reason? set sql "insert into turl_random_log (monitor_id,click_time) values ($monitor_id,now())" if [catch {ns_db dml $db $sql} errmsg] { ns_log Error "Could not log random click for monitor $monitor_id, from $remote_addr/$referrer" } # release the db handle catch {ns_db releasehandle $db} # Since we store decoded monitor_ids in the db, encoded and return return [encode $monitor_id] } proc archive_page { request } { set month 0; set year 0; set day 0; set year_p 0; set month_p 0; set day_p 0 set urls {} ; set count 0; set page_title "MyTurl Achive for [turl_system_name]" set desc "Myturls for " if { [regexp {([0-9][0-9][0-9][0-9])?/?([0-9][0-9])?/?([0-9][0-9])?} $request dummy year month day] } { set year [format "%04s" $year] set month [format "%02s" $month] set day [format "%02s" $day] if { $year > 0 } { set year_p 1 } if { $month > 0 } { set month_p 1 } if { $day > 0 } { set day_p 1 } if { $year_p } { ns_log Notice "archive request for $year / $month / $day" set db [ns_db gethandle] set sql "select monitor_id,url,title,date(entered_date) as entered_date from turl where " if { $year_p } { append sql " date_part('year',entered_date) = $year " append desc "Year: $year " } if { $month_p } { append sql " and date_part('month',entered_date) = $month" append desc "Month: $month " } if { $day_p } { append sql " and date_part('day',entered_date) = $day" append desc "Day: $day" } append sql " order by entered_date" set selection [ns_db select $db $sql] while { [ns_db getrow $db $selection] } { set_variables_after_query set turl "[turl_system_url]/[encode $monitor_id]" set link "$entered_date: " if {![empty_string_p $title]} { append urls "
  • ${link}${title}
  • \n" } else { append urls "
  • ${link}${url}
  • \n" } incr count } catch {ns_db releasehandle $db} } if { $count > 0 } { append page_title " ($count turls)" } else { append page_title ": no turls found" set urls "No turls found." } set output "[turl_header $page_title]\n" append output "

    $desc

    " append output "$urls\n" append output "[turl_footer]\n" ns_return 200 text/html $output } else { set output "[turl_header $page_title]\n" append output "Error, incorrecty format\n" append output "[turl_footer]\n" ns_return 200 text/html $output } } ns_register_proc GET [turl_url] turl ns_register_proc POST [turl_url] turl ns_register_proc HEAD [turl_url] turl ns_log Notice "Done loading turl procs" ns_share -init {set schedule_check_urls 0} schedule_check_urls if {!$schedule_check_urls} { ns_schedule_daily -thread 0 0 check_urls ns_log Notice "URL check has been scheduled." set schedule_check_urls 1 } ns_share -init {set schedule_counts 0} schedule_counts if {!$schedule_counts} { ns_cache create counts_cache -size 100000 counts ns_schedule_proc -thread 3600 counts ns_log Notice "Counts update has been scheduled" set schedule_counts 1 }