#! /bin/bash
#
# Rebuild cleaned copies of the referrer page (ref_<date>.html) and of the
# usage page (usage_<date>.html) for one period, filtering referrers through
# the local blacklists.
#
# Usage:  <script> date [-c] [-r]
#   (no option)  build the ckoi_table_*.html pages, then stop
#   -c           do not build the ckoi tables; write the cleaned pages
#   -r           same as -c, and do not update liste_coucous either
#                (useful when debugging: resolveip is slow)
#
# Environment:  wbzdir, wazdir and cfgdir must be set; the script's own
#               message says they come from running '. .config'.
# Working files (all in $cfgdir): blacklist_txt, blacklist_local,
#               blacklist_non, blacklist_oui, resolve_list, liste_coucous,
#               tmp_*, ckoi_table_*.
# External helpers used but not defined here: resolveip, nbli,
#               referrers_fake.
#
# NOTE(review): in the copy of this script that was reviewed, the HTML markup
# inside several here-documents and sed patterns appears to have been
# stripped (empty '\þþ' addresses, 's¶.*¶¶' pairs, tag-less table text).
# Those spots are reproduced as found and marked NOTE(review) below; restore
# them from a pristine copy before relying on the generated pages.

maxsizeofreftable=30   # upper bound on rows written to the usage page table
cflag=""               # explicit defaults: do not inherit flags from the
rflag=""               # caller's environment
fakeflag=""

# Print each argument on its own line to stderr and exit with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

# Print the command syntax to stderr and exit with status 2.
usage() {
  {
    echo
    echo "syntax is $0 file [-c] [-r] "
    echo
  } >&2
  exit 2
}

# Sum the first whitespace-separated field of every line read on stdin and
# print the total (0 when there is no input).
sum_hits() {
  awk '{ total += $1 } END { print total + 0 }'
}

# Print one referrer line: the count left-justified in 10 columns, its share
# of $hits as "NN.NN%", a space, then the label.
#   $1 - hit count
#   $2 - label (referrer text)
# Reads the global 'hits', which must be a positive integer.
percent() {
  local count=$1 label=$2
  local share=$(( count * 10000 / hits ))   # hundredths of a percent
  printf '%-10.10s%2d.%02d%% %s\n' \
    "$count" "$(( share / 100 ))" "$(( share % 100 ))" "$label"
}

# Write a small sed script to tmp_sed that selects the line matching $1 plus
# the line after it, and print what that script extracts from tmp_usa.html.
#   $1 - text to look for (used as a sed regex)
# NOTE(review): the two 's¶.*¶¶' commands are reproduced as found; as written
# they erase the whole pattern space. They presumably carried HTML tags that
# are missing from the reviewed copy -- restore them.
extract_stat() {
  cat << EOF > tmp_sed
/$1/ {
N
s¶\n¶¶
s¶.*¶¶
s¶.*¶¶
p
}
EOF
  sed -nf tmp_sed tmp_usa.html
}

#-------------------- configuration and arguments ----------
if [[ -z "${wbzdir:-}" || -z "${wazdir:-}" || -z "${cfgdir:-}" ]]; then
  die "" \
    "aborting while wbzdir='${wbzdir:-}', wazdir='${wazdir:-}', cfgdir='${cfgdir:-}'" \
    "" \
    "the command '. .config' must be executed before any use of this package"
fi

# The period to rebuild is mandatory; without it the file names below would
# degenerate to ref_.html / usage_.html.
if (( $# < 1 )) || [[ -z "$1" ]]; then
  usage
fi
date=$1
shift

for opt in "$@"; do
  case "$opt" in
    -c) cflag="cflag" ;;
    -r) rflag="rflag"; cflag="cflag" ;;
    *) usage ;;
  esac
done

echo
echo "rebuilding $date : (y/n) ?"
read -r rep
if [[ "$rep" != "y" ]]; then
  echo aborting
  echo
  exit 0
fi

# Everything below reads and overwrites files by relative name, so a failed
# cd must stop the script.
cd "$cfgdir" || die "cannot cd to cfgdir='$cfgdir'; aborting"

#-------------------- build the blacklist filters -----------
from=blacklist_txt     # main blacklist
frmo=blacklist_local   # local additions
vers=blacklist_non     # generated sed script: deletes blacklisted lines
serv=blacklist_oui     # generated sed script: prints blacklisted lines

# Create missing lists truly empty. Seeding them with a blank line (as
# 'echo ""' did) put an empty entry at the top of the sorted list.
[[ -f "$from" ]] || : > "$from"
[[ -f "$frmo" ]] || : > "$frmo"
sort -u -o "$from" "$from"
sort -u -o "$frmo" "$frmo"
sort "$from" "$frmo" > tmp_from

: > "$serv"
: > "$vers"
# Each entry is one or two whitespace-separated sed regexes. Blank lines are
# skipped rather than ending the scan, and there is no cap on the number of
# entries. 'read -r' keeps backslashes and avoids glob expansion of the
# regexes.
while read -r arga argb _; do
  [[ -n "$arga" ]] || continue
  if [[ -n "$argb" ]]; then
    printf '%s\n' "\\¶${arga}¶ { \\¶${argb}¶ d}" >> "$vers"
    printf '%s\n' "\\¶${arga}¶ { \\¶${argb}¶ p}" >> "$serv"
  else
    printf '%s\n' "\\¶${arga}¶ d" >> "$vers"
    printf '%s\n' "\\¶${arga}¶ p" >> "$serv"
  fi
done < tmp_from

#-------------- file names ----------------------------------
file_ref="ref_${date}.html"
file_usa="usage_${date}.html"

if [[ -f "$wazdir/$file_usa" ]]; then
  cp -- "$wazdir/$file_usa" tmp_usa.html \
    || die "cannot copy $wazdir/$file_usa; aborting"
else
  die "File $wazdir/$file_usa not found" "Cannot proceed; aborting" ""
fi

if [[ -f "$wazdir/$file_ref" ]]; then
  ## echo "using the genuine $file_ref file"
  cp -- "$wazdir/$file_ref" tmp_ref.html \
    || die "cannot copy $wazdir/$file_ref; aborting"
elif [[ -f referrers_fake ]]; then
  ## echo "building a fake $file_ref file"
  # NOTE(review): presumably referrers_fake produces tmp_ref.html (and a
  # file under $wbzdir that is removed at the end) -- confirm.
  referrers_fake -bonjour "$date"
  fakeflag=fakeflag
else
  die "" \
    "File $wazdir/$file_ref not found" \
    "Executable $cfgdir/referrers_fake not found" \
    "Cannot proceed; aborting" \
    ""
fi
echo "$file_ref"

#------------- statistics -----------------------------------
hits=$(extract_stat "Total Hits")
totref=$(extract_stat "Total Unique Referrers")

# 'hits' is the divisor of every percentage computed below.
if ! [[ "$hits" =~ ^[[:space:]]*[0-9]+[[:space:]]*$ ]] || (( hits <= 0 )); then
  die "could not read a positive 'Total Hits' value from $file_usa (got '$hits'); aborting"
fi

# Drop the first 35 lines (they are copied verbatim into the cleaned page
# further down), the lines ending in " google", and the tail of the page;
# then apply the resolve_list sed script.
# NOTE(review): the '\þþ' address is an empty regex as found; the pattern it
# held is missing from the reviewed copy.
sed -e '1,35 d ; \þ google$þ d ; \þþ, $ d' tmp_ref.html \
  | sed -f resolve_list > tmp_ref0.html

#------------- blacklisted referrers ("coucous") ------------
if [[ -z "$rflag" ]]; then
  sed -nf "$serv" tmp_ref0.html \
    | sed -e 's¶.*http://¶¶; s¶/.*¶¶' \
    | sort -u \
    | while read -r host; do
        # Unquoted on purpose: several addresses end up on one line.
        # shellcheck disable=SC2046
        echo $(resolveip -s "$host" 2> /dev/null) "$host"
      done \
    | sort -n >> liste_coucous
  sort -u liste_coucous | sort -n > liste_coucous2
  mv -- liste_coucous2 liste_coucous
fi

#------------- regular referrers ----------------------------
# Keep only lines that mention http://, Direct Request or Local Request.
sed -f "$vers" tmp_ref0.html \
  | sed -e '\¶http://¶ ! { \¶Direct Request¶ ! { \¶Local Request¶ ! d} } ; \¶http://www/$¶ d' \
  > tmp_ref1.html

re_search_a='google[.][a-z][a-z]*/search'
re_search_b='google[.][a-z][a-z]*[.][a-z][a-z]*/search'
re_images='images[.]google'
re_home='//[^/]*google[.][a-z][a-z]*[/]*$'

numsearcha=$(grep "$re_search_a" tmp_ref1.html | sum_hits)
numsearchb=$(grep "$re_search_b" tmp_ref1.html | sum_hits)
numsearchc=$(grep "$re_images" tmp_ref1.html | sum_hits)
grep -v "$re_search_a" tmp_ref1.html \
  | grep -v "$re_search_b" \
  | grep -v "$re_images" > tmp_ref2.html

numsearchd=$(grep translate tmp_ref2.html | grep google | sum_hits)
numsearche=$(grep -v translate tmp_ref2.html | grep "$re_home" | sum_hits)
grep -v "$re_home" tmp_ref2.html \
  | sed -e '/translate/ {/google/ d}' > tmp_ref3.html

numsearchf=$(grep "Direct Request" tmp_ref3.html | sum_hits)
numsearchg=$(grep "Local Request" tmp_ref3.html | sum_hits)
grep -v "Direct Request" tmp_ref3.html \
  | grep -v "Local Request" > tmp_ref4.html

# Append one aggregated line per category to the remaining referrers.
numsearcha=$(( numsearcha + numsearchb ))
{
  percent "$numsearchf" "- (Direct Request)"
  percent "$numsearcha" "http://www.google.xx/search"
  percent "$numsearchd" "http://www.google.xx/translate_c"
  if (( numsearchc != 0 )); then
    percent "$numsearchc" "http://images.google.xx/imgres"
  fi
  if (( numsearche != 0 )); then
    percent "$numsearche" "http://www.google.xx"
  fi
  if (( numsearchg != 0 )); then
    percent "$numsearchg" "- (Local Request)"
  fi
} >> tmp_ref4.html
sort -n -r tmp_ref4.html > tmp_ref5.html

#------------- active list of referrers ---------------------
# NOTE(review): nbli is an external helper; presumably it prints the number
# of lines of the file -- confirm.
qteref=$(nbli tmp_ref5.html)

if [[ -z "$cflag" ]]; then
  echo "building the ckoi_ref.html page of these $qteref referrers"
  for (( i = 1; i <= qteref; i++ )); do
    # Reading into an array and re-joining collapses runs of blanks to one
    # space without exposing the text to glob expansion.
    read -r -a words
    line="${words[*]}"
    page=$(printf '%s\n' "$line" | sed -e 's¶.*%[ ][ ]*¶¶')
    many=$(printf '%s\n' "$line" | sed -e 's¶....%.*¶¶')
    site=$(printf '%s\n' "$line" | sed -e 's¶.*%[ ][ ]*¶¶ ; s¶http://¶¶; s¶/.*¶¶')
    if [[ -z "$site" || "$site" == "- (Direct Request)" ]]; then
      continue
    fi
    if [[ "$site" =~ google\.xx ]]; then
      continue
    fi
    # NOTE(review): this row presumably had table markup around its fields
    # (the sort keys below use fields 2, 3 and 5); reproduced as found.
    # The resolveip output is left unquoted so several addresses share a line.
    # shellcheck disable=SC2046
    echo " "$(resolveip -s "$site")" $many" "$page" $'\n'
  done < tmp_ref5.html > ckoi_table_datas

  les_tables=(ckoi_table_ips.html ckoi_table_hit.html ckoi_table_nam.html)
  # NOTE(review): page header reproduced as found; markup missing.
  for ouca in "${les_tables[@]}"; do
    cat << EOF > "$ouca"
Referrers not yet blacklisted

Referrers not yet blacklisted


EOF
  done
  sort -k 2 -n ckoi_table_datas >> ckoi_table_ips.html
  sort -k 3 -n -r ckoi_table_datas >> ckoi_table_hit.html
  sort -k 5 ckoi_table_datas >> ckoi_table_nam.html
  # NOTE(review): page footer reproduced as found; markup missing.
  for ouca in "${les_tables[@]}"; do
    cat << EOF >> "$ouca"
ips hit  nam 
EOF
  done
  exit 0
fi

# Cleaned referrer page: original first 35 lines, the filtered list, then
# the original tail.
# NOTE(review): empty '\þþ' address reproduced as found.
{
  sed -ne '1,35 p ' tmp_ref.html
  cat tmp_ref5.html
  sed -ne '\þþ, $ p' tmp_ref.html
} > "$cfgdir/$file_ref"
echo "a cleaned $file_ref is available in $cfgdir"

#------------- processing of the "usage" file ----------------
if (( qteref > maxsizeofreftable )); then
  qteref=$maxsizeofreftable
fi

# NOTE(review): in the reviewed copy the here-documents of this section had
# lost their markup and their text had been gathered in one place. The text
# is placed here according to the variables it uses (heading before the
# loop, one row per iteration); the two row variants are textually identical
# as found and presumably differed only in markup. The '\þþ' addresses are
# empty as found. Restore all of it from a pristine copy.
{
  sed -e '\þþ, $ d' tmp_usa.html
  cat << EOF
Top $qteref of $totref Total Referrers
# Hits Referrer
EOF
  for (( i = 1; i <= qteref; i++ )); do
    # Fields: count, percentage, then the referrer text (re-joined with
    # single spaces).
    read -r -a words
    arga=${words[0]:-}
    argb=${words[1]:-}
    argc="${words[*]:2}"
    if [[ "$argc" == *google?xx* || "$argc" == *"Direct Request"* ]]; then
      cat << EOF
$i $arga $argb ${argc:0:80}
EOF
    else
      cat << EOF
$i $arga $argb ${argc:0:80}
EOF
    fi
  done < tmp_ref5.html
  if [[ -n "$fakeflag" ]]; then
    cat << EOF

EOF
  else
    cat << EOF
View All Referrers

EOF
  fi
  sed -ne '\þþ, $ p' tmp_usa.html
} > "$cfgdir/$file_usa"
echo "a cleaned $file_usa is available in $cfgdir"

if [[ -n "$fakeflag" ]]; then
  ## echo "delete fake $wbzdir/$file_ref (y/n) ?"
  ## read rep
  rep="y"
  if [[ "$rep" == "y" ]]; then
    rm -- "$wbzdir/$file_ref"
  fi
fi
# rm tmp*