#!c:/Perl/bin/perl use CGI qw/:standard/; # Turbo Lingo - written by Danko Sipka, 2001 print "Content-type: text/html; charset=windows-1250\n\n"; print header, start_html('Turbo Lingo - Your Results'), h1('Your Results:'); $q = new CGI; $page = $q->param('page'); $boks = $q->param('boks'); $conc = $q->param('conc'); $freq = $q->param('freq'); $rfreq = $q->param('rfreq'); $lfreq = $q->param('lfreq'); $lconc = $q->param('lconc'); $phono = $q->param('phono'); $lexa = $q->param('lexa'); $sleng = $q->param('sleng'); $size = $q->param('b'); $bb = $q->param('bb'); $sort = $q->param('sort'); $stopw = $q->param('stopw'); if ($page ne "http://") { if ($boks eq "") { &strana; $ffile=$page; if ($conc eq "on") { &konc_udri; } else { print hr; print "Concordance not selected"; } if ($lconc eq "on") { &lkonc_udri; } else { print hr; print "Letter concordance not selected"; } if ($freq eq "on") { &freq_udri; } else { print hr; print "Frequency list not selected"; } if ($lfreq eq "on") { &freqchar_udri; } else { print hr; print "Letter frequency list not selected"; } if ($rfreq eq "on") { &rfreq_udri; } else { print hr; print "Reversed frequency list not selected"; } if ($phono eq "on") { &phono_udri; } else { print hr; print "1x1 phonotactics not selected"; } if ($lexa eq "on") { &lexa_udri; } else { print hr; print "1x1 lexical statistics not selected"; } if ($sleng eq "on") { &sleng_udri; } else { print hr; print "Sentence length not selected"; } } else { print "You can enter either a Web page or paste a text. Go back and choose only one option"; } } elsif ($boks ne "") { $ffile="Pasted text"; if ($conc eq "on") { &konc_udri; } else { print hr; print "Concordance not selected"; } if ($lconc eq "on") { &lkonc_udri; } else { print hr; print "Letter concordance not selected"; } if ($freq eq "on") { &freq_udri; } else { print hr; print "Frequency list not selected"; } if ($lfreq eq "on") { &freqchar_udri; } else { print hr; print "Letter frequency list not selected"; } if ($rfreq eq "on") { &rfreq_udri; } else { print hr; print "Reversed frequency list not selected"; } if ($phono eq "on") { &phono_udri; } else { print hr; print "1x1 phonotactics not selected"; } if ($lexa eq "on") { &lexa_udri; } else { print hr; print "1x1 lexical statistics not selected"; } if ($sleng eq "on") { &sleng_udri; } else { print hr; print "Sentence length not selected"; } } else { print "You did not enter anything. Go back and enter a Web page or paste a text"; } sub konc_udri { $z=0; $u=$size; @drkaj=''; @hrkaj=''; $c=1; $a=0; @sorta =""; @stopw = ""; @sorta=split /\s+/, $sort; @stopw=split /\s+/, $stopw; print hr; print "

Your concordance

"; @niska=split /\s+/, $boks; unshift @niska, split(//,' ' x $u); foreach $size ($u..($#niska)) { $viska[$size]=$niska[$size]; $viska[$size]=~s/\]//g; $viska[$size]=~s/\(//g; $viska[$size]=~s/\)//g; $viska[$size]=~s/\[//g; $viska[$size]=~s/\///g; $viska[$size]=~s/\{//g; $viska[$size]=~s/\\//g; $viska[$size]=~s/ \d+ /zzz./g; foreach $a (0..9) { $viska[$size]=~s/$sorta[$a]/0$a./g; $viska[$size]=~s/\.(\d+)\./\.$1/g; } foreach $a (10..$#sorta) { $viska[$size]=~s/$sorta[$a]/$a./g; $viska[$size]=~s/\.(\d+)\./\.$1/g; } if ($niska[$size] eq "-") { } elsif ($niska[$size] eq "+") { } elsif ($niska[$size] eq "=") { } else { push (@drkaj, $viska[$size].xzx.$niska[$size].xzx.(join ' ', @niska[$size+1..$size+$u]).xzx.(join ' ', @niska[$size-$u..$size-1]).xzx.$ffile.xzx.$c); $c++; } } @mrkaj = sort {$a <=> $b} @drkaj; for $z (0 .. $#mrkaj) { $mrkaj[$z]=~s/xzxxzx/xzx xzx/g; $mrkaj[$z]=~s/.+?xzx(.+)xzx(.+)xzx(.+)xzx(.+)xzx(.+)/$3\t\|$1\|\t$2\t$4: $5\n/; $mrkaj[$z]=~s/(.+?)(\|.+?\|)(.+?)/$1\\$2\<\/font>\<\/b>$3/g; $mrkaj[$z]=~s/([.,"!?:;'])(\|\<\/font\>\<\/b\>)/$2$1/g; $mrkaj[$z]=~s/([\)\]\}\\\/])(\|\<\/font\>\<\/b\>)/$2$1/g; $mrkaj[$z]=~s/(\$mrkaj[$z]"; if ($stopw eq "") { print "
$mrkaj[$z]";
     } else {
     foreach $stopw(@stopw) {
     $opw='|'.$stopw.'|';   
     if (index($mrkaj[$z], $opw) > -1) {
     print "
$mrkaj[$z]";
} else {
}
}
}
#
                          }
             }

sub lkonc_udri {
$z=0;
$size=$bb;
$u=$size;
@drkaj='';
@hrkaj='';
@ssorta="";
@ssorta=split /\s+/, $sort;
$c=1;
$j=0; 
$a=0;
$roks=$boks;
$roks=~s/\s+/ \_ /g;
$roks=~s/ \d+ //g;
$roks=~s/\_+/\_/g;
    print hr;
    print "

Your letter concordance

"; print "
_ stands for a single space";
foreach $j (0..$#ssorta) {
$roks=~ s/$ssorta[$j]/$ssorta[$j] /g;
}
@smorta=split /\s+/, $sort;
    $roks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g;
    @niska=split /\s+/, $roks;
    unshift @niska, split(//,' ' x $u);
    foreach $size ($u..($#niska)) {
    $viska[$size]=$niska[$size];
    foreach $a (0..9) {
    $viska[$size]=~s/$smorta[$a]/0$a/g;
 }
    foreach $a (10..$#smorta) {
    $viska[$size]=~s/$smorta[$a]/$a/g;
  }
    push (@drkaj, $viska[$size].xzx.$niska[$size].xzx.(join ' ', @niska[$size+1..$size+$u]).xzx.(join ' ', @niska[$size-$u..$size-1]).xzx.$ffile.xzx.$c);
    $c++;
    }
@mrkaj = sort {$a <=> $b} @drkaj; 
    for $z (0 .. $#mrkaj) {
    $mrkaj[$z]=~s/xzxxzx/xzx xzx/g;
    $mrkaj[$z]=~s/.+xzx(.+)xzx(.+)xzx(.+)xzx(.+)xzx(.+)/$3\t\|$1\|\t$2\t$4: $5\n/;
    $mrkaj[$z]=~s/(.+?\|)(.+?)(\|.+?)/$1\\$2\<\/font>\<\/b>$3/g;
    print "
$mrkaj[$z]";
                          }
             }

sub freq_udri {
print hr;
$voks=$boks;
    print "

Your frequency list

"; { $voks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g; @niska=split/\s+/,$voks; foreach $nisk (@niska) { $count{$nisk}++; } } foreach $nisk (sort {uc($a) cmp uc($b)} (keys %count)) { push(@ojoj, $count{$nisk}.xzx.$nisk.xzx.$count{$nisk}); } @ojoja = reverse sort {$a <=> $b} @ojoj; foreach $ojoja (@ojoja) { $ojoja=~ s/.+?xzx(.+?)xzx(.+?)/$1\t$2/; print "
$ojoja";
}
             }

sub freqchar_udri {
print hr;
$koks=$boks;
@storta="";
@storta=split /\s+/, $sort;
$i=0;
    print "

Your letter frequency list

"; foreach $i (0..$#storta) { $koks=~ s/$storta[$i]/$storta[$i] /g; } $koks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g; $koks=~s/ \d+ / /g; $koks=~s/ \d+ / /g; { @viska=split/\s+/,$koks; foreach $visk (@viska) { $mount{$visk}++; } } foreach $visk (sort {uc($a) cmp uc($b)} (keys %mount)) { push(@ojoh, $mount{$visk}.xzx.$visk.xzx.$mount{$visk}); #print "
$visk $mount{$visk}\n";
  }
@ojojh = reverse sort {$a <=> $b} @ojoh;
foreach $ojojh (@ojojh) {
$ojojh=~ s/.+?xzx(.+?)xzx(.+?)/$1\t$2/;
print "
$ojojh";
}

             }

sub rfreq_udri {
print hr;
$moks = $boks;
    print "

Your reversed frequency list

"; { $moks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g; @giska=split/\s+/,$moks; foreach $gisk (@giska) { $aunt{$gisk}++; } } $a=0; @zabij=""; @sorta =""; @sorta=split /\s+/, $sort; foreach $gisk (sort {uc($a) cmp uc($b)} (keys %aunt)) { $zisk= reverse $gisk; foreach $a (0..9) { $zisk=~s/$sorta[$a]/0$a./g; $zisk=~s/\.(\d+)\./\.$1/g; } foreach $a (10..$#sorta) { $zisk=~s/$sorta[$a]/$a./g; $zisk=~s/\.(\d+)\./\.$1/g; } push (@zabij, $zisk.xzx.$gisk.xzx.$aunt{$gisk}); } @zabij=sort {$a <=> $b} @zabij; foreach $zabij(@zabij) { $zabij=~s/.+xzx(.+)xzx(.+)/$1\t$2/g; print "
$zabij";
}
       }
sub phono_udri {
print hr;
print "

Your 1x1 phonotactics

"; print "
_ stands for a single space";

$drzi=$boks;
$drzi=$drzi.' ';
@storta="";
@storta=split /\s+/, $sort;
$i=0;
    $drzi=~s/ \d+//g;
    $drzi=~s/\s+/ \_ /g;
 foreach $i (0..$#storta) {
 $drzi=~ s/$storta[$i]/$storta[$i] /g;
 }
    $drzi=~s/[.,!;:?"'@+=]//g;
    $drzi=~s/[\(\)\{\}\=\+\^\*\~\\\/\n]//g;
    $drzi=~tr/A-Z/a-z/;

    @tiska=split/\s+/,$drzi;

{
$a==0;
while ($a < $#tiska) {
$spoji= join '-',@tiska[$a], @tiska[$a+1];
push(@biska, $spoji);
$a++
}

foreach $bisk (@biska) {
$funt{$bisk}++;
}
print "
Right context:\n";
foreach $bisk (sort(keys %funt)) {
print "$bisk $funt{$bisk}\n";
}
}
$e=1;
$count=1;
while ($e <= $#tiska) {
$spoj= join '-',@tiska[$e], @tiska[$e-1];
push(@bizka, $spoj);
$e++
}

foreach $bizk (@bizka) {
$vunt{$bizk}++;
}
print "
Left context:\n";
foreach $bizk (sort(keys %vunt)) {
$jao="$bizk $vunt{$bizk}\n";
$jao=~s/(.+)\-(.+)( .+)/$2\-$1$3/g;
print "
$jao";
}

            }

sub lexa_udri {
print hr;
print "

Your 1x1 lexial combinatorics

"; print "
_ stands for a single space";
    $hrzi=$boks;
    $hrzi=~s/[.,!;:?"'@+=]//g;
    $hrzi=~s/[\(\)\{\}\=\+\^\*\~\\\/\n]//g;

    @ttiska=split/\s+/,$hrzi;

{
$a=0;
while ($a < $#ttiska) {
$sspoji= join '-',@ttiska[$a], @ttiska[$a+1];
push(@bbiska, $sspoji);
$a++
}

foreach $bbisk (@bbiska) {
$ffunt{$bbisk}++;
}
print "
Right context:\n";
foreach $bbisk (sort(keys %ffunt)) {
print "
$bbisk $ffunt{$bbisk}\n";
}
}
$e=1;
$ccount=1;
while ($e <= $#ttiska) {
$sspoj= join '-',@ttiska[$e], @ttiska[$e-1];
push(@bbizka, $sspoj);
$e++
}

foreach $bbizk (@bbizka) {
$vvunt{$bbizk}++;
}
print "
Left context:\n";
foreach $bbizk (sort(keys %vvunt)) {
$jjao="$bbizk $vvunt{$bbizk}\n";
$jjao=~s/(.+)\-(.+)( .+)/$2\-$1$3/g;
print "
$jjao";
}

            }

sub sleng_udri {
print hr;
print "

Your Sentence length

"; print hr; $yy=1; $xx=0; $slena=$boks; @pojedna= split /[\.\!\?\"]/, $slena; $x=scalar(@pojedna); foreach $pojedna(@pojedna) { @jojmoj= split/\s+/, $pojedna; $y=scalar(@jojmoj)-1; $yy=$yy+$y; } $xx=$yy/$x; print "
Total of $x sentences, $yy words. Average sentence length: $xx words";
}

sub strana {
    my($tmpfile)="/tmp/tmp.sipkadan.lingo";
    system("/usr/local/bin/lynx -dump -nolists $page > $tmpfile");
    open (DAT,"$tmpfile") or die "Your page is not available";
    while () {
        $boks=$boks. $_;
    }
    $boks=~ s/\_/ /g;
    $boks=~ s/\s+/ /g;
    $boks=~ s/\[INLINE\]/ /g;
    $boks=~ s/\[LINK\]/ /g;
    unlink "$tmpfile";
}