#!c:/Perl/bin/perl use CGI qw/:standard/; # Turbo Lingo - written by Danko Sipka, 2001 print "Content-type: text/html; charset=windows-1250\n\n"; print header, start_html('Turbo Lingo - Your Results'), h1('Your Results:'); $q = new CGI; $page = $q->param('page'); $boks = $q->param('boks'); $conc = $q->param('conc'); $freq = $q->param('freq'); $rfreq = $q->param('rfreq'); $lfreq = $q->param('lfreq'); $lconc = $q->param('lconc'); $phono = $q->param('phono'); $lexa = $q->param('lexa'); $sleng = $q->param('sleng'); $size = $q->param('b'); $bb = $q->param('bb'); $sort = $q->param('sort'); $stopw = $q->param('stopw'); if ($page ne "http://") { if ($boks eq "") { &strana; $ffile=$page; if ($conc eq "on") { &konc_udri; } else { print hr; print "Concordance not selected"; } if ($lconc eq "on") { &lkonc_udri; } else { print hr; print "Letter concordance not selected"; } if ($freq eq "on") { &freq_udri; } else { print hr; print "Frequency list not selected"; } if ($lfreq eq "on") { &freqchar_udri; } else { print hr; print "Letter frequency list not selected"; } if ($rfreq eq "on") { &rfreq_udri; } else { print hr; print "Reversed frequency list not selected"; } if ($phono eq "on") { &phono_udri; } else { print hr; print "1x1 phonotactics not selected"; } if ($lexa eq "on") { &lexa_udri; } else { print hr; print "1x1 lexical statistics not selected"; } if ($sleng eq "on") { &sleng_udri; } else { print hr; print "Sentence length not selected"; } } else { print "You can enter either a Web page or paste a text. Go back and choose only one option"; } } elsif ($boks ne "") { $ffile="Pasted text"; if ($conc eq "on") { &konc_udri; } else { print hr; print "Concordance not selected"; } if ($lconc eq "on") { &lkonc_udri; } else { print hr; print "Letter concordance not selected"; } if ($freq eq "on") { &freq_udri; } else { print hr; print "Frequency list not selected"; } if ($lfreq eq "on") { &freqchar_udri; } else { print hr; print "Letter frequency list not selected"; } if ($rfreq eq "on") { &rfreq_udri; } else { print hr; print "Reversed frequency list not selected"; } if ($phono eq "on") { &phono_udri; } else { print hr; print "1x1 phonotactics not selected"; } if ($lexa eq "on") { &lexa_udri; } else { print hr; print "1x1 lexical statistics not selected"; } if ($sleng eq "on") { &sleng_udri; } else { print hr; print "Sentence length not selected"; } } else { print "You did not enter anything. Go back and enter a Web page or paste a text"; } sub konc_udri { $z=0; $u=$size; @drkaj=''; @hrkaj=''; $c=1; $a=0; @sorta =""; @stopw = ""; @sorta=split /\s+/, $sort; @stopw=split /\s+/, $stopw; print hr; print "
$mrkaj[$z]";
} else {
foreach $stopw(@stopw) {
$opw='|'.$stopw.'|';
if (index($mrkaj[$z], $opw) > -1) {
print "$mrkaj[$z]";
} else {
}
}
}
#
}
}
sub lkonc_udri {
$z=0;
$size=$bb;
$u=$size;
@drkaj='';
@hrkaj='';
@ssorta="";
@ssorta=split /\s+/, $sort;
$c=1;
$j=0;
$a=0;
$roks=$boks;
$roks=~s/\s+/ \_ /g;
$roks=~s/ \d+ //g;
$roks=~s/\_+/\_/g;
print hr;
print "Your letter concordance
";
print "_ stands for a single space";
foreach $j (0..$#ssorta) {
$roks=~ s/$ssorta[$j]/$ssorta[$j] /g;
}
@smorta=split /\s+/, $sort;
$roks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g;
@niska=split /\s+/, $roks;
unshift @niska, split(//,' ' x $u);
foreach $size ($u..($#niska)) {
$viska[$size]=$niska[$size];
foreach $a (0..9) {
$viska[$size]=~s/$smorta[$a]/0$a/g;
}
foreach $a (10..$#smorta) {
$viska[$size]=~s/$smorta[$a]/$a/g;
}
push (@drkaj, $viska[$size].xzx.$niska[$size].xzx.(join ' ', @niska[$size+1..$size+$u]).xzx.(join ' ', @niska[$size-$u..$size-1]).xzx.$ffile.xzx.$c);
$c++;
}
@mrkaj = sort {$a <=> $b} @drkaj;
for $z (0 .. $#mrkaj) {
$mrkaj[$z]=~s/xzxxzx/xzx xzx/g;
$mrkaj[$z]=~s/.+xzx(.+)xzx(.+)xzx(.+)xzx(.+)xzx(.+)/$3\t\|$1\|\t$2\t$4: $5\n/;
$mrkaj[$z]=~s/(.+?\|)(.+?)(\|.+?)/$1\\$2\<\/font>\<\/b>$3/g;
print "$mrkaj[$z]";
}
}
sub freq_udri {
print hr;
$voks=$boks;
print "Your frequency list
";
{
$voks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g;
@niska=split/\s+/,$voks;
foreach $nisk (@niska) {
$count{$nisk}++;
}
}
foreach $nisk (sort {uc($a) cmp uc($b)} (keys %count)) {
push(@ojoj, $count{$nisk}.xzx.$nisk.xzx.$count{$nisk});
}
@ojoja = reverse sort {$a <=> $b} @ojoj;
foreach $ojoja (@ojoja) {
$ojoja=~ s/.+?xzx(.+?)xzx(.+?)/$1\t$2/;
print "$ojoja";
}
}
sub freqchar_udri {
print hr;
$koks=$boks;
@storta="";
@storta=split /\s+/, $sort;
$i=0;
print "Your letter frequency list
";
foreach $i (0..$#storta) {
$koks=~ s/$storta[$i]/$storta[$i] /g;
}
$koks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g;
$koks=~s/ \d+ / /g;
$koks=~s/ \d+ / /g;
{
@viska=split/\s+/,$koks;
foreach $visk (@viska) {
$mount{$visk}++;
}
}
foreach $visk (sort {uc($a) cmp uc($b)} (keys %mount)) {
push(@ojoh, $mount{$visk}.xzx.$visk.xzx.$mount{$visk});
#print "$visk $mount{$visk}\n";
}
@ojojh = reverse sort {$a <=> $b} @ojoh;
foreach $ojojh (@ojojh) {
$ojojh=~ s/.+?xzx(.+?)xzx(.+?)/$1\t$2/;
print "$ojojh";
}
}
sub rfreq_udri {
print hr;
$moks = $boks;
print "Your reversed frequency list
";
{
$moks=~s/[.,!;:?"'@\(\)\{\}\[\]\=\+\^\*\~\\\/\|\`]//g;
@giska=split/\s+/,$moks;
foreach $gisk (@giska) {
$aunt{$gisk}++;
}
}
$a=0;
@zabij="";
@sorta ="";
@sorta=split /\s+/, $sort;
foreach $gisk (sort {uc($a) cmp uc($b)} (keys %aunt)) {
$zisk= reverse $gisk;
foreach $a (0..9) {
$zisk=~s/$sorta[$a]/0$a./g;
$zisk=~s/\.(\d+)\./\.$1/g;
}
foreach $a (10..$#sorta) {
$zisk=~s/$sorta[$a]/$a./g;
$zisk=~s/\.(\d+)\./\.$1/g;
}
push (@zabij, $zisk.xzx.$gisk.xzx.$aunt{$gisk});
}
@zabij=sort {$a <=> $b} @zabij;
foreach $zabij(@zabij) {
$zabij=~s/.+xzx(.+)xzx(.+)/$1\t$2/g;
print "$zabij";
}
}
sub phono_udri {
print hr;
print "Your 1x1 phonotactics
";
print "_ stands for a single space";
$drzi=$boks;
$drzi=$drzi.' ';
@storta="";
@storta=split /\s+/, $sort;
$i=0;
$drzi=~s/ \d+//g;
$drzi=~s/\s+/ \_ /g;
foreach $i (0..$#storta) {
$drzi=~ s/$storta[$i]/$storta[$i] /g;
}
$drzi=~s/[.,!;:?"'@+=]//g;
$drzi=~s/[\(\)\{\}\=\+\^\*\~\\\/\n]//g;
$drzi=~tr/A-Z/a-z/;
@tiska=split/\s+/,$drzi;
{
$a==0;
while ($a < $#tiska) {
$spoji= join '-',@tiska[$a], @tiska[$a+1];
push(@biska, $spoji);
$a++
}
foreach $bisk (@biska) {
$funt{$bisk}++;
}
print "Right context:\n";
foreach $bisk (sort(keys %funt)) {
print "$bisk $funt{$bisk}\n";
}
}
$e=1;
$count=1;
while ($e <= $#tiska) {
$spoj= join '-',@tiska[$e], @tiska[$e-1];
push(@bizka, $spoj);
$e++
}
foreach $bizk (@bizka) {
$vunt{$bizk}++;
}
print "Left context:\n";
foreach $bizk (sort(keys %vunt)) {
$jao="$bizk $vunt{$bizk}\n";
$jao=~s/(.+)\-(.+)( .+)/$2\-$1$3/g;
print "$jao";
}
}
sub lexa_udri {
print hr;
print "Your 1x1 lexial combinatorics
";
print "_ stands for a single space";
$hrzi=$boks;
$hrzi=~s/[.,!;:?"'@+=]//g;
$hrzi=~s/[\(\)\{\}\=\+\^\*\~\\\/\n]//g;
@ttiska=split/\s+/,$hrzi;
{
$a=0;
while ($a < $#ttiska) {
$sspoji= join '-',@ttiska[$a], @ttiska[$a+1];
push(@bbiska, $sspoji);
$a++
}
foreach $bbisk (@bbiska) {
$ffunt{$bbisk}++;
}
print "Right context:\n";
foreach $bbisk (sort(keys %ffunt)) {
print "$bbisk $ffunt{$bbisk}\n";
}
}
$e=1;
$ccount=1;
while ($e <= $#ttiska) {
$sspoj= join '-',@ttiska[$e], @ttiska[$e-1];
push(@bbizka, $sspoj);
$e++
}
foreach $bbizk (@bbizka) {
$vvunt{$bbizk}++;
}
print "Left context:\n";
foreach $bbizk (sort(keys %vvunt)) {
$jjao="$bbizk $vvunt{$bbizk}\n";
$jjao=~s/(.+)\-(.+)( .+)/$2\-$1$3/g;
print "$jjao";
}
}
sub sleng_udri {
print hr;
print "Your Sentence length
";
print hr;
$yy=1;
$xx=0;
$slena=$boks;
@pojedna= split /[\.\!\?\"]/, $slena;
$x=scalar(@pojedna);
foreach $pojedna(@pojedna) {
@jojmoj= split/\s+/, $pojedna;
$y=scalar(@jojmoj)-1;
$yy=$yy+$y;
}
$xx=$yy/$x;
print "Total of $x sentences, $yy words. Average sentence length: $xx words";
}
sub strana {
my($tmpfile)="/tmp/tmp.sipkadan.lingo";
system("/usr/local/bin/lynx -dump -nolists $page > $tmpfile");
open (DAT,"$tmpfile") or die "Your page is not available";
while () {
$boks=$boks. $_;
}
$boks=~ s/\_/ /g;
$boks=~ s/\s+/ /g;
$boks=~ s/\[INLINE\]/ /g;
$boks=~ s/\[LINK\]/ /g;
unlink "$tmpfile";
}