#!/usr/bin/perl
#
# Convert the dictionary file "ruen.dic" into a JavaScript source file
# "ruen.js" that defines one string literal:
#
#     CONTSTR="<russian>:<english>;<russian>:<english>;..."
#
# Each input line is expected to look like "russian*english".  The Russian
# part is decoded from KOI8-R and written as JavaScript \uXXXX escapes so
# the output file is plain ASCII on that side; the English part is copied
# through with the characters that would break the format neutralised.
#
# Only a sample of the input lines is converted (see $SAMPLE_INTERVAL).

use strict;
use warnings;
use Encode ();

my $DICT_FILE = 'ruen.dic';
my $JS_FILE   = 'ruen.js';

# TODO: only every 12th line of the dictionary is used (11 lines are
# skipped, the next one is converted), which keeps the result below
# Langmixer's 3000 entry limit.
my $SAMPLE_INTERVAL = 12;

open my $dict_fh, '<', $DICT_FILE
    or die "Can't open '$DICT_FILE' for reading: $!";
binmode $dict_fh;    # the dictionary is raw KOI8-R bytes

open my $js_fh, '>', $JS_FILE
    or die "Can't open '$JS_FILE' for writing: $!";
binmode $js_fh;

print {$js_fh} 'CONTSTR="';

my $line_count = 0;
LINE:
while ( my $line = <$dict_fh> ) {
    # Strip the line ending, including a CR left over from CRLF files; a
    # raw CR inside the JavaScript string literal would be a syntax error.
    $line =~ s/[\r\n]+\z//;

    # The counter covers every line, so lines 12, 24, 36, ... are converted.
    next LINE if ++$line_count % $SAMPLE_INTERVAL;

    my ( $russian, $english ) = split /\*/, $line;

    # Skip lines that do not have both a headword and a translation.
    next LINE unless defined $russian && length $russian;
    next LINE unless defined $english && length $english;

    print {$js_fh} _unicode_escapes($russian), ':',
        _js_safe_text($english), ';';
}

# print the last closing quote
print {$js_fh} '"';

close $dict_fh;
close $js_fh or die "Can't finish writing '$JS_FILE': $!";

# Decode a KOI8-R byte string and return it as a run of JavaScript
# \uXXXX escapes, one per decoded character.
sub _unicode_escapes {
    my ($koi8r_bytes) = @_;

    # decode() yields characters, so ord() below sees real code points
    # instead of the individual bytes of some encoded form.
    my $text = Encode::decode( 'koi8-r', $koi8r_bytes );

    return join '', map { sprintf '\\u%04X', ord } split //, $text;
}

# Make the translation text safe to embed in the output string:
# ';' separates entries, so it becomes a space; backslashes and double
# quotes are escaped so they cannot end or corrupt the JS string literal.
sub _js_safe_text {
    my ($text) = @_;

    $text =~ s/;/ /g;
    $text =~ s/(["\\])/\\$1/g;

    return $text;
}