Friday, July 8, 2011

Counting the number of repeating characters in PowerShell, the ugly and dirty way

Recently I was asked to write a script that counts the number of times each letter appears in a text file. The script is case-insensitive. I am working with a copy of War and Peace from Project Gutenberg. It reminded me of the MapReduce/Hadoop tutorial I'd followed on the Apache website to count the number of words in a collection of books. Here's the script; please suggest a better approach if you dare:

#Artem Ervits - version 0.0.1
# Counts how many times each English letter (a-z) appears in a text file,
# treating upper- and lower-case letters as the same letter, and prints one
# "<letter>:  <count>" line for every letter that occurs at least once.
# Characters outside A-Z / a-z are ignored.

$names = Get-Content "C:\Users\are9004\Desktop\wnp.txt"

# One tally per letter: slot 0 holds 'a', slot 25 holds 'z'.
# A single array replaces 26 separate scalar counters; in particular there is
# no counter variable that can collide with a loop index.
$letterCounts = New-Object int[] 26

foreach($name in $names)
{
    # The [string] cast turns a $null line (e.g. from an empty file) into "",
    # so ToCharArray() is always safe to call.
    foreach($ch in ([string]$name).ToCharArray())
    {
        $code = [int]$ch

        # Fold 'A'-'Z' (65-90) onto 'a'-'z' (97-122) so the count is case-insensitive.
        if($code -ge 65 -and $code -le 90)
        {
            $code += 32
        }

        # Only tally the 26 ASCII letters; everything else is skipped.
        if($code -ge 97 -and $code -le 122)
        {
            $letterCounts[$code - 97]++
        }
    }
}

# Report in alphabetical order, omitting letters that never occurred.
for($slot = 0; $slot -lt $letterCounts.Length; $slot++)
{
    if($letterCounts[$slot] -ne 0)
    {
        Write-Host ("{0}: " -f [char](97 + $slot)) $letterCounts[$slot]
    }
}


And the result:

a: 202517
b: 33301
c: 52720
d: 109627
e: 306370
f: 51143
g: 47026
h: 167025
i: 59
j: 2574
k: 19109
l: 88239
m: 51805
n: 135728
o: 185395
p: 44332
q: 2303
r: 142130
s: 140990
t: 204563
u: 65295
v: 23465
w: 59197
x: 3769
y: 46263
z: 1795

This covers only the English alphabet with its 26 letters. Enjoy!
Post a Comment