# character set for our problem Alph = c("!", "*", "(", ")", "-", "[", "]", ".", ",", ";", "\"", "?", "’", "/", ":", "\n", " ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "A", "b", "B", "c", "C", "d", "D", "e", "E", "f", "F", "g", "G", "h", "H", "i", "I", "j", "J", "k", "K", "l", "L", "m", "M", "n", "N", "o", "O", "p", "P", "q", "Q", "r", "R", "s", "S", "t", "T", "u", "U", "v", "V", "w", "W", "x", "X", "y", "Y", "z", "Z") nc = length(Alph) vec = 1:nc names(vec) = Alph # read War and Peace and turn it to numbers con = url("http://www.tau.ac.il/~saharon/Resampling2012/pg2600.txt") txt = readLines (con,n=100000) txt = txt[nchar(txt)>0] txt = paste(txt,sep=" ", collapse=" ") res = strsplit(txt, NULL)[[1]] res = res[res%in%Alph] nums = vec[res] # nums contains the index in Alph of every character # statistics for single letters singles = numeric(nc) for (i in 1:nc) singles[i] = sum(nums==i)/length(nums) # replace zeros with small numbers singles=singles *(1-10^(-7)) singles=singles + (10^(-7)/nc) # show some results names(singles) = Alph singles[singles > 0.01] # statistics for pairs of letters N = length(nums) pairs = matrix(0,ncol=nc,nrow=nc) for (i in 1:(N-1)) pairs[nums[i],nums[i+1]]=pairs[nums[i],nums[i+1]]+1 pairs = pairs/(N-1) pairs=pairs *(1-10^(-6)) pairs=pairs + (10^(-6)/nc^2) # Pairs that appear more than 1% of the time rownames(pairs) = colnames(pairs)=Alph for (i in 1:nc)for(j in 1:nc) if (pairs[i,j]>0.01) cat (Alph[i],Alph[j]," ",pairs[i,j],sep="","\n") # our cipher! encoded.string = ":7)2wHF8.NR\nGwF8.VR’wi7h\n2w8.p2wH.\n2.hR’7.2G7Fa.c.VR\n2.iR.0’7h.-G2FG78.wRi.iR.*7G)F2.z)\n,.nz2.2P)p.izGi.\n2w.HR.p)P2F.GAi27.iz2\na.nz2.?RRH.)F.RAi.)wi2772H. )iz.iz2)7.0Rw2Fa.5R.p2i.)i.02. )iz.-G2FG7,.nz2.wR0p2.97’i’F.uGiz.iRpH.hR’.-G2FG7. GF.G\n0)i)R’F/.cA.)i. 272.FR8.)i. GF.G.?7)2PR’F.AG’pi8.TwH.?7)2PR’Fph.zGiz.-G2FG7.GwF 27SH.)i,.u2728.’wH27.p2GP2.RA.97’i’F.GwH.iz2.72Fi.D.:R7.97’i’F.)F.Gw.zRwR’7G0p2.\nGwa.5R.G72.iz2h.Gpp8.Gpp.zRwR’7G0p2.\n2w.D.-R\n2.c.iR.F*2Gb.)w.-G2FG7SF.A’w27Gp,.u2. GF.\nh.A7)2wH8.AG)izA’p.GwH.f’Fi.iR.\n2/.9’i.97’i’F.FGhF.z2. GF.G\n0)i)R’Fa.TwH.97’i’F.)F.Gw.zRwR’7G0p2.\nGw,.u2.zGiz.07R’?zi.\nGwh.VG*i)P2F.zR\n2.iR.NR\n2.WzRF2.7GwFR\nF.H)H.iz2.?2w27Gp.VRAA27F.A)pp/.I)H.iz)F.)w.-G2FG7.F22\n.G\n0)i)R’Fx.Wz2w.izGi.iz2.*RR7.zGP2.V7)2H8.-G2FG7.zGiz. 2*i/.T\n0)i)Rw.FzR’pH.02.\nGH2.RA.Fi27w27.Fi’AA/.r2i.97’i’F.FGhF.z2. GF.G\n0)i)R’Fa.TwH.97’i’F.)F.Gw.zRwR’7G0p2.\nGw,.rR’.Gpp.H)H.F22.izGi.Rw.iz2.o’*27VGp.c.iz7)V2.*72F2wi2H.z)\n.G.b)w?ph.V7R w8.Wz)Vz.z2.H)H.iz7)V2.72A’F2/. GF.iz)F.G\n0)i)Rwx.r2i.97’i’F.FGhF.z2. GF.G\n0)i)R’Fa.TwH8.F’728.z2.)F.Gw.zRwR’7G0p2.\nGw,.c.F*2Gb.wRi.iR.H)F*7RP2. zGi.97’i’F.F*Rb28.9’i.z272.c.G\n.iR.F*2Gb. zGi.c.HR.bwR ,.rR’.Gpp.H)H.pRP2.z)\n.RwV28.wRi. )izR’i.VG’F2/.WzGi.VG’F2. )izzRpHF.hR’.iz2w8.iR.\nR’7w.AR7.z)\nx.J.f’H?\n2wiE.izR’.G7i.Ap2H.iR.07’i)Fz.02GFiF8.TwH.\n2w.zGP2.pRFi.iz2)7.72GFRw,.92G7. )iz.\n2a.Mh.z2G7i.)F.)w.iz2.VRAA)w.iz272. )iz.-G2FG78.TwH.c.\n’Fi.*G’F2.i)pp.)i.VR\n2.0GVb.iR.\n2,..W)pp)G\n.5zGb2F*2G72.YA7R\n.v’p)’F.-G2FG7.eDZ!," cyp = strsplit(encoded.string,NULL)[[1]] cyp = vec[cyp] names(cyp)=NULL n = length(cyp) # example of printing a deciphering # create a random permutation perm = sample (nc) # decipher according to permutation # note that "newline" is one of the characters, so output may be a few lines dec = paste(Alph[perm[cyp]],sep="",collapse="") cat (dec,"\n") ds.best = -10^7 perm.best =perm ds = log(pairs) ss = log(singles) all.pairs = cbind(perm[cyp][-n],perm[cyp][-1]) score.perm = sum(ds[all.pairs])#-ss[perm[cyp][-n]]) accs=0 for (i in 1:1000000){ tmp = sample (nc,2) newperm=perm newperm[tmp]=perm[sample(tmp)] newperm[tmp]=perm[sample(tmp)] all.pairs = cbind(newperm[cyp][-n],newperm[cyp][-1]) score.newperm = sum(ds[all.pairs])#-ss[newperm[cyp][-n]]) if (score.newperm>score.perm || runif(1)ds.best){ ds.best=score.perm perm.best=perm } if (i %% 100000 ==0){ dec = paste(Alph[perm[cyp]],sep="",collapse="") cat (dec,"\n") cat (i,score.perm, accs,"\n") } }