Text to picture
Hey guys, I created a web visualization for text translated into word2vec (w2vec) vectors. It generates a picture from your text using the w2vec representations of its words and a color palette of your choice.
Pick a color palette from above and write some text below (the picture updates when you press space or on paste :) )
I took the representations of the 10,000 most used English words, as produced by the word2vec algorithm. The word representations come from polyglot-en.pkl.
The color palettes are the most popular color palettes from color-hex as of 30 December 2017.
import pickle
import json
import math
import numpy as np
# Build wemb.json: a {word: [component, ...]} table with the embedding of every
# word listed in google-10000-english.txt, rescaled and stored as integer
# strings.

with open("google-10000-english.txt") as f:
    interesting_words = f.readlines()

# NOTE(security): pickle.load can execute arbitrary code while loading, so only
# run this against a polyglot-en.pkl obtained from a trusted source.
with open('polyglot-en.pkl', 'rb') as f:
    data = pickle.load(f)

# NOTE(review): the vectors are used un-normalised. An earlier revision computed
# x / np.linalg.norm(x) per vector but only rebound the loop variable, so the
# result was never stored; that dead loop is omitted here so the generated
# wemb.json stays exactly the same. Confirm whether normalisation is wanted.

# data[0] is iterated as the vocabulary and data[1] is indexed in parallel for
# the matching vector. Words are keyed in lower case, so when two entries differ
# only by case the later one wins.
embeddings = {}
for idx, word in enumerate(data[0]):
    # x * 128 + 128 centres the components on 128 (-1 -> 0, +1 -> 256).
    vec = data[1][idx] * 128 + 128
    # "%d" truncates each component to an integer string.
    embeddings[word.lower()] = ["%d" % x for x in vec]

newdict = {}
for word in interesting_words:
    word = word.strip().lower()
    try:
        newdict[word] = embeddings[word]
    except KeyError:
        # Report words from the frequency list that have no embedding.
        print(word)

with open('wemb.json', 'w') as outfile:
    json.dump(newdict, outfile)
The main workhorse is the doImage function, which draws the vectors pixel by pixel onto the canvas:
/**
 * Render the text currently in `textarea` as an image on the #canvas element.
 *
 * The text is split on spaces, each token is lower-cased and reduced to the
 * characters accepted by isLetter(), and every remaining word is looked up in
 * `worddict` (falling back to the vector stored for "balloon"). Each vector
 * component sets the lightness of a colour whose other two Lab channels come
 * from a pseudo-randomly picked entry of the selected palette.
 *
 * Does nothing when the text splits into fewer than two tokens, or when no
 * token contains a letter.
 *
 * @param pidx index into `pallets` of the palette to draw with
 */
function doImage(pidx)
{
    const VERTICAL_REPEAT = 8;   // pixels written per vector component
    const HORIZONTAL_REPEAT = 8; // copies of each word vector pushed to `image`

    // Stored in an outer-scope variable (assigned without a declaration).
    current_pidx = pidx;

    const rawWords = textarea.value.split(' ');
    if (rawWords.length < 2)
        return;

    // Lower-case each token and keep only its letters; drop tokens left empty.
    const words = [];
    for (const raw of rawWords)
    {
        const lowered = raw.toLowerCase();
        let cleaned = "";
        for (let pos = 0; pos < lowered.length; pos++)
        {
            const ch = lowered.charAt(pos);
            if (isLetter(ch))
                cleaned += ch;
        }
        if (cleaned != "")
            words.push(cleaned);
    }

    // Vector used for any word missing from the dictionary.
    const unknownVec = worddict["balloon"];

    const image = [];
    for (const word of words)
    {
        // Own-property check: the `in` operator also matches inherited keys,
        // so a word such as "constructor" would otherwise pick up
        // Object.prototype.constructor instead of a word vector.
        const known = Object.prototype.hasOwnProperty.call(worddict, word);
        const wordvec = known ? worddict[word] : unknownVec;
        for (let copy = 0; copy < HORIZONTAL_REPEAT; copy++)
            image.push(wordvec);
    }

    // Also an outer-scope assignment; presumably rgb2lab returns [L, a, b].
    lab_color_palette = pallets[pidx].colors.map(x => rgb2lab(hexToRgb(x)));

    // Every token was stripped to nothing (e.g. "1 2"): there is no vector to
    // size the canvas from, so leave the canvas untouched.
    if (image.length === 0)
        return;

    const componentCount = image[0].length;
    const veclen = componentCount * VERTICAL_REPEAT;

    const thecanvas = document.getElementById('canvas');
    thecanvas.width = image.length;
    thecanvas.height = veclen;

    const imgData = ctx.createImageData(image.length, veclen);
    const data = imgData.data;

    // The generator is seeded with the number of cleaned words.
    const rand = new Random(words.length);

    // Pixels are written in buffer order (4 bytes each: R, G, B, A). ImageData
    // is row-major, so consecutive pixels run along canvas rows and wrap at
    // the canvas width.
    let k = 0;
    for (let i = 0; i < image.length; i++)
    {
        for (let j = 0; j < componentCount; j++)
        {
            // Components are scaled by 100/256 to serve as the Lab lightness;
            // the multiplication also coerces string components to numbers.
            const lightness = image[i][j] * 100 / 256;
            const paletteIdx = rand.next() % lab_color_palette.length;
            const paletteColor = lab_color_palette[paletteIdx];
            const rgb = lab2rgb([lightness, paletteColor[1], paletteColor[2]]);
            for (let rep = 0; rep < VERTICAL_REPEAT; rep++)
            {
                data[k++] = rgb[0];
                data[k++] = rgb[1];
                data[k++] = rgb[2];
                data[k++] = 255; // fully opaque
            }
        }
    }
    ctx.putImageData(imgData, 0, 0);
}
I made this small JavaScript app while I was training my deep neural network to categorize Wikipedia comments for a Kaggle contest. I found it interesting, and I hope you like it.