/**
*
* Exercise originally created for Data visualization workshop taught at
* HEAD Geneva, October 2010. For more information visit the workshop wiki at:
* http://learn.postspectacular.com/Workshop:HEADGeneva
*
* This small application displays a histogram of the top 500 most frequently
* used words of an arbitrarily chosen piece of text. The visualization is using
* the ZoomLens class to make better use of the available screen space and users
* can zoom in to any area by moving the mouse vertically. Furthermore, this was
* an exercise in object-oriented design and we can also re-sort the visualization
* to instead show the longest words used or sort them alphabetically. Finally,
* the text sample can be chosen using a standard file chooser dialog box.
*
* Usage:
*
* - f: sort by word frequency
* - l: sort by word length
* - r: load new text
*
*
* The text bundled with this demo is "The Art of War" by Sunzi:
* http://www.gutenberg.org/ebooks/132
*/
/*
* Copyright (c) 2011 Karsten Schmidt
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* http://creativecommons.org/licenses/LGPL/2.1/
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
import java.awt.FileDialog;
import toxi.util.*;
import toxi.math.*;
// Sketch-level state shared by setup(), draw(), initHistogram() and keyPressed().

// Histogram entries for the loaded text. Typed as HistogramWord because the
// sketch reads the .word field of elements and fills the list with
// HistogramWord instances; a raw List would not allow that field access.
List<HistogramWord> uniqueWords;

// Lens used by draw(); its position and strength follow the mouse.
ZoomLensInterpolation zoomLens = new ZoomLensInterpolation();

// Second argument handed to zoomLens.setLensPos()/setLensStrength() each frame
// (presumably a smoothing factor -- confirm against the toxiclibs docs).
float smoothStep=0.2;

// Horizontal margin in pixels: the mouse range and the lens interpolation
// both run from gap to width-gap.
int gap=10;

// Height in pixels of the strip at the bottom of the window where labels are drawn.
int labelGap=150;

// Upper bound on the number of list entries considered per frame.
int wordLimit=500;

// Active ordering; also supplies the metric that draw() reads via getMetric().
HistogramSorter sortFunction;
/**
 * Processing start-up hook: opens a 680x382 window and then builds the
 * initial histogram via initHistogram().
 */
void setup() {
  size(680, 382);
  initHistogram();
}
/**
 * Per-frame rendering of the histogram.
 *
 * Updates the zoom lens from the current mouse position, clears the window
 * and then walks the word list to draw the rotated "(metric) word" labels.
 *
 * NOTE(review): the for-loop header below is truncated in this copy of the
 * file (text between a '<' and a later '>' appears to have been stripped).
 * The loop condition and the statements that define w, ts and barCenter are
 * missing, so this function does not compile as shown and must be restored
 * from the original sketch before any further changes.
 */
void draw() {
// vertical room left once the label strip and a fixed 20 pixels are subtracted
float maxBarHeight=height-labelGap-20;
// scale factor based on the metric of the first list entry
// (presumably the largest value under the active sort -- confirm; the call
// also requires uniqueWords to be non-empty)
float normFactor=maxBarHeight/sortFunction.getMetric(uniqueWords.get(0));
// mouse x mapped from [gap, width-gap] to [0, 1]
float focalPos=map(mouseX,gap,width-gap,0.0,1.0);
zoomLens.setLensPos(focalPos,smoothStep);
// mouse y mapped from [0, height] to [-1, +1] and used as lens strength
zoomLens.setLensStrength(map(mouseY,0,height,-1,+1),smoothStep);
// NOTE(review): focalX is not referenced in the lines that survive below;
// it may have been used in the missing part of the loop -- confirm
int focalX=(int)zoomLens.interpolate(gap, width-gap, focalPos);
background(255);
noStroke();
textAlign(RIGHT);
fill(0);
// NOTE(review): space is likewise not referenced in the surviving lines
int space=10;
// never look at more than wordLimit entries
int maxWords=min(uniqueWords.size(),wordLimit);
// NOTE(review): truncated line -- what remains is the start of the for header
// fused with the tail of a condition ending in "3) {"
for(int i=0; i3) {
textSize(ts);
pushMatrix();
// move to the top edge of the label strip, then rotate a quarter turn so the
// label text runs vertically
translate(barCenter+ts/4,height-labelGap);
rotate(-HALF_PI);
// label format: "(<metric>) <word>"
text("("+sortFunction.getMetric(w)+") "+w.word,0,0);
popMatrix();
}
}
}
/**
 * Asks the user for a text file, loads it and rebuilds uniqueWords from it,
 * then applies the frequency ordering.
 *
 * Called from setup() and again whenever 'r' is pressed.
 *
 * NOTE(review): one line in the middle of this function is truncated in this
 * copy of the file (text between a '<' and a later '>' appears to have been
 * stripped). The code that fills the histogram map and creates the
 * uniqueWords list is missing, so this function does not compile as shown
 * and must be restored from the original sketch.
 */
void initHistogram() {
// use toxiclibs FileUtils to display a file chooser
String fileName=null;
fileName=FileUtils.showFileDialog(
frame,
"Choose a text file...",
dataPath(""),
new String[]{".txt",".txt.gz"},
FileDialog.LOAD
);
// if user pressed cancel, use default file
if (fileName==null) {
fileName="artofwar.txt.gz";
}
// NOTE(review): the result of loadStrings() is used without a null check --
// confirm what it returns when the file cannot be read
String[] lines= loadStrings(fileName);
// NOTE(review): totalWordCount is not referenced in the surviving lines; it
// was presumably updated in the missing loop -- confirm
int totalWordCount =0;
// NOTE(review): raw HashMap here; the enhanced for below iterates keySet() as
// String, so the type arguments were presumably stripped too -- confirm
HashMap histogram=new HashMap();
// NOTE(review): truncated line -- what remains is the start of a for header
// fused with the tail "();" of a later statement
for(int i=0; i();
// wrap every map key and its stored value in a HistogramWord
for(String w : histogram.keySet()) {
HistogramWord hw=new HistogramWord(w,histogram.get(w));
uniqueWords.add(hw);
}
// default ordering after (re)loading
setSortFunction(new FrequencyComparator());
// prints the word length of list entries 0..999
// NOTE(review): the bound is a fixed 1000 and is not limited by
// uniqueWords.size(); a text with fewer unique words makes get(i) throw an
// IndexOutOfBoundsException. Looks like leftover debug output -- consider
// removing it or bounding it by the list size.
for(int i=0; i<1000; i++) {
println(uniqueWords.get(i).word.length());
}
}
/**
 * Keyboard handler.
 *
 * 'f' re-sorts with a FrequencyComparator, 'l' re-sorts with a
 * WordLengthComparator, and 'r' runs initHistogram() again. Any other key
 * is ignored.
 */
void keyPressed() {
  switch (key) {
    case 'f':
      setSortFunction(new FrequencyComparator());
      break;
    case 'l':
      setSortFunction(new WordLengthComparator());
      break;
    case 'r':
      initHistogram();
      break;
    default:
      // not a key this sketch reacts to
      break;
  }
}
/**
 * Makes the given sorter the active one and immediately re-orders
 * uniqueWords with it.
 *
 * Used by both initHistogram() and keyPressed() so that the
 * "remember the sorter, then sort" step is written only once.
 *
 * @param s sorter to store in sortFunction and to sort uniqueWords with
 */
void setSortFunction(HistogramSorter s) {
  sortFunction = s;
  Collections.sort(uniqueWords, s);
}