-- Word count over the file 'mary'.
-- Load the file named 'mary' and call the single field in each record 'line'.
-- The alias is 'lines' rather than 'input' because 'input' is a reserved
-- keyword in Pig Latin and cannot be used as a relation alias.
lines = load 'mary' as (line);
-- TOKENIZE splits the line into a bag holding one tuple per word.
-- flatten takes that bag and produces a separate record for each tuple,
-- and the single field in each resulting record is called 'word'.
words = foreach lines generate flatten(TOKENIZE(line)) as word;
-- Now group the records together by word.
grpd = group words by word;
-- Count the records in each group; the output is (word, count).
cntd = foreach grpd generate group, COUNT(words);
-- Print out the results.
dump cntd;
-- Word count over a fixed input file; results are stored in './wordcount'.
-- No schema is declared on load, so the first field is referenced as $0.
input_lines = LOAD '/home/accure/shamshad/input.txt';
-- Cast the first field to chararray, split it into words, and emit one
-- record per word with the field named 'word'.
input_words = FOREACH input_lines GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
-- Collect identical words into one group each.
word_groups = GROUP input_words BY word;
-- Emit (count, word) for every group -- the count comes first.
word_counts = FOREACH word_groups GENERATE COUNT(input_words), group;
-- Write the counts out.
STORE word_counts INTO './wordcount';
To run this Pig script in local mode:
pig -x local /home/accure/shamshad/accure.pig
-- Word count using TextLoader.
-- The path must be a quoted string literal; an unquoted file name is a
-- syntax error in LOAD.
text = LOAD 'text.txt' USING TextLoader();
-- Split the first field ($0) of each record into words, one record per word.
tokens = FOREACH text GENERATE FLATTEN(TOKENIZE($0)) AS word;
-- Group by word inline and emit (word, ct); $1 is the second field of each
-- group record, i.e. the bag of grouped tuples that COUNT_STAR counts.
-- NOTE(review): no DUMP or STORE of 'wordcount' is visible here -- add one to produce output.
wordcount = FOREACH (GROUP tokens BY word) GENERATE group AS word, COUNT_STAR($1) AS ct;
Ready to start your tutorial with us? That's great! Send us an email and we will get back to you as soon as possible!