# Counts the key phrases that occur in the statuses listed on a user's
# Favotter "fav" pages and prints them, most frequent first.
#
# Usage: ruby <this script> [user [max_page]]
#   user     - user name passed to the Favotter user page (default: 'yasu_kobayashi')
#   max_page - number of result pages to read, starting at page 1 (default: 10)
#
# The key phrases come from the Yahoo keyphrase extraction endpoint configured
# below; YahooApiId must be replaced with a valid application ID.

require 'rubygems'
require 'hpricot'
require 'rexml/document'
require 'kconv'
require 'cgi'
require 'net/http'
Net::HTTP.version_1_2

# const
FavotterAddress = 'favotter.matope.com'
FavotterPath1 = '/user.php?user='
FavotterPath2 = '&mode=fav&page='
YahooApiAddress = 'jlp.yahooapis.jp'
YahooApiPath1 = '/KeyphraseService/V1/extract?appid='
YahooApiPath2 = '&sentence='
YahooApiId = 'xxx' # please change to your ID

# Collects the status texts found on pages 1..max_page of the user's
# Favotter page.
#
# A status text is the inner text of the first <span> inside each <div>
# matched by the "bubble" class selector. Pages that do not return a
# successful HTTP response are reported on stderr and skipped.
#
# user     - user name (String); URL-escaped before being sent.
# max_page - last page number to fetch (Integer); nothing is fetched when < 1.
#
# Returns an Array of Strings in page order.
def fetch_status_texts(user, max_page)
  texts = []
  return texts if max_page < 1

  # The user name may come from the command line, so escape it for the query string.
  base_path = FavotterPath1 + CGI.escape(user) + FavotterPath2

  # One connection is reused for every page instead of reconnecting per page.
  Net::HTTP.start(FavotterAddress) do |http|
    1.upto(max_page) do |page|
      response = http.request_get(base_path + page.to_s)
      unless response.is_a?(Net::HTTPSuccess)
        warn "favotter: page #{page} returned HTTP #{response.code}, skipped"
        next
      end

      Hpricot(response.body).search("div[@class~='bubble']").each do |div|
        span = div.at('span')
        # A bubble without a <span> carries no status text.
        texts << span.inner_text if span
      end
    end
  end
  texts
end

# Sends every status text to the Yahoo keyphrase endpoint and tallies the
# phrases found in the "ResultSet/Result/Keyphrase" elements of the replies.
#
# statuses - Array of status texts (Strings); converted to UTF-8 and
#            URL-escaped before being sent.
#
# Returns a Hash (default value 0) mapping phrase => number of results that
# contained it. Requests that do not succeed are reported on stderr and skipped.
def count_keyphrases(statuses)
  counter = Hash.new(0)
  return counter if statuses.empty?

  base_path = YahooApiPath1 + YahooApiId + YahooApiPath2

  Net::HTTP.start(YahooApiAddress) do |http|
    statuses.each do |status|
      response = http.request_get(base_path + CGI.escape(Kconv.toutf8(status)))
      unless response.is_a?(Net::HTTPSuccess)
        warn "keyphrase: request returned HTTP #{response.code}, skipped"
        next
      end

      doc = REXML::Document.new(response.body)
      doc.elements.each("ResultSet/Result") do |result|
        keyphrase = result.elements["Keyphrase"]
        phrase = keyphrase && keyphrase.text
        # A missing or empty Keyphrase element would otherwise add a nil key,
        # which cannot be compared with String keys when sorting.
        counter[phrase] += 1 if phrase
      end
    end
  end
  counter
end

# arg
$user = ARGV.empty? ? 'yasu_kobayashi' : ARGV[0]
$max_page = ARGV.length >= 2 ? ARGV[1].to_i : 10

# global (kept as globals so the script's top-level state is unchanged)

# parse
$status_text = fetch_status_texts($user, $max_page)

# analyze
$phrase_counter = count_keyphrases($status_text)

# output: highest count first, ties broken by phrase in ascending order
$phrase_counter.sort_by { |phrase, count| [-count, phrase] }.each do |phrase, count|
  printf("%s = %d\n", phrase, count)
end