# Dumps the favourites pages of a Twitter user as quoted CSV on standard output.
#
# Usage: ruby favorite2.rb username password [max_page=1000] [target=username]
#
# For every page N in 1..max_page the path /<target>/favourites?page=N is
# requested from twitter.com with HTTP basic auth and parsed with Hpricot.
# Each table row that has a cell matching td[@class~='content'] is printed as
#   "screen_name","entry text","abbr title","entry-date href"
# Diagnostics are written to standard error so the CSV stream stays clean.
#
# NOTE(review): Net::HTTP.start is called with only a host name, i.e. plain
# HTTP on the default port, so the basic-auth credentials travel unencrypted.
require 'rubygems'
require 'hpricot'
require 'net/http'
Net::HTTP.version_1_2

# const
HttpString = 'http://'
TwitterAddress = 'twitter.com'
PathOfFavorite = '/favourites?page='
PathOfStatus = '/statuses/'

# arg
if ARGV.length < 2
  abort('usage: ruby favorite2.rb username password [max_page=1000] [target=username]')
end
$username = ARGV[0]
$password = ARGV[1]
# Non-numeric input yields 0 from to_i, which means no page is fetched.
$max_page = ARGV.length < 3 ? 1000 : ARGV[2].to_i
# The account whose favourites are listed defaults to the authenticating user.
$target = ARGV.length < 4 ? ARGV[0] : ARGV[3]

# Wraps one value in double quotes, doubling embedded double quotes so a
# quote inside the text cannot terminate the CSV field early.
def csv_field(value)
  %("#{value.to_s.gsub('"', '""')}")
end

# Pulls the four output fields out of a content cell.
# Raises NoMethodError when an expected child element is missing.
def favorite_fields(content)
  screen_name = content.at('strong').at('a').inner_text
  # Newlines and tabs are removed so each entry stays on one output line.
  entry_text = content.at("span[@class~='entry-content']").inner_text.delete("\n\t")
  url = content.at("a[@class~='entry-date']")['href']
  abbr_title = content.at('abbr')['title'] # presumably the timestamp -- TODO confirm
  [screen_name, entry_text, abbr_title, url]
end

# Prints one CSV line per row of the first <table> in +body+.
# +page+ is only used to label diagnostics.
def print_favorites(body, page)
  table = Hpricot(body).at('table')
  unless table
    warn "page #{page}: no <table> element found"
    return
  end

  table.search('tr').each do |tr|
    content = tr.at("td[@class~='content']")
    next unless content # a row without a content cell has nothing to extract

    begin
      puts favorite_fields(content).map { |field| csv_field(field) }.join(',')
    rescue StandardError => e
      # A malformed row must not stop the remaining rows from being printed.
      warn "page #{page}: skipped row (#{e.class}: #{e.message})"
    end
  end
end

# parse
1.upto($max_page) do |page|
  begin
    req = Net::HTTP::Get.new("/#{$target}#{PathOfFavorite}#{page}")
    req.basic_auth($username, $password)
    body = Net::HTTP.start(TwitterAddress) { |http| http.request(req).body }
    print_favorites(body, page)
  rescue StandardError => e
    # A failed page is reported and the loop moves on to the next page.
    warn "page #{page}: #{e.class}: #{e.message}"
  end
end