This code uses PRAW, the Python Reddit API Wrapper, to download the title, karma count, ranking position, and Reddit ID of every post within a subreddit.
This example collects that information for the top 100 posts in r/technology at one-hour intervals and then saves each snapshot to a file.
import praw
from time import gmtime, strftime
from threading import Timer

# this is the subreddit we want to work with
str_subreddit = 'technology'

# function that collects the posts and then re-arms the timer
def _getthePosts():
    # set the user agent for Reddit
    r = praw.Reddit(user_agent='_cool_stuff_')
    # log in as a user
    r.login('myUserName', '123456')
    # get the top submissions for the subreddit
    praw_subs = r.get_subreddit(str_subreddit).get_top(limit=100)
    # list where all the data will go
    list_sub_data = []
    # counter to track the position of each post
    int_sub = 1
    # iterate through the submissions
    for each_sub in praw_subs:
        # this will be the output line
        str_out = '%s\t%d\t%d\t"%s"' % (each_sub.id,    # Reddit's id
                                        int_sub,        # position
                                        each_sub.score, # karma
                                        each_sub.title) # title
        # append to the list above
        list_sub_data.append(str_out.encode('utf-8'))
        int_sub += 1
    # get the time the data was taken
    time_now = strftime("%w_%Y%m%d_%H%M%S", gmtime())
    print time_now
    # create a file named according to the time
    f_o = open('%s.txt' % time_now, 'w')
    f_o.write('\n'.join(list_sub_data))
    f_o.close()
    # set the timer for one hour and run the function again
    Timer(3600, _getthePosts).start()

_getthePosts()
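The listing above is written for Python 2 and the older PRAW 3 interface; r.login and get_subreddit were removed in later PRAW releases in favour of an OAuth-based client. As a rough sketch only, the same collection loop could look like the following on a current PRAW version and Python 3, assuming you have registered a "script" app on Reddit and substituting the placeholder credentials (YOUR_CLIENT_ID, YOUR_CLIENT_SECRET, user name, password) with your own:

import praw
from time import gmtime, strftime
from threading import Timer

# placeholder credentials for a Reddit "script" app -- replace with your own
reddit = praw.Reddit(client_id='YOUR_CLIENT_ID',
                     client_secret='YOUR_CLIENT_SECRET',
                     username='myUserName',
                     password='123456',
                     user_agent='_cool_stuff_')

def get_the_posts():
    rows = []
    # top 100 posts in r/technology, same fields as before
    top_posts = reddit.subreddit('technology').top(limit=100)
    for position, submission in enumerate(top_posts, start=1):
        rows.append('%s\t%d\t%d\t"%s"' % (submission.id, position,
                                          submission.score, submission.title))
    # timestamp used as the file name
    time_now = strftime('%Y%m%d_%H%M%S', gmtime())
    print(time_now)
    with open('%s.txt' % time_now, 'w', encoding='utf-8') as f_o:
        f_o.write('\n'.join(rows))
    # run again in one hour
    Timer(3600, get_the_posts).start()

get_the_posts()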