# Rssdler.rc (from TMB Wiki)
#
# Sample config file for RSS Downloader

# lines (like this one) starting with # are comments and will be ignored by the config parser
# sections are denoted by a line starting with [, then the name of the section, then ending with ]
# the section below is the global section; it is the only required section
# (though the program won't do much without others)
[global]
# Global options for RSSDler.

# Directory that downloaded files are saved to. Defaults to the working directory.
downloadDir = /home/colin/rss/dloaded/

# The 'working directory' of RSSDler: any filename specified without an absolute path
# is taken relative to this directory.
workingDir = /home/colin/rss/

# Minimum file size, specified in MB (remember 1024 MB == 1 GB).
# A file smaller than this will not be downloaded; if the file size cannot be
# determined, this check is ignored.
# 0 means no minimum, as does "None" (without the quotes).
#minSize = 0

# Maximum file size: a file larger than this will not be downloaded. Default is None.
# (presumably in MB, like minSize -- confirm against the RSSDler help)
# The line below is ignored because it starts with a #.
#maxSize = 2

# Write messages to a log file. 0 is off, 1 is just error messages, 3 is quite wordy,
# 5 is very, very wordy. Default 0.
log = 0
# File those log messages are written to. Default 'downloads.log'.
logFile = downloads.log

# Like log, but prints to the screen (errors to stderr, everything else to stdout).
# Default 3.
verbose = 3

# The place where a cookie file can be found. Default None.
cookieFile = ./cookies.txt

# Type of the cookie file found at the location above. Default MozillaCookieJar.
cookieType = MozillaCookieJar
# Other possible types are:
# cookieType = LWPCookieJar
# MSIECookieJar only works if urllib = False
# (note that this file sets urllib = True further down):
# cookieType = MSIECookieJar

# How long to wait between checks of the feeds, in minutes. Default 15.
scanMins = 10

# How long to wait between HTTP requests, in seconds. Default 0.
sleepTime = 2

# Whether to exit after scanning all the feeds, or to keep looping. Default False.
#runOnce = False

# Set to True to avoid having to install mechanize. The side effects are described
# in the help. Default False.
urllib = True

# the rest of the global options are described in the help, let's move on to a thread

###################
# The TMB RSS feed could be grabbed as a whole and a variety of expressions run against it 
# but I have chosen to save bandwidth and just grab the parts of the feed I want
###################

[TMB-EM]
# Feed section for category 6 of the TMB RSS feed, restricted to MP2 items and
# only scanned on Saturday mornings.

# Just the link to the feed.
link = http://themixingbowl.org/rss.xml?cat=6

# NOTE(review): presumably the directory this feed's downloads are saved to
# (compare download<x>Dir below) -- confirm against the RSSDler help.
directory = /home/colin/torrent_queue/TMB/

# Only check for new EMs between 3AM and 9AM on Saturday morning;
# there is no real point checking outside these times.
checkTime1Day   = Sat
checkTime1Start = 3
checkTime1Stop  = 9

# Without any of the download<x> or regEx options (detailed below), every item in the
# RSS feed will be downloaded, provided that it has not previously been downloaded.
# All regular expressions should be specified in lower case
# (except for character classes and other special regular expression characters,
# if you know what that means), because the string they are searched against
# is converted to lower case.
# The default value, None, makes regExTrue ignored:
# regExTrue = None


# Starting with regExTrue (RET from now on).

# I only want the MP2 versions.

regExTrue = mp2


# We might also want to make sure we don't download anything with nrg or ccd in the
# name, because those are undesirable formats, while making sure not to match a name
# that merely has one of those as a substring, e.g. enrgy
# (ok, not a great example; come up with something better and I'll include it).
# That is what regExFalse is for (REF from now on; \b indicates a word boundary):
# regExFalse = (\bnrg\b|\bccd\b)
# The default value, None, means it will be ignored:
# regExFalse = None

# At this point, as long as an item gets a positive hit in RET and no hit in REF,
# it will be downloaded. Equivalently, RET and REF are necessary and sufficient
# conditions for a download.
# Lengthy expressions can be constructed to deal with every combination of things you
# want, but there is a looping facility that gives more fine-grained control over the
# items we want to grab without having hundreds of characters on a single line,
# which of course gets rather unreadable.

# Making use of this looping facility keeps RET and REF necessary conditions
# (though that can be bypassed too, more on that later);
# however, they are no longer sufficient.
# download<x> is like regExTrue, but begins the definition of an 'item', and we can
# associate further actions with it if we so choose.
# Put a non-negative integer where <x> goes.
#download1 = ubuntu
# But say we love ubuntu and want to always grab everything that mentions it,
# so we want to ignore regExTrue: download<x>True 'bypasses' RET when set to False.
# Default True.
#download1True = False

# We could also bypass REF. We really don't like nrg, but we'll put up with ccd's,
# just for ubuntu.
# To be clear, download<x>False is a mixed-type option: it takes either True or False,
# for dealing with the global REF, or a string (like here) that specifies what amounts
# to a 'localized REF', which effectively says False to the global REF
# while at the same time specifying the local REF.
#download1False = \bnrg\b

# We don't want to download things like howtos, md5 files, etc., so we can set a
# minimum size (in MB). When not set to None, this overrides the global/thread minSize.
# Default None. Works like the thread-based minSize. A maxSize option is also available.
#download1MinSize = 10
#download1MaxSize = 750

# And finally, we can put our ubuntu stuff in a special folder, if we choose.
#download1Dir = /home/user/ubuntustuff

[TMB-Readers]
# Feed section for category 14 of the TMB RSS feed, filtered by uploader name.

# Just the link to the feed.
link = http://themixingbowl.org/rss.xml?cat=14

# NOTE(review): presumably the directory this feed's downloads are saved to --
# confirm against the RSSDler help.
directory = /home/colin/torrent_queue/TMB/

# I like just about everything that these guys do, so I will grab anything of theirs
# that goes up. The names are alternatives separated by |, so a hit on any one of them
# counts. They are not wrapped in \b word boundaries, so each name also matches as a
# substring of a longer word.
regExTrue = xill|plugg|bbjunky|djp|mook|glint