Categories
Uncategorized

Download Consecutive Pages – Python

#!/usr/bin/python
# Download consecutive pages.

import urllib,re

# Base URL of the pages to download; the page number (counter) is appended
# to it to build each page's link.
urlPatt="http://that.webSite.com/with/pageNumber"
# Output file names are built as dataFile1 + page number + dataFile2,
# e.g. "dataFile1.html" for the first page.
dataFile1="dataFile"
dataFile2=".html"
# Loop flag, kept as a string: the main loop runs while it equals "FALSE"
# and sets it to "TRUE" once no "Next Page" marker is found.
done="FALSE"
# Number of the page currently being fetched; starts at 1 and is incremented
# by the main loop for every page that has a successor.
counter=1

###### Functions ######

# write the raw HTML codes
def writeFile():
	"""Download the page at ``dieLink`` and save its raw HTML to ``dieFile``.

	Both names are module-level globals that the main loop assigns before
	each call: ``dieLink`` is the URL to fetch, ``dieFile`` the output path.

	The page is read in full before the output file is opened, so a failed
	download no longer creates or truncates the output file.
	"""
	openWebPage=urllib.urlopen(dieLink)
	try:
		pageSrcCode=openWebPage.read()
	finally:
		# The object returned by Python 2's urllib.urlopen() cannot be used
		# in a "with" statement, so close it explicitly even if read() fails.
		openWebPage.close()

	# Binary mode writes the downloaded bytes untouched (no newline
	# translation on Windows); "with" closes the file even if write() fails.
	with open(dieFile,"wb") as wDataFile:
		wDataFile.write(pageSrcCode)

# find out if there is a "next" page
def isThereNext():
	"""Report whether the page saved in ``dieFile`` mentions a next page.

	Scans the file named by the module-level global ``dieFile`` line by line
	for the literal text "Next Page".

	Returns the string "TRUE" as soon as a matching line is found, and None
	when no line contains the marker (callers compare the result to "TRUE").
	"""
	# "with" guarantees the handle is closed, including on the early return.
	with open(dieFile,"r") as rDataFile:
		for line in rDataFile:
			if "Next Page" in line:
				return "TRUE"
	# Explicit not-found result; same value the function returned implicitly.
	return None

##### Run The Stuff ######

while done=="FALSE":

	# Build this iteration's output file name and page URL from the counter.
	joinDataFile=[dataFile1,str(counter),dataFile2]
	joinLink=[urlPatt,str(counter)]
	dieFile="".join(joinDataFile)
	dieLink="".join(joinLink)

	# writeFile() and isThereNext() read dieFile/dieLink as globals.
	writeFile()
	# Single-argument print(...) emits exactly what the former print
	# statement did, and also parses under Python 3.
	print("%s <-Done" % dieFile)

	# Ask once per page: each call re-reads the saved file, and the result
	# is either "TRUE" or not, so a plain else covers the other case.
	if isThereNext()=="TRUE":
		counter+=1
	else:
		done="TRUE"
		print("### Finished OK ###")

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Connecting to %s