Categories
Uncategorized

Obsolete New Page Search – Python

Obsolete code that tells you whether there is a “Next” page by looking for the string rel="next" in the downloaded HTML.

#!/usr/bin/python
# go to next page

import urllib,re

# Address of the page that gets downloaded.
webSite = "http://any.siteYou.want/"
# File the downloaded HTML is written to and later searched.
rawData = "rawHtml.txt"
# Marker text identifying the line that holds the "next page" link.
findString = 'rel="next"'

# downloads and saves HTML
def saveHTML():
	"""Download ``webSite`` and store its raw HTML in the file ``rawData``.

	The page is fetched completely before the output file is opened, so a
	failed download does not leave a truncated (empty) file behind. The
	connection and the file are both closed even if reading or writing
	raises.

	Uses ``urllib.urlopen``, i.e. the Python 2 ``urllib`` API.
	"""
	accPag=urllib.urlopen(webSite)
	try:
		wPageSrc=accPag.read()
	finally:
		# try/finally rather than `with`: the object returned by Python 2's
		# urllib.urlopen does not support the context-manager protocol.
		accPag.close()
	# Open the output only once there is something to write.
	with open(rawData,"w") as wRawData:
		wRawData.write(wPageSrc)

# then finds the line with the link
def getLink():
	"""Return the first line of the file ``rawData`` that contains ``findString``.

	Returns:
		The matching line exactly as read from the file (including its
		trailing newline, if any), or None when no line matches.
	"""
	# `with` closes the file on every exit path. Returning from inside the
	# loop with a manual close() after it would skip the close on a match.
	with open(rawData,"r") as rRawData:
		for line in rRawData:
			if findString in line:
				return line
	return None

# removes all the crap
def printLink(link=None):
	"""Print the page value extracted from a ``rel="next"`` link line.

	Everything up to and including the last ``page=`` on the line, and
	everything from ``" title`` onwards, is removed; the remainder is printed.

	Args:
		link: The line to clean up. When omitted, the module-level
			``dasLink`` is used, so the zero-argument call keeps working.

	Nothing is printed when there is no link (None), which is what
	``getLink()`` yields when no line matches. Passing None on to ``re.sub``
	would raise TypeError.
	"""
	if link is None:
		link=dasLink
	if link is None:
		return
	clearedStuff1=re.sub(r"^.*page=","",link)
	clearedStuff2=re.sub(r'" title.*$',"",clearedStuff1)
	# A line read from a file keeps its newline; drop it so print does not
	# emit an extra blank line. The parenthesised single-argument print is
	# valid both as a Python 2 statement and as the Python 3 function.
	print(clearedStuff2.rstrip("\r\n"))

# Run the three steps in order: download the page, find the line holding the
# "next" link, then print the cleaned-up value.
saveHTML()
# printLink() is called without arguments below; it reads this module-level
# name, so the assignment has to happen first.
dasLink=getLink()
printLink()

A pretty useless loop…

#!/usr/bin/python
# Next page number

# Fixed URL prefix and suffix; the page number is placed between them.
this="http://the.web.site/page/number_"
pagNum=".html"

# Print the URLs for pages 1 through 5 (range's upper bound is exclusive).
for x in range(1,6):
	newLink=[this,str(x),pagNum]
	dasLink="".join(newLink)
	# Parenthesised single-argument print: valid as a Python 2 statement and
	# as the Python 3 function, with identical output.
	print(dasLink)

Finally, just a simple “If it’s there then we cool”

#!/usr/bin/python
# Another page? Cool!

# Marker text whose presence means there is another page.
findString = 'rel="next"'
# File containing the previously saved HTML that gets searched.
rawData = "rawHtml.txt"

def getLink():
	"""Report whether any line of the file ``rawData`` contains ``findString``.

	Returns:
		The string "TRUE" (not a bool) as soon as a matching line is found,
		or None when no line matches.
	"""
	# `with` closes the file on every exit path. Returning from inside the
	# loop with a manual close() after it would skip the close on a match.
	with open(rawData,"r") as rRawData:
		for line in rRawData:
			if findString in line:
				return "TRUE"
	return None

# getLink() returns the string "TRUE" (not a bool) when the marker is found.
if getLink()=="TRUE":
	# Parenthesised single-argument print: valid as a Python 2 statement and
	# as the Python 3 function, with identical output.
	print("It be TRUE, yo")

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s