Categories
Uncategorized

Create List of Thread ID Numbers – Python

#!/usr/bin/python
# secSorter.py

import os,re

# Shared settings read by getLines() and slimLines()
comFile = "sectionThreads.txt"       # final output file written by slimLines()
comFileTmp = "%s.tmp" % comFile      # scratch file passed between the two steps
theDir = os.listdir(os.curdir)       # names in the current working directory

# html to lines
def getLines():
	"""Copy every line containing "whoposted" from the .html files to the temp file.

	Walks the names in the module-level ``theDir`` listing, reads each one
	whose name ends in ".html", and writes the matching lines unchanged to
	``comFileTmp`` (opened in "w" mode, so earlier content is discarded).
	Prints "<name> - Done" after each processed file.
	"""
	with open(comFileTmp, "w") as wComFileTmp:
		for uFile in theDir:
			if not uFile.endswith(".html"):
				continue
			# The context manager closes each input file as soon as it has been
			# scanned; previously the handle was opened and never closed.
			with open(uFile, "r") as rData:
				for line in rData:
					if "whoposted" in line:
						wComFileTmp.write(line)
			# Single-argument call form prints the same text on Python 2 and 3.
			print("%s - Done" % (uFile,))


# Lines to thread ids
def slimLines():
	"""Reduce each line of the temp file to its digits and write it to ``comFile``.

	For every line of ``comFileTmp``: first drop everything from the text
	'" onclick' to the end of the line, then drop every remaining character
	that is neither a digit nor a newline. The result is written to
	``comFile`` (opened in "w" mode). Per the file's own comment these digit
	runs are meant to be thread ids. Afterwards the temp file is deleted and
	a completion message is printed.
	"""
	# Raw strings keep the regex escapes literal; patterns are compiled once
	# instead of being looked up for every line.
	onclickTail = re.compile(r'" onclick.*$')
	notDigitOrNewline = re.compile(r"[^\d\n]")

	# Both files are closed even if reading or writing fails part-way.
	with open(comFileTmp, "r") as rComFileTmp:
		with open(comFile, "w") as wComFile:
			for line in rComFileTmp:
				head = onclickTail.sub("", line)
				wComFile.write(notDigitOrNewline.sub("", head))

	# Only reached on success, so the temp file survives a failed run.
	os.remove(comFileTmp)
	print("   :: Finished OK ::   ")


##### Run The Stuff ######

# Run Functions
# Order matters: getLines() writes the temp file that slimLines() then reads
# and deletes. Both calls run at module level, i.e. whenever this file is
# executed or imported.
getLines()
slimLines()



Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s