Categories
Uncategorized

Dated posts for a SN – R

# Data is entered into a csv file, with the following format,
# separated by months, just to make it easy-ish.
# Date is formatted for MySQL.
from     to      date
user1   user2  2009-03-01
user5   user8  2010-04-01
user1   user6  2009-08-01

# Read the csv straight into a data frame; header=FALSE means the columns
# get the automatic names V1, V2 and V3.
> ddf <- read.csv(file = "location/of/file.csv", header = FALSE)

# calculate all the different dates
# One row per distinct value of V3, in sorted order. The column is built
# directly and named Var1 here instead of relying on the name that
# data.frame(table(...)) happens to pick: newer R releases label that column
# after the source column (V3), which makes a ['Var1'] selection fail.
> postDates <- data.frame(Var1 = levels(factor(ddf[["V3"]])))

# so if we enter
> postDates

# it should return
2009-03-01
2010-04-01
2009-08-01

# Then we create a sub set for each of the different dates, and
# select only V1 and V2 because we don't want dates anymore.
# The row filter must use `==`: written as V3='...' the date is handed to
# subset() as a stray named argument, no filtering happens and every
# "subset" ends up holding all the rows of ddf.
g1ddf <- subset(ddf, V3 == "2009-03-01", select = c(V1, V2))
g2ddf <- subset(ddf, V3 == "2010-04-01", select = c(V1, V2))
g3ddf <- subset(ddf, V3 == "2009-08-01", select = c(V1, V2))

# Keep only the two node columns (V1 and V2) of every row, dropping the date
gdf <- ddf[c("V1", "V2")]

# and we get the frequency table and classify them in layers,
# using this function.
#
# Build a per-node frequency table from a two-column edge list.
#
# dfx: data frame whose first two columns hold the two end points of each
#      edge; both columns are pooled, so direction is ignored.
#
# Returns a data frame with one row per distinct node, sorted by decreasing
# frequency, with the columns:
#   user  - node label (factor, as produced by table())
#   Freq  - number of times the node appears in either column
#   PcP   - Freq as a proportion of all appearances
#   cSum  - running total of PcP down the sorted table
#   layer - integer: 1 if cSum <= 0.33, 2 if 0.33 < cSum <= 0.66, 3 otherwise
#   CD    - Freq / (number of distinct nodes - 1); this divides by zero
#           (giving Inf) when the input contains a single distinct node
get.freq.table <- function(dfx) {
  # Pool both end-point columns into one vector of node labels
  nodes <- c(as.character(dfx[[1]]), as.character(dfx[[2]]))

  counts <- as.data.frame(table(user = nodes))
  # order() is stable, so ties keep the alphabetical order table() produced
  counts <- counts[order(-counts$Freq), ]
  rownames(counts) <- NULL

  counts$PcP <- counts$Freq / sum(counts$Freq)
  counts$cSum <- cumsum(counts$PcP)

  # Right-closed intervals (-Inf, 0.33], (0.33, 0.66], (0.66, Inf) -> 1, 2, 3
  counts$layer <- cut(
    counts$cSum,
    breaks = c(-Inf, 0.33, 0.66, Inf),
    labels = FALSE
  )

  counts$CD <- counts$Freq / (nrow(counts) - 1)
  counts
}

# Build the frequency / layer table for the node pairs
tgdf <- get.freq.table(gdf)

# Collect, as plain character vectors, the users whose layer (column 5 of
# the table) is 1, 2 and 3 respectively
set1 <- as.character(tgdf$user[which(tgdf[[5]] == 1)])
set2 <- as.character(tgdf$user[which(tgdf[[5]] == 2)])
set3 <- as.character(tgdf$user[which(tgdf[[5]] == 3)])

# Then using this function....
#
# Set the color, size and bcol vertex attributes of a graph according to the
# layer each vertex belongs to, and return the modified graph.
#
# R copies `g` when it is modified inside the function, so the caller's graph
# is never changed in place: the result has to be assigned back, e.g.
#   s1g <- color.nodes(s1g)
#
# g:      graph whose vertices can be looked up by name (V(g)$name is read;
#         presumably always set for graphs built with graph.data.frame --
#         confirm for graphs built some other way)
# layer1, layer2, layer3:
#         character vectors of vertex names for each layer; they default to
#         the set1 / set2 / set3 vectors defined earlier in this script
#
# Names that are not vertices of `g` are ignored, so the layer sets may be
# computed from a larger edge list than the one `g` was built from.
color.nodes <- function(g, layer1 = set1, layer2 = set2, layer3 = set3) {
  present <- V(g)$name

  # Apply one layer's styling to the members that actually occur in the graph
  style.layer <- function(g, members, color, size, bcol) {
    members <- intersect(members, present)
    if (length(members) > 0) {
      V(g)[members]$color <- color
      V(g)[members]$size <- size
      V(g)[members]$bcol <- bcol
    }
    g
  }

  g <- style.layer(g, layer1, "#2244CC", 4, "#000088")
  g <- style.layer(g, layer2, "#3388EE", 3, "#1166CC")
  g <- style.layer(g, layer3, "#55AADD", 1, NA)
  g
}

# Now we create a simple graph of stuff
# (graph.data.frame, simplify and V come from the igraph package, which has
# to be loaded before this point)
s1g <- simplify(graph.data.frame(g1ddf, directed = FALSE))
s2g <- simplify(graph.data.frame(g2ddf, directed = FALSE))
s3g <- simplify(graph.data.frame(g3ddf, directed = FALSE))

# color the nodes; keep the returned graph, the argument itself is unchanged
s1g <- color.nodes(s1g)

#...to be continued

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s