This is how you read a pajek file and move it all to a database.
This consists of three files, all in the same directory
sample.txt
– the pajek sample file
pajek_rw.py
– module that reads the pajek file
g2db.py
– which moves all the data to a database
sample.txt
This is what the pajek file looks like
*Vertices 5
1 "Jose"
2 "ShaNayNay"
3 "Watermelondrea"
4 "Barbeesha"
5 "Rieenanay"
*Edges
1 5
3 4
2 4
pajek_rw.py
This is the code that reads it…
import re
from re import sub


# yeah, that's right...a class
class pajekRead():
    """Parse the text of a Pajek network file into nodes and edges.

    The text is cut into sections at every line that begins with ``*``.
    The body (header line removed) of each ``Vertices`` section is kept in
    ``str_nodes`` and the body of each ``Edges`` section in ``str_edges``.
    Section names are matched case-insensitively.  The accessor methods
    only look at the first section of each kind.

    Parameters
    ----------
    str_file_data : str
        Complete contents of the Pajek file.
    """

    def __init__(self, str_file_data):
        self.str_data = str_file_data
        self.str_nodes = []
        self.str_edges = []
        # Normalise line endings so a stray '\r' never ends up in a label.
        str_clean = str_file_data.replace('\r\n', '\n').replace('\r', '\n')
        # Split only on a '*' that starts a line: a '*' inside a label must
        # not terminate the section.
        for str_comps in re.split(r'(?m)^\*', str_clean):
            str_head = str_comps[:8].lower()
            # Everything after the header line is the section body.
            str_body = str_comps.partition('\n')[2]
            if str_head.startswith('edges'):
                self.str_edges.append(str_body)
            if str_head.startswith('vertices'):
                self.str_nodes.append(str_body)

    def _iterNodes(self):
        """Yield ``(node_id, label)`` for each line of the first vertex section.

        Blank lines are skipped.  A label wrapped in double quotes may
        contain spaces; anything after the label on the line is ignored.

        Raises
        ------
        ValueError
            If a line has no label or its id is not an integer.
        """
        if not self.str_nodes:
            return
        for str_each_node in self.str_nodes[0].split('\n'):
            list_comps = str_each_node.split(None, 1)
            if not list_comps:
                continue
            if len(list_comps) < 2:
                raise ValueError(
                    'vertex line has no label: %r' % str_each_node)
            str_id, str_rest = list_comps
            if str_rest.startswith('"'):
                # Quoted label: take everything up to the closing quote.
                str_node_name = str_rest[1:].partition('"')[0]
            else:
                # Bare label: first token, stray quotes removed.
                str_node_name = sub('"', '', str_rest.split(None, 1)[0])
            yield int(str_id), str_node_name

    # raw dictionary of the node ids and names
    def gRawNodes(self):
        """Return ``{label: node_id}``.

        Because labels are the keys, vertices sharing a label collapse to
        the last id seen; use :meth:`gNodes` for a lossless mapping.
        Returns an empty dict when the file has no vertex section.
        """
        dict_nodes = {}
        for int_id, str_node_name in self._iterNodes():
            dict_nodes[str_node_name] = int_id
        return dict_nodes

    # good dictionary to return
    def gNodes(self):
        """Return ``{node_id: label}`` for every vertex in the file.

        Built directly from the vertex lines (not by inverting
        :meth:`gRawNodes`) so vertices with identical labels are all kept.
        """
        dict_main = {}
        for int_id, str_node_name in self._iterNodes():
            dict_main[int_id] = str_node_name
        return dict_main

    # list of all the edges as lists
    def gEdges(self):
        """Return the edges as a list of ``[id_from, id_to]`` lists.

        Fields may be separated by any run of whitespace.  Columns after
        the second (e.g. an edge weight) are ignored.  Lines with fewer
        than two fields are skipped.  Returns an empty list when the file
        has no edge section.

        Raises
        ------
        ValueError
            If either of the first two fields is not an integer.
        """
        list_edges = []
        if not self.str_edges:
            return list_edges
        for str_each_edge in self.str_edges[0].split('\n'):
            list_comps = str_each_edge.split()
            if len(list_comps) >= 2:
                list_edges.append([int(list_comps[0]), int(list_comps[1])])
        return list_edges
gNodes()
returns a dictionary mapping each node id to its user name
> {1:'Jose',2:'ShaNayNay',3:'Watermelondrea',4:'Barbeesha',5:'Rieenanay'}
and gEdges()
returns
> [[1,5],[3,4],[2,4]]
g2db.py
This connects to the db and stores all the data
# Load the nodes and edges of a Pajek file (sample.txt) into two MySQL
# tables, t_<graph>_nodes and t_<graph>_edges, creating them if needed.

import re

# import the mysqldb module
import MySQLdb

# import the above script
import pajek_rw

# raw_input only exists on Python 2; fall back to input() on Python 3
try:
    read_line = raw_input
except NameError:
    read_line = input

# get the name of the database
str_db = read_line('Enter db Name: ')

# connect to the database
# NOTE(review): credentials are hard-coded here; move them to a config file
# or environment variables before using this outside a tutorial.
db_connect = MySQLdb.connect(host='127.0.0.1',
                             user='myUserName',
                             passwd='MyVerySecretPassword',
                             db=str_db)

# everything below runs inside try/finally so the connection is always closed
try:
    # get the name of the graph
    str_graph = read_line('Enter graph Name: ')

    # The graph name becomes part of the table names, and identifiers cannot
    # be passed as query parameters, so restrict it to safe characters.
    if not re.match(r'[A-Za-z0-9_]+\Z', str_graph):
        raise SystemExit(
            'Graph name may only contain letters, digits and underscores.')
    str_nodes_table = 't_%s_nodes' % str_graph
    str_edges_table = 't_%s_edges' % str_graph

    # get the cursor
    db_cursor = db_connect.cursor()

    # create a table for nodes if it does not exist...
    # (IF NOT EXISTS replaces a bare "except: pass", which also hid real
    # failures such as missing privileges)
    db_cursor.execute(
        'CREATE TABLE IF NOT EXISTS ' + str_nodes_table +
        ' (usr_id INT, usr_name VARCHAR(100))')

    # ...do same for edges
    db_cursor.execute(
        'CREATE TABLE IF NOT EXISTS ' + str_edges_table +
        ' (id_from INT, id_to INT)')

    # open pajek file and read as string
    with open('sample.txt', 'r') as fr_data:
        str_file_data = fr_data.read()

    # pass the string through the above script
    g_data = pajek_rw.pajekRead(str_file_data)

    # get node dictionary and edge list
    dict_nodes = g_data.gNodes()
    list_edges = g_data.gEdges()

    # move nodes to database
    # Values go in as query parameters so the driver escapes them; a name
    # containing a quote character no longer breaks the statement.
    list_node_rows = [(int(int_id), dict_nodes[int_id])
                      for int_id in dict_nodes]
    if list_node_rows:
        db_cursor.executemany(
            'INSERT INTO ' + str_nodes_table +
            ' (usr_id, usr_name) VALUES (%s, %s)',
            list_node_rows)

    # move edges to database
    list_edge_rows = [(list_each[0], list_each[1])
                      for list_each in list_edges]
    if list_edge_rows:
        db_cursor.executemany(
            'INSERT INTO ' + str_edges_table +
            ' (id_from, id_to) VALUES (%s, %s)',
            list_edge_rows)

    # commit
    db_connect.commit()
finally:
    # close connection
    db_connect.close()
Running it
In a terminal, cd to the directory and run g2db.py. You will be asked for the name of the database (mine is called ‘sample’) and the name of the graph (mine is called ‘graph1’).
$ python g2db.py
Enter db Name: sample
Enter graph Name: graph1
This is what each table should look like