Categories
Uncategorized

Pajek to database – Python

This is how you read a pajek file and move it all to a database.

This consists of three files, all in the same directory
sample.txt – the pajek sample file
pajek_rw.py – module that reads the pajek file
g2db.py – which moves all the data to a database

sample.txt

This is what the pajek file looks like

*Vertices 5
1 "Jose"
2 "ShaNayNay"
3 "Watermelondrea"
4 "Barbeesha"
5 "Rieenanay"
*Edges
1 5
3 4
2 4

pajek_rw.py

This is the code that reads it…

from re import sub

# yeah, that's right...a class
class pajekRead():
    """Parse the text of a Pajek network file into nodes and edges.

    The text is scanned line by line.  A line starting with '*' opens a
    new section; the lines that follow belong to that section until the
    next '*' line.  Only '*Vertices' and '*Edges' sections are kept
    (matched case-insensitively); every other section is ignored.

    Attributes:
        str_data:  the raw text handed to the constructor.
        str_nodes: list with the body text of each '*Vertices' section.
        str_edges: list with the body text of each '*Edges' section.
    """

    def __init__(self,str_file_data):
        self.str_data = str_file_data

        list_node_secs = []
        list_edge_secs = []

        # list collecting the lines of the section being read, or None
        # while we are outside of a section we care about
        list_current = None

        # keep the line endings so the stored section text is the
        # untouched slice of the input
        for str_line in str_file_data.splitlines(True):
            if str_line.startswith('*'):
                # only a '*' at the start of a line is a header, so a
                # '*' inside a node label no longer splits the file
                str_header = str_line[1:].strip().lower()
                if str_header.startswith('vertices'):
                    list_current = []
                    list_node_secs.append(list_current)
                elif str_header.startswith('edges'):
                    list_current = []
                    list_edge_secs.append(list_current)
                else:
                    list_current = None
            elif list_current is not None:
                list_current.append(str_line)

        self.str_nodes = [''.join(list_sec) for list_sec in list_node_secs]
        self.str_edges = [''.join(list_sec) for list_sec in list_edge_secs]

    # (id, name) pairs of the first vertices section, in file order
    def _gNodePairs(self):
        """Return a list of (int id, str name) tuples.

        Returns an empty list when the file has no vertices section.
        Raises ValueError when a non-blank line has no label or its id
        is not an integer.
        """
        list_pairs = []
        if not self.str_nodes:
            return list_pairs

        for str_each_node in self.str_nodes[0].splitlines():
            str_each_node = str_each_node.strip()
            if not str_each_node:
                continue

            # split off the id only; the rest may hold a quoted label
            list_comps = str_each_node.split(None, 1)
            if len(list_comps) < 2:
                raise ValueError('vertex line has no label: %r'
                                 % str_each_node)

            int_id = int(list_comps[0])
            str_rest = list_comps[1]

            if str_rest.startswith('"'):
                # quoted label: everything up to the closing quote,
                # spaces included
                int_end = str_rest.find('"', 1)
                if int_end == -1:
                    str_node_name = str_rest[1:]
                else:
                    str_node_name = str_rest[1:int_end]
            else:
                # bare label: first token only, anything after it on
                # the line is ignored
                str_node_name = str_rest.split()[0]

            list_pairs.append((int_id, str_node_name))
        return list_pairs

    # raw dictionary of the node ids and names
    def gRawNodes(self):
        """Return a dict mapping node name -> node id.

        Names are the dict keys, so when two nodes share a name only
        the one appearing last in the file is kept.  Use gNodes() to
        get every node.
        """
        dict_nodes = {}
        for int_id, str_node_name in self._gNodePairs():
            dict_nodes[str_node_name] = int_id
        return dict_nodes

    # good dictionary to return
    def gNodes(self):
        """Return a dict mapping node id -> node name.

        Built straight from the file rather than by inverting
        gRawNodes(), so nodes with identical names are all kept.
        """
        return dict(self._gNodePairs())

    # list of all the edges as lists
    def gEdges(self):
        """Return the edges of the first edges section as [from, to] lists.

        Lines are split on any whitespace and only the first two tokens
        are used, so extra columns after the endpoints are ignored.
        Lines with fewer than two tokens are skipped.  Returns an empty
        list when the file has no edges section.  Raises ValueError when
        an endpoint is not an integer.
        """
        list_edges = []
        if not self.str_edges:
            return list_edges

        for str_each_edge in self.str_edges[0].splitlines():
            list_comps = str_each_edge.split()
            if len(list_comps) >= 2:
                list_out = [int(list_comps[0]),int(list_comps[1])]
                list_edges.append(list_out)
        return list_edges

gNodes() returns a dictionary mapping each node id to its user name

> {1:'Jose',2:'ShaNayNay',3:'Watermelondrea',4:'Barbeesha',5:'Rieenanay'}

and gEdges() returns

> [[1,5],[3,4],[2,4]]

g2db.py

This connects to the db and stores all the data

# import the mysqldb module
import MySQLdb
 
# import the above script
import pajek_rw
 
# Reads sample.txt with pajek_rw and stores its nodes and edges in the
# MySQL tables t_<graph>_nodes and t_<graph>_edges.

# standard-library regex, only used to validate the graph name below
import re

# raw_input only exists on Python 2; fall back to input on Python 3
try:
    fn_prompt = raw_input
except NameError:
    fn_prompt = input

# get the name of the database
str_db = fn_prompt('Enter db Name: ')

# connect to the database
db_connect = MySQLdb.connect(host = '127.0.0.1',
                             user = 'myUserName',
                             passwd = 'MyVerySecretPassword',
                             db = str_db)

# everything after connecting runs inside try/finally so the connection
# is closed even when a later step fails
try:
    # get the name of the graph
    str_graph = fn_prompt('Enter graph Name: ')

    # table names cannot be sent as query parameters, so the graph name
    # is spliced into the SQL text; only accept names that are safe there
    if not re.match(r'[A-Za-z0-9_]+\Z', str_graph):
        raise ValueError('graph name may only contain letters, digits '
                         'and underscores: %r' % str_graph)

    str_tbl_nodes = 't_%s_nodes' % str_graph
    str_tbl_edges = 't_%s_edges' % str_graph

    # get the cursor
    db_cursor = db_connect.cursor()

    # create a table for nodes if it does not exist...
    # (IF NOT EXISTS replaces the old bare "except: pass", which also
    # hid real failures such as a bad table name or missing privileges)
    db_cursor.execute('''CREATE TABLE IF NOT EXISTS %s (
                         usr_id INT,
                         usr_name VARCHAR(100))''' % str_tbl_nodes)

    # ...do same for edges
    db_cursor.execute('''CREATE TABLE IF NOT EXISTS %s (
                         id_from INT,
                         id_to INT)''' % str_tbl_edges)

    # open pajek file and read as string
    with open('sample.txt','r') as fr_data:
        str_file_data = fr_data.read()

    # pass the string through the above script
    g_data = pajek_rw.pajekRead(str_file_data)

    # get node dictionary and edge list
    dict_nodes = g_data.gNodes()
    list_edges = g_data.gEdges()

    # move nodes to database
    # values go in as query parameters (the doubled %% leaves a literal
    # %s placeholder for the driver), so names containing quotes are
    # stored correctly instead of breaking the statement
    db_cursor.executemany(
        'INSERT INTO %s (usr_id, usr_name) VALUES (%%s, %%s)' % str_tbl_nodes,
        [(int(int_id), str_name) for int_id, str_name in dict_nodes.items()])

    # move edges to database
    db_cursor.executemany(
        'INSERT INTO %s (id_from, id_to) VALUES (%%s, %%s)' % str_tbl_edges,
        [(list_each[0], list_each[1]) for list_each in list_edges])

    # commit only once every insert has gone through
    db_connect.commit()
finally:
    # close connection
    db_connect.close()

Running it

In a terminal, cd to the directory and run g2db.py. You will be asked for the name of the database (mine is called 'sample') and the name of the graph (mine is called 'graph1').

$ python g2db.py
Enter db Name: sample
Enter graph Name: graph1

This is what each table should look like

t_graph1_nodes

Screen Shot 2013-07-30 at 11.11.12

t_graph1_edges

Screen Shot 2013-07-30 at 11.10.14

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s