I have multiple .txt files in a directory, say, d:memdump.txt, 1.txt, ... 10.txt. A sample text file is given below:
Applications Memory Usage (kB):
Uptime: 7857410 Realtime: 7857410
** MEMINFO in pid 23875 [com.example.twolibs] **
Shared Private Heap Heap Heap
Pss Dirty Dirty Size Alloc Free
------ ------ ------ ------ ------ ------
Native 0 0 0 13504 10836 459
Dalvik 6806 7740 6580 24076 18523 5553
Stack 80 0 80
Cursor 0 0 0
Ashmem 0 0 0
Other dev 14741 836 1028
.so mmap 1367 448 1028
.jar mmap 0 0 0
.apk mmap 225 0 0
.ttf mmap 0 0 0
.dex mmap 1225 340 16
Other mmap 5 8 4
Unknown 3473 564 3432
TOTAL 27922 9936 12168 37580 29359 6012
Objects
Views: 62 ViewRootImpl: 2
AppContexts: 5 Activities: 2
Assets: 3 AssetManagers: 3
Local Binders: 9 Proxy Binders: 18
Death Recipients: 0
OpenSSL Sockets: 0
SQL
MEMORY_USED: 0
PAGECACHE_OVERFLOW: 0 MALLOC_SIZE: 0
I have to parse these files to get the values of PID, Native Heap Size, Native Heap Alloc size, Dalvik Heap Size and Dalvik Heap Alloc size, and plot a graph with these heap sizes, as shown below.
I am using the following code to achieve this:
import glob
import os
import re
import numpy as np
import matplotlib.pyplot as plt
# Work inside the folder that holds the dump files, so that the relative
# "*.txt" glob used by the parser finds them.
# NOTE(review): the path as posted ("D:Python_TrainingsMemInfoData") had no
# separators, which makes it a drive-relative name rather than a folder under
# D:\. The separators below are reconstructed -- confirm the real location.
# A raw string keeps the backslashes literal.
os.chdir(r"D:\Python_Trainings\MemInfoData")
# Accumulators shared with the plotting step. parse_dumpFiles() appends to
# them in place, one value per matching line of every dump file it reads.
pid_arr = []
native_heapsize_arr = []
dalvik_heapsize_arr = []
native_heapalloc_arr = []
dalvik_heapalloc_arr = []
pkg_name_arr = []

# Patterns are compiled once instead of once per line. Raw strings keep the
# regex escapes (\s, \d, \w, \[) intact.
_PID_RE = re.compile(r'pid\s+(\d+)')
# Dotted package name inside square brackets, e.g. "[com.example.twolibs]".
_PKG_NAME_RE = re.compile(r'\[(\w+(?:\.\w+)+)\]')
# Dump columns are: Pss, Shared Dirty, Private Dirty, Heap Size, Heap Alloc,
# Heap Free. Skip the first three, then capture Heap Size and Heap Alloc
# together so the size/alloc lists can never get out of step.
_NATIVE_HEAP_RE = re.compile(r'Native\s+\d+\s+\d+\s+\d+\s+(\d+)\s+(\d+)')
_DALVIK_HEAP_RE = re.compile(r'Dalvik\s+\d+\s+\d+\s+\d+\s+(\d+)\s+(\d+)')


#Method to parse the memory dump files
def parse_dumpFiles():
    """Scan every ``*.txt`` file in the current directory for memory figures.

    For each line of each file, the following are appended to the
    module-level lists when the line matches:

    * ``pid_arr``                -- the number following ``pid`` (int)
    * ``native_heapsize_arr``    -- Heap Size of the ``Native`` row (int)
    * ``native_heapalloc_arr``   -- Heap Alloc of the ``Native`` row (int)
    * ``dalvik_heapsize_arr``    -- Heap Size of the ``Dalvik`` row (int)
    * ``dalvik_heapalloc_arr``   -- Heap Alloc of the ``Dalvik`` row (int)
    * ``pkg_name_arr``           -- bracketed package name, stored once only

    Files are visited in sorted name order so the result does not depend on
    the order the operating system happens to list them in. A file that
    cannot be opened or read is reported and skipped; nothing from it is
    recorded.

    Returns:
        None. Results are delivered through the module-level lists.
    """
    for data_file in sorted(glob.glob("*.txt")):
        try:
            # The context manager closes the handle even if reading fails,
            # and there is no handle to close when open() itself fails.
            with open(data_file, "r") as fo:
                lines = fo.readlines()
        except IOError:
            print("Error: can't find file or read data")
            continue

        for line in lines:
            pid_match = _PID_RE.search(line)
            if pid_match:
                pid_arr.append(int(pid_match.group(1)))

            native_match = _NATIVE_HEAP_RE.search(line)
            if native_match:
                native_heapsize_arr.append(int(native_match.group(1)))
                native_heapalloc_arr.append(int(native_match.group(2)))

            dalvik_match = _DALVIK_HEAP_RE.search(line)
            if dalvik_match:
                dalvik_heapsize_arr.append(int(dalvik_match.group(1)))
                dalvik_heapalloc_arr.append(int(dalvik_match.group(2)))

            pkg_name_match = _PKG_NAME_RE.search(line)
            if pkg_name_match and pkg_name_match.group(1) not in pkg_name_arr:
                pkg_name_arr.append(pkg_name_match.group(1))
#end of parse_dumpFiles() Method
#Method to plot from Memory Dumps
def plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr, native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr):
    """Plot the heap series and list them, with the PIDs, in a table below.

    Every table column whose PID differs from the PID in the first column is
    painted red from top to bottom.

    Args:
        pid_arr: process id of each sample.
        native_heapsize_arr: native heap size of each sample.
        dalvik_heapsize_arr: Dalvik heap size of each sample.
        native_heapalloc_arr: native heap allocated of each sample.
        dalvik_heapalloc_arr: Dalvik heap allocated of each sample.
        pkg_name_arr: package names; the first one is used as the title.
            An empty list gives an empty title.

    The five value sequences must have the same length; numeric strings are
    accepted because the data is converted with ``dtype=int``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    #Create a figure and axes with room for the table
    fig = plt.figure()
    ax = plt.axes([0.2, 0.2, 0.7, 0.7])

    #Table data as a numpy array (one row per series, PID row last)
    tableData = np.asarray([native_heapsize_arr, dalvik_heapsize_arr, native_heapalloc_arr, dalvik_heapalloc_arr, pid_arr], dtype=int)
    sample_count = tableData.shape[1]

    # One x position / column label per sample, 10 apart (0, 10, 20, ...),
    # derived from the data instead of being fixed at eleven columns.
    xAxis_val = [10 * i for i in range(sample_count)]
    colLabels = tuple(str(x) for x in xAxis_val)

    # (label, marker, colour) for each plotted row, in the same order as the
    # rows of tableData, so row labels, row colours and lines always agree.
    # Colours are spelled out rather than read from private Axes internals;
    # they follow matplotlib's classic b, g, r, c, m ordering.
    series = (
        ('Native Heap Size', 'o', 'b'),
        ('Dalvik Heap Size', 'D', 'g'),
        ('Native Heap Allocated', '^', 'r'),
        ('Dalvik Heap Allocated', 'h', 'c'),
    )
    rowLabels = tuple(label for label, _, _ in series) + ('PID',)
    rowColours = [colour for _, _, colour in series] + ['m']

    #Show the table
    the_table = plt.table(cellText=tableData, rowLabels=rowLabels, rowColours=rowColours, colLabels=colLabels, loc='bottom')

    #Make some line plots
    for row, (label, marker, colour) in enumerate(series):
        ax.plot(xAxis_val, tableData[row], color=colour, linewidth=2.5, marker=marker, label=label)

    #show legend
    plt.legend(loc='upper right', fontsize=10)

    #set the column color where PID is different from 1st PID
    # With column labels present, table row 0 is the header and the data rows
    # are 1..len(rowLabels); data columns are numbered from 0.
    cells = the_table.get_celld()
    mismatched_cols = [col for col in range(sample_count) if tableData[-1, col] != tableData[-1, 0]]
    for col in mismatched_cols:
        for row in range(1, len(rowLabels) + 1):
            cells[(row, col)].set_color('r')

    #Turn off x-axis ticks and show the plot
    plt.xticks([])
    #Configure Y axis
    plt.ylim(0,60000)
    plt.yticks([10000,20000,30000,40000,50000,60000])
    plt.grid(True)

    # Fall back to an empty title when no package name was collected.
    title = pkg_name_arr[0] if pkg_name_arr else ""
    #Setting the name of the window title of the plot
    fig.canvas.set_window_title(title + "- Memory Dump Plot")
    #Setting the Title of the plot
    plt.title(title,color='r',fontsize=20)
    #Setting Y Label
    plt.ylabel('Heap Size', fontsize=14, color='r')
    #show plot
    plt.show()
#end of plt_MemDump() Method
# Parse the dump files and draw the plot only when this file is executed as a
# script, so that importing it does not parse anything or open a plot window.
if __name__ == "__main__":
    parse_dumpFiles()
    plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr, native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr)
Now I want to mark, with some color, the columns of the table where the PID value differs from the first PID value (e.g., columns 30, 60 and 100).
Can anybody help me to achieve this?
See Questions & Answers for more detail:
os