Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
168 views
in Technique[技术] by (71.8m points)

How to extract only particular set of structs from a file between braces in python

a. Have a scenario, where in my function reads in a file which contains list of c-structures as shown below, reads the file and extracts all the information between { } braces for each structure and store them in arrays.

package whatstruct;

  typedef struct packed {
    int [4:0] version;
    char party;
    float parity; 
    char ccc; 
    int [7:0] spend;
  } party_s;

  typedef struct packed {
    int [5:0] cod3;
    int [1:0] group;
    int [51:0] duty;
    char calloff; 
    char selfi;
    int [11:0] snap; 
    int [5:0] longtrip;
    int [1:0] whattodo;
    int [9:0] sky;
    int [7:0] yahoo;
    int [5:0] hurrey;
    int [3:0] appology;
    int [1:0] temp;
    int [2:0] iddd;  
    float dontknow; 
  } dict_s;

  typedef struct packed {
    int [4:0] jan;
    int [12:0] feb;
    int [1:0] mar;
    logic mar; 
    logic april; 
  } months_c;

  typedef struct packed {
    var apple;
    int mango;
    float banana;
    int lichi
  } fruits_s; 
endpackage: whatstruct

b. The functions reads in two parameters, one is the file containing the structures and the second parameter contains list of struct names only from which the lines needs to be extracted. For extracting all the infos between {}, tried some of the examples available as given below.

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip


readstructfile = None
filename = None
structnames = None


def readfileanddump(filename, structnames):
    readstructfile = open(filename, "r+")

    test_str = readstructfile.readlines() 

    matches = re.finditer(r"{(.*?)}", str(test_str), re.MULTILINE | re.DOTALL)

    for matchNum, match in enumerate(matches):
        for groupNum in range(0, len(match.groups())):
            print (match.group(1))

    # for lines in readstructfile:
    #     print lines

readfileanddump("structpkg.c", all);

c. Was able to extract all the infos between {} and dumpout the infos if the second parameter is provided as "all". But if the second parameter passed is a string containing struct names, only those struct that needs to be extracted. I am unclear how to do that. Is it possible using regexp ? or any other way is better?

Any suggestion is highly helpful !

Updated CODE:

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
from openpyxl import Workbook, load_workbook
from openpyxl.compat import range
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, PatternFill, Border, Color
from openpyxl.styles.colors import YELLOW 
from openpyxl.styles.borders import Border, Side
import re
import logging
import warnings
import os.path
import gzip

#import xlwt
#import xlrd 

readstructfile = None
filename = None
structnames = []
filename_and_structnames_l = [] 
global found_struct
global found_struct_idx


found_struct_idx = {} 
found_struct = 0

def readfileanddump(filename_and_structnames):
    global found_struct
    filename_and_structnames_l = filename_and_structnames.split(",") 

    if len(filename_and_structnames_l) < 2:
        filename = filename_and_structnames_l[0]
        structnames.append('all')
        print "1. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 1 and len(filename_and_structnames_l) < 3:
        filename = filename_and_structnames_l[0]
        structnames.append(filename_and_structnames_l[1])
        print "2. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 2: 
        filename = filename_and_structnames_l[0]
        for i in range (1, len(filename_and_structnames_l)):
            structnames.append(filename_and_structnames_l[i])
        print "3. Value of filename %s and structnames %s"%(filename, str(structnames))


    if (len(structnames) == 1) and (structnames[0] == 'all'):
        readstructfile = open(filename, "r+")
        test_str = readstructfile.readlines() 
        matches = re.finditer(r"{(.*?)}", str(test_str), re.MULTILINE | re.DOTALL)

        for matchNum, match in enumerate(matches):
            for groupNum in range(0, len(match.groups())):
                print (match.group(1))
                # match_group_t = str(str(re.sub('[A-Za-z0-9_[]:]+', '', str(match.group(1)))).strip())
                # match_group_t = match.groups(1)
                #for i in range (len(match_group_t)):
                    # print match_group_t[i].replace("
","")
                    # print str(str(re.sub('[^A-Za-z0-9[:]]+[
]+', ' ', str(match_group_t[i]))))
                    # print re.sub(r"(?<=[a-z])
?
"," ", match_group_t[i]) 
                    # print ''.join(ch for ch in match_group_t[i] if not ch.isspace())
                    # print ''.join(match_group_t[i].strip().split())
                    #print(re.sub(r"(?:[;
']|s{2,})",r'',match_group_t[i])[2:])
                # print match_group_t
                print(re.sub(r"(?:[;
']|s{2,})",r'',match.group(1))[2:])
    else:
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
                matches = re.finditer(r"{(.*?)}.*", str(lines), re.MULTILINE | re.DOTALL)
                print "Value in matches", matches
            if found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                found_struct_t = str(str(re.sub('[^A-Za-z0-9_]+', ' ', str(lines))).strip()).split(" ")

                for i in range (len(structnames)):
                    if structnames[i] == found_struct_t[0]:
                        # print "value of found_struct_t", found_struct_t
                        # #found_struct_idx.append(found_struct_t[0]) 
                        # found_struct_idx[structnames[i]] = i 
                        # print "Value of found_struct_idx", found_struct_idx
                        # break

                        print "Value in found_struct_t", found_struct_t 
                        # for matchNum, match in enumerate(matches):
                        #     for groupNum in range(0, len(match.groups())):
                        #         print (match.group(1))

readfileanddump('alldetailspkg');
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

Got It Working As Per My Req:

from Tkinter import *
import subprocess
import shlex
import os 
import time
import string
import threading
import sys, argparse
import ttk
import re
import logging
import warnings
import os.path
import gzip

#import xlwt
#import xlrd 

readstructfile = None
filename = None
structnames = []
filename_and_structnames_l = [] 
global found_struct
global found_struct_idx
global temp_struct
global final_struct

found_struct_idx = {} 
found_struct = 0
temp_struct = []
final_struct = [] 

def readfileanddump(filename_and_structnames):
    global found_struct
    global temp_struct
    global final_struct
    filename_and_structnames_l = filename_and_structnames.split(",") 

    if len(filename_and_structnames_l) < 2:
        filename = filename_and_structnames_l[0]
        structnames.append('all')
        print "1. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 1 and len(filename_and_structnames_l) < 3:
        filename = filename_and_structnames_l[0]
        structnames.append(filename_and_structnames_l[1])
        print "2. Value of filename %s and structnames %s"%(filename, str(structnames))
    elif len(filename_and_structnames_l) > 2: 
        filename = filename_and_structnames_l[0]
        for i in range (1, len(filename_and_structnames_l)):
            structnames.append(filename_and_structnames_l[i])
        print "3. Value of filename %s and structnames %s"%(filename, str(structnames))


    if (len(structnames) == 1) and (structnames[0] == 'all'):
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
            elif found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                print "Value of temp_struct",temp_struct
                final_struct = temp_struct
                print "Value of final_struct", final_struct
            elif found_struct == 1:
                print "Value of lines",lines
                lines = str(lines.strip()).replace(";","")
                print "Value of lines b4",lines
                print "Value of lines a8",lines.strip()
                #print(re.sub(r"(?:[;]|s{2,})",r'',lines)[2:])
                temp_struct.append(lines)
    else:
        readstructfile = open(filename, "r+")
        for lines in readstructfile:
            if found_struct == 0 and re.match(r'.*typedef struct', lines):
                found_struct = 1
                temp_struct = None; temp_struct = []
            elif found_struct == 1 and re.match(r'.*}.*', lines):
                found_struct = 0
                reached_struct = re.sub(r'.*}.|;',r'',lines)
                for i in range (len(structnames)):
                    print "Value of structnames[%d] %s and lines is %s"%(i, structnames[i], reached_struct)
                    if str(structnames[i]).strip() == str(reached_struct).strip():
                        for i in range (len(temp_struct)):
                            final_struct.append(temp_struct[i])
                        print "Value of temp_struct",temp_struct
                        print "Value of final_struct",final_struct
            elif found_struct == 1:
                # print "Value of lines",lines
                lines = str(lines.strip()).replace(";","")
                # print "Value of lines b4 strip",lines
                # print "Value of lines a8 strip",lines.strip()
                temp_struct.append(lines)

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...