I need to search some XML files (all of them named pom.xml, including those in subfolders) for exactly the following text sequence, so that if somebody has written some text, or even a blank, inside it, I get an alert:
<!--
| Startsection
|-->
<!--
| Endsection
|-->
I'm running the following Python script, but it still does not match exactly: I also get an alert when only part of the text is present:
import re
import os
from os.path import join
# Matches the "Startsection" comment immediately followed by the "Endsection"
# comment, with nothing but whitespace between them.
# The pipes are escaped because a bare "|" is regex alternation (which made
# any lone "-->" count as a match), and whitespace is spelled "\s+".
comment = re.compile(
    r"<!--\s+\| Startsection\s+\|-->\s+<!--\s+\| Endsection\s+\|-->"
)
tag = "<module>"

# Walk the current directory tree and inspect every pom.xml found.
for root, dirs, files in os.walk("."):
    if "pom.xml" not in files:
        continue
    p = join(root, "pom.xml")
    print("Checking", p)
    with open(p) as f:
        s = f.read()
    # Report files that contain a <module> tag and the exact marker sequence.
    if tag in s and comment.search(s):
        print("Matched", p)
UPDATE #3
I expect the script to print the content of the <module> tag
if it exists between |--> and <!--
in the searched sequence:
<!--
| Startsection
|-->
<!--
| Endsection
|-->
For instance, after printing "Matched" and the name of the file, it should also print "example.test1" in the case below:
<!--
| Startsection
|-->
<module>example.test1</module>
<!--
| Endsection
|-->
UPDATE #4
I should be using the following:
import re
import os
from os.path import join
# Matches the "Startsection" comment immediately followed by the "Endsection"
# comment, with nothing but whitespace between them.
# The pipes are escaped because a bare "|" is regex alternation (which made
# any lone "-->" count as a match), and whitespace is spelled "\s+".
# re.MULTILINE is not needed: the pattern has no "^" or "$" anchors.
comment = re.compile(
    r"<!--\s+\| Startsection\s+\|-->\s+<!--\s+\| Endsection\s+\|-->"
)
tag = "<module>"
skipped_dirs = ("test1", "test2", ".repotest")

for root, dirs, files in os.walk("/home/temp/test_folder/"):
    # Prune in place so os.walk does not descend into the skipped folders.
    dirs[:] = [d for d in dirs if d not in skipped_dirs]
    if "pom.xml" not in files:
        continue
    p = join(root, "pom.xml")
    print("Checking", p)
    with open(p) as f:
        s = f.read()
    # NOTE(review): this reports a file when the two markers are adjacent and
    # a <module> tag exists somewhere in it -- confirm that is the condition
    # meant by "corrupted".
    if tag in s and comment.search(s):
        print("The following files are corrupted ", p)
UPDATE #5
import re
import os
import xml.etree.ElementTree as etree
from bs4 import BeautifulSoup
from bs4 import Comment
from os.path import join
# Matches the "Startsection" comment immediately followed by the "Endsection"
# comment, with nothing but whitespace between them.
# The pipes are escaped because a bare "|" is regex alternation (which made
# any lone "-->" count as a match), and whitespace is spelled "\s+".
comment = re.compile(
    r"<!--\s+\| Startsection\s+\|-->\s+<!--\s+\| Endsection\s+\|-->"
)
tag = "<module>"
skipped_dirs = ("model", "doc")

for root, dirs, files in os.walk("myfolder"):
    # Prune in place so os.walk does not descend into the skipped folders.
    dirs[:] = [d for d in dirs if d not in skipped_dirs]
    if "pom.xml" not in files:
        continue
    p = join(root, "pom.xml")
    print("Checking", p)
    with open(p) as f:
        s = f.read()
    if tag in s and comment.search(s):
        print("ERROR: The following file are corrupted", p)

    # Parse the text already read instead of re-opening the file, which
    # leaked a file handle. The result is bound to "soup", the name the
    # lookups below use (it was assigned to "bs" before, causing a NameError).
    # NOTE(review): the extraction runs for every pom.xml, not only when the
    # regex above matches, because that regex only matches when nothing sits
    # between the two markers -- confirm this is the intended nesting.
    soup = BeautifulSoup(s, "html.parser")
    # Extract all comments
    comments = soup.find_all(string=lambda text: isinstance(text, Comment))
    for c in comments:
        # The start marker in these files reads "Startsection".
        if "Startsection" not in c:
            continue
        # Print each <module> that follows the start marker, stopping at the
        # "Endsection" comment so only modules between the markers are shown.
        for sibling in c.next_siblings:
            if isinstance(sibling, Comment) and "Endsection" in sibling:
                break
            if getattr(sibling, "name", None) == "module":
                print(sibling.text)
See the Question & Answers page for more detail:
os