I am quite new to scraping.
I am getting links from nowgoal
below is how I started navigating to above page.
I do not wish to get link for all matches. but I will have an input txt file, which is attached Here and use the selected league and date.
The following code will initialize as input:
#Intialisation
league_index =[]
final_list = []
j = 0
#config load
config = RawConfigParser()
configFilePath = r'.config.txt'
config.read(configFilePath)
date = config.get('database_config','date') #input file provided by user - provide in YYYY-MM-DD format
leagues = config.get('database_config','leagues') #input file provided by user - provide in windows format
headless_param =config.get('database_config','headless') #Headless param - set True if you want to see bowser operating in foreground!
leagues_list = leagues.split(',')
print(leagues_list)
After I initialized with the preferred date and league, I will set up for chrome driver as follow:
options = webdriver.ChromeOptions() #initialise webdriver options
#options.binary_location = brave_path #if you are running the script on brave - then enable it
if headless_param == 'True' :
print('headless')
options.headless = True # if headeless parameter is set to true - the chrome browser will not appear in foreground
options.add_argument('start-maximized') # Start the chrome maximised
options.add_argument('disable-infobars') # Disable infobars
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("prefs", {"profile.default_content_setting_values.cookies": 2})
options.add_experimental_option("prefs", {"profile.block_third_party_cookies": True})
options.add_experimental_option('useAutomationExtension', False)
options.add_argument("--incognito") #Incognito mode
#intiate the driver
driver = webdriver.Chrome(resource_path('./drivers/chromedriver.exe'),options=options)
#Format the url
url = 'http://www.nowgoal3.com/football/fixture/?f=ft0&date='+date
#get the url
driver.get(url)
#wait for some time
time.sleep(3)
driver.find_element_by_xpath('//*[@id="li_league"]').click()
time.sleep(5)
#click on the -team ranking
driver.find_element_by_xpath('//*[@id="TeamOrderCheck"]').click()
After this, you will be brought to the following page
I also add in the snap shot below
I try to get the data from the table by looping: the code is as follow:
> #Get the leagues name from page htmlSource = driver.page_source
> #Pass the htmlsource into soup soup = bs4.BeautifulSoup(htmlSource,'html.parser')
> #Table table = soup.select('table[id="table_live"]')
> #Rows of table all_rows = table[0].select('tr')
> #loop through each row
for i , row in enumerate(all_rows[2:]) :
> try:
> key_word = row['class'][0]
> print(key_word)
> if 'Leaguestitle' in key_word:#if leagues got changed
> league = row.a.text
> print(row.a.text)
> if row.a.text in leagues_list:
> j =1
> else:
> j =0
> elif j== 1:
> home_team = row.findAll('a')[0].text #home team
> print(home_team)
> away_team = row.findAll('a')[1].text #away team
> match_number = ''.join(filter(str.isdigit,row.findAll('a')[2]['href'].strip()))
> #match_number
> link = 'http://data.nowgoal.group/3in1odds/'+match_number+'.html'
> #link for 3 in 1 odds from the match code
> home_ranking = row.findAll('span')[0].text.strip('[]') #home team ranking
> away_ranking = row.findAll('span')[1].text.strip('[]') #Away team ranking
> final_list.append([home_team,home_ranking,away_team,away_ranking,league,match_number,link])
> except KeyError:
> try:
> if row['style']=='display:none':
> continue
> elif j== 1:
> home_team = row.findAll('a')[0].text #home team
> away_team = row.findAll('a')[1].text #away team
> home_ranking = row.findAll('span')[0].text.strip('[]') #home team ranking
> away_ranking = row.findAll('span')[1].text.strip('[]') #Away team ranking
> match_number = ''.join(filter(str.isdigit,row.findAll('a')[2]['href'].strip()))
> #match_code associated with each match
> link = 'http://data.nowgoal.group/3in1odds/'+match_number+'.html'
> #link for 3 in 1 odds from the match code
> final_list.append([home_team,home_ranking,away_team,away_ranking,league,match_number,link])
> except KeyError :
> print('KeyError')
>
>
> except IndexError:
> if j== 1:
> home_team = row.findAll('a')[0].text #home team
> away_team = row.findAll('a')[1].text #away team
> home_ranking = row.findAll('span')[0].text.strip('[]') #home team ranking
> away_ranking = row.findAll('span')[1].text.strip('[]') #Away team ranking
> match_number = ''.join(filter(str.isdigit,row.findAll('a')[2]['href'].strip()))
> #match_code associated with each match
> link = 'http://data.nowgoal.group/3in1odds/'+match_number+'.html'
> #link for 3 in 1 odds from the match code
> final_list.append([home_team,home_ranking,away_team,away_ranking,league,match_number,link])
> print('IndexError-captured')
>
> print(final_list)#show the final result driver.quit()#close the
> browser
Then I print out the hometeam and the following results
Chelsea adtext-bg QC: MAY88.COM - NHà CáI H?P PHáP NA UY - TH??NG N?P
100% - HOàN TR? 100TR - H? TR? 24/7
Then it threw me an index error as follow:
Traceback (most recent call last):
File "D:/Football matters/Sttratagem data access/Games By Numbers/Nowgoal scraping project/codes/NOWGOAL-20200721T024808Z-001/NOWGOAL/PYFILES/Link_extractor_v1.3.py", line 124, in <module>
away_team = row.findAll('a')[1].text #away team
IndexError: list index out of range
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:/Football matters/Sttratagem data access/Games By Numbers/Nowgoal scraping project/codes/NOWGOAL-20200721T024808Z-001/NOWGOAL/PYFILES/Link_extractor_v1.3.py", line 149, in <module>
away_team = row.findAll('a')[1].text #away team
IndexError: list index out of range
I am seeking your kind advice on that.
I would be greatly appreciate for your help.
Thanks,
Zep.
question from:
https://stackoverflow.com/questions/65929976/getting-table-value-from-nowgoal-has-got-an-index-error