Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
101 views
in Technique[技术] by (71.8m points)

How to append the csv file in python through each loop of dates in screen scrapinng?

This is the code that I am working on and I want to the code to iterate through scraping and dates in such a way that it does not overwrite the csv file and the new data is added after scraping through next set of dates. Please help me with it. I tired different methods such as f.write() and append as well for the same code but it gives me some or other error.

from bs4 import BeautifulSoup as BS
from selenium import webdriver
from functools import reduce
import pandas as pd
import time
from datetime import datetime, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import numpy as np
import requests

def render_page(url,type):
    driver = webdriver.Chrome(executable_path=r'C:UsersabcDesktopscreen scraping codeschromedriver.exe')
    driver.get(url)
    time.sleep(15)
    if type =="C":
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, 'wuSettings'))
        )
        element.click()
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//*[@id="wuSettings-quick"]/div/a[2]')))
        element.click()
        time.sleep(15)
        r = driver.page_source
        driver.quit()
    if type=="F":
        r = driver.page_source
        driver.quit()
    return r

def hourly_scraper(page,dates,type):
    output = pd.DataFrame()

    for d in dates:
        url = str(str(page) + str(d))

        r = render_page(url,type)

        soup = BS(r, "html.parser",)
        container = soup.find('lib-city-history-observation')
        check = container.find('tbody')

        data = []
        data_hour = []
        for i in check.find_all('span', class_='ng-star-inserted'):
            trial = i.get_text()
            data_hour.append(trial)

        for i in check.find_all('span', class_='wu-value wu-value-to'):
            trial = i.get_text()
            data.append(trial)


        numbers = pd.DataFrame([data[i:i+7] for i in range(0, len(data), 7)],columns=["Temperature","Dew Point","Humidity","Wind Speed","Wind Gust","Pressure","Precipitation"])
        hour = pd.DataFrame(data_hour[0::17],columns=["Time"])
        wind = pd.DataFrame(data_hour[7::17],columns=["Wind"])
        condition = pd.DataFrame(data_hour[16::17],columns=["Condition"])

        dfs = [hour,numbers,wind,condition]

        df_final = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True), dfs)
        df_final['Date'] = str(d)

        output = output.append(df_final)
        df_final.to_csv(r'C:UsersabcDesktopscreen scraping codes123.csv',index=False)
        print(str(str(d) + ' finished!'))
       

    return output


page = "https://www.wunderground.com/history/daily/in/ahmedabad/VAAH/date/"
#dates = ["2020-12-27","2020-12-28"]
d0 = datetime(2009, 1,1)
d1 = datetime(2009, 1,3)
dt = timedelta(days = 1)
dates = np.arange(d0, d1, dt).astype(datetime)

hourly = hourly_scraper(page,dates,"C")
    
   
question from:https://stackoverflow.com/questions/65897981/how-to-append-the-csv-file-in-python-through-each-loop-of-dates-in-screen-scrapi

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)
Waitting for answers

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...