本文整理汇总了Python中selenium.webdriver.PhantomJS类的典型用法代码示例。如果您正苦于以下问题:Python PhantomJS类的具体用法?Python PhantomJS怎么用?Python PhantomJS使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PhantomJS类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: Parser
class Parser(object):
    """Base parser that owns a headless PhantomJS browser instance."""

    def __init__(self):
        # One browser process per parser; released via cleanup().
        self.browser = PhantomJS()

    def cleanup(self):
        """Terminate the PhantomJS process and free its resources."""
        self.browser.quit()
开发者ID:ipinak,项目名称:xekatharisma,代码行数:7,代码来源:parser_base.py
示例2: export
def export(plot, filename, width=800, height=600):
    """Render *plot* to a PNG file using a shared headless PhantomJS.

    Args:
        plot (quorra.Plot): Quorra plot object to export.
        filename (str): Filename to export to ('.png' is appended).
        width (int): Width for plot (pixels).
        height (int): Height for plot (pixels).
    """
    global _phantom, __templates__, __cwd__
    if _phantom is None:
        # Lazily start a single PhantomJS process, reused across exports.
        from selenium.webdriver import PhantomJS
        _phantom = PhantomJS(service_log_path=os.path.devnull)
    tmpl = os.path.join(__templates__, 'export.html')
    exp = os.path.join(__cwd__, '.' + str(uuid.uuid1()) + '.html')
    try:
        with open(tmpl, 'r') as fi, open(exp, 'w') as fo:
            markup = fi.read()
            substitutions = (
                ('var plot = undefined;', 'var plot = {};'.format(str(plot))),
                ('width: 800px;', 'width: {}px;'.format(width)),
                ('height: 500px;', 'height: {}px;'.format(height)),
            )
            for needle, replacement in substitutions:
                markup = markup.replace(needle, replacement)
            fo.write(markup)
        # File is closed/flushed at this point, safe for the browser to load.
        _phantom.get('file://' + exp)
        _phantom.save_screenshot(filename.replace('.png', '') + '.png')
    finally:
        # Always remove the throwaway HTML scratch file.
        if os.path.exists(exp):
            os.remove(exp)
    return
开发者ID:bprinty,项目名称:quorra-python,代码行数:29,代码来源:methods.py
示例3: __init__
def __init__(self):
    """Set crawl page bounds and start a PhantomJS driver."""
    self.start_page = START_PAGE
    self.end_page = END_PAGE
    self.weixin_url = REFER_FIRST
    # self.driver = Firefox()
    # Honour an explicitly configured PhantomJS binary when available.
    if hasattr(config, 'PHANTOMJS_PATH'):
        self.driver = PhantomJS(executable_path=config.PHANTOMJS_PATH)
    else:
        self.driver = PhantomJS()
开发者ID:xutaoding,项目名称:csf_scraper,代码行数:10,代码来源:wx_threads.py
示例4: __init__
class Crawler:
    """Headless PhantomJS crawler routed through a configurable proxy.

    Usable as a context manager; the browser is quit on exit.
    """

    def __init__(self, timeout=20, phantomjs_cfg_file='python-utils/config/phantomjs_cfg.json', use_cfg_file=False, proxy_pool_server='http://127.0.0.1:15110'):
        self.timeout = timeout
        if use_cfg_file:
            # All proxy settings come from the PhantomJS config file.
            phantomjs_service_args = ['--config={}'.format(phantomjs_cfg_file)]
        else:
            # Otherwise fetch a fresh proxy from the pool server.
            _, proxy_type, proxy, proxy_auth = get_proxy(proxy_pool_server)
            phantomjs_service_args = [
                '--proxy-type={}'.format(proxy_type),
                '--proxy={}'.format(proxy),
                '--proxy-auth={}'.format(proxy_auth),
            ]
        self.driver = PhantomJS(
            desired_capabilities=self.new_desired_capabilities(),
            service_args=phantomjs_service_args)
        self.check_client_info()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Shut down the underlying browser process."""
        self.driver.quit()

    @contextmanager
    def wait_for_page_load(self, old_element):
        # Yield to the caller's navigation code, then block until the old
        # DOM node goes stale, i.e. the new page has replaced it.
        yield
        WebDriverWait(self.driver, self.timeout).until(EC.staleness_of(old_element))

    def new_desired_capabilities(self, user_agent=default_ua):
        """Build PhantomJS capabilities carrying the given User-Agent."""
        desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
        if not user_agent:
            # Falsy UA means: pick a random one.
            user_agent = ua.random
        desired_capabilities["phantomjs.page.settings.userAgent"] = user_agent
        return desired_capabilities

    def check_client_info(self):
        """Log the outgoing IP / User-Agent and abort if the proxy leaks."""
        url = 'http://www.whoishostingthis.com/tools/user-agent/'
        self.driver.get(url)
        ip_addr = get_xpath_element(self.driver, '//*[@id="user-agent"]/div[2]/span').text.strip()
        user_agent = get_xpath_element(self.driver, '//*[@id="user-agent"]/div[1]').text.strip()
        logger.info('IP: {}, User-Agent: {}'.format(ip_addr, user_agent))
        if self.wrong_ip(ip_addr):
            logger.error('Proxy not set correctly!')
            sys.exit(-1)

    def wrong_ip(self, ip_addr):
        """Return True when *ip_addr* is one of our own (un-proxied) prefixes."""
        return ip_addr.startswith(('166.111.', '59.66.', '101.5.', '101.6.'))
开发者ID:thuzhf,项目名称:python-utils,代码行数:54,代码来源:crawler.py
示例5: main
def main():
    """Scrape the 2014 NL farm-subsidy data into the CSV named in argv[1]."""
    driver = PhantomJS()
    scraper = NLScraper(driver, year=2014)
    print(sys.argv[1])
    fieldnames = ('amount', 'scheme', 'year',
                  'country', 'currency', 'recipient_name', 'recipient_postcode',
                  'recipient_id', 'recipient_location')
    writer = unicodecsv.DictWriter(open(sys.argv[1], 'w'), fieldnames)
    writer.writeheader()
    try:
        scraper.start(writer)
    finally:
        # Ensure the PhantomJS process dies even when scraping fails.
        driver.quit()
开发者ID:simonwoerpel,项目名称:farmsubsidy-scrapers,代码行数:12,代码来源:nl_scraper.py
示例6: __init__
def __init__(self):
    """Set page bounds, start PhantomJS, and open the Mongo collection."""
    self.start_page = START_PAGE
    self.end_page = END_PAGE
    self.weixin_url = REFER_FIRST
    # self.driver = Firefox()
    # Honour an explicitly configured PhantomJS binary when available.
    if hasattr(config, 'PHANTOMJS_PATH'):
        self.driver = PhantomJS(executable_path=config.PHANTOMJS_PATH)
    else:
        self.driver = PhantomJS()
    self.client = MongoClient(HOST, PORT)
    self.collection = self.client[DB][COLLECTION]
    # Snapshot the uid list once at construction time.
    self.all_uids = self.uids
开发者ID:xutaoding,项目名称:csf_scraper,代码行数:14,代码来源:wx_phantomjs.py
示例7: onegoogolePR
def onegoogolePR(self, url):
    """Return the Google PageRank of *url* as a string.

    Queries pr.chinaz.com with a headless PhantomJS browser and extracts
    the rank digit from the result image's src.  Returns the literal
    '暂无数据' ("no data") when the rank cannot be determined.
    """
    prUrl = 'http://pr.chinaz.com'  # Google PR lookup service
    driver = PhantomJS()
    try:
        driver.get(prUrl)
        driver.find_element_by_id('PRAddress').send_keys(url)
        driver.find_element_by_class_name('search-write-btn').click()
        try:
            imgsrc = driver.find_element_by_css_selector('span#pr>img').get_attribute('src')
            pr = search(r'\d', imgsrc).group()
        except Exception:
            # Narrowed from a bare except; any scrape failure means "no data".
            pr = '暂无数据'
    finally:
        # Bug fix: the original only quit the driver on the success path,
        # leaking the PhantomJS process when navigation/lookup raised.
        driver.quit()
    return pr
开发者ID:EvilDD,项目名称:appExe,代码行数:14,代码来源:seo.py
示例8: init_phantom
def init_phantom(self):
    """Configure and launch the PhantomJS driver used for this crawl."""
    self.prefixfiles = os.path.join(
        scrapyd_config().get('logs_dir'),
        HYPHE_PROJECT,
        self.name,
        self.crawler.settings['JOBID']
    )
    self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)
    # Command-line switches for the PhantomJS binary.
    phantom_args = []
    if PROXY and not PROXY.startswith(':'):
        phantom_args.append('--proxy=%s' % PROXY)
    phantom_args.append('--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles)
    phantom_args.append('--ignore-ssl-errors=true')
    phantom_args.append('--load-images=false')
    # Page-level capabilities: custom UA, no screenshots, no JS window control.
    self.capabilities = dict(DesiredCapabilities.PHANTOMJS)
    self.capabilities.update({
        'phantomjs.page.settings.userAgent': self.user_agent,
        'takesScreenshot': False,
        'phantomjs.page.settings.javascriptCanCloseWindows': False,
        'phantomjs.page.settings.javascriptCanOpenWindows': False,
    })
    self.phantom = PhantomJS(
        executable_path=PHANTOM['PATH'],
        service_args=phantom_args,
        desired_capabilities=self.capabilities,
        service_log_path="%s-phantomjs.log" % self.prefixfiles
    )
    self.phantom.implicitly_wait(10)
    self.phantom.set_page_load_timeout(60)
    self.phantom.set_script_timeout(self.ph_timeout + 15)
开发者ID:SciencesPoDRIS,项目名称:hyphe,代码行数:28,代码来源:pages.py
示例9: selenium
def selenium(self, webdriverOption=0):
    """Download self.url with a real browser (works for any page).

    :param webdriverOption: 0 = PhantomJS, 1 = Chrome, 2 = Firefox.
    :return: the page source string, or None when the URL is not HTTP(S)
             or no driver could be initialised.
    """
    if not self.url[:4] == "http":
        return None
    driver = None
    if webdriverOption == 0:
        from selenium.webdriver import PhantomJS
        driver = PhantomJS()
    elif webdriverOption == 1:
        from selenium.webdriver import Chrome
        driver = Chrome()
    elif webdriverOption == 2:
        from selenium.webdriver import Firefox
        driver = Firefox()
    if not driver:
        print(u"-->DownLoader->Selenium driver初始化出错,请检查运行环境或webdriverOption选项")
        # Bug fix: the original fell through and called driver.get() on
        # None, raising AttributeError instead of failing gracefully.
        return None
    driver.get(self.url)
    src = driver.page_source
    driver.quit()
    self.pageSource = src
    return src
开发者ID:yangmingsong,项目名称:python,代码行数:30,代码来源:DownLoader.py
示例10: __init__
def __init__(self):
    """Start a PhantomJS browser on a free local port.

    ARGS:
        None
    RETURNS:
        None
    """
    # executable_path is optional; when omitted Selenium searches PATH.
    self.browser = PhantomJS(executable_path='./drivers/phantomjs',
                             port=free_port())
    self.timeout = 5  # seconds
开发者ID:pdxcycling,项目名称:carv.io,代码行数:12,代码来源:savefrom_scraper.py
示例11: catalog_url
def catalog_url(url='http://www.meitun.com/'):
    """Collect category links from the meitun.com navigation fly-out.

    The catalog is populated via AJAX, so a headless browser is required.
    """
    driver = PhantomJS()
    driver.get(url)
    driver.maximize_window()
    nav_item = driver.find_element_by_css_selector('.nav>ul>li:nth-child(1)')
    # Hover over the first nav entry so the lazily-rendered panel appears.
    ActionChains(driver).move_to_element(nav_item).perform()
    time.sleep(3)
    html = driver.page_source
    driver.quit()
    # Parse the snapshot with pyquery — much faster than driving the browser.
    doc = pq(html)
    return map(lambda a: 'http:' + pq(a).attr('href'), doc.find('.cg-pdts a'))
开发者ID:yangmingsong,项目名称:python,代码行数:14,代码来源:ps_meitun_spider.py
示例12: setUp
def setUp(self):
    """Create a browser, an admin user, a provider and 20 test adverts."""
    self.driver = PhantomJS()
    self.user = User.objects.create_user('admin', '[email protected]', 'pass')
    self.user.save()
    provider = Provider(
        name='provider',
        user=self.user,
    )
    provider.save()
    self.provider = provider
    # Bulk-create fixture adverts attached to the provider.
    self.provider_adverts = mommy.make(Advertisement, _quantity=20, provider=self.provider)
开发者ID:tridinebandim,项目名称:OpenAds,代码行数:13,代码来源:tests.py
示例13: on_start_again
def on_start_again(self, url):
    """Page through a paginated listing and parse every page's HTML.

    Reads the '.page-txt' counter (e.g. "1/7页"), clicks the '#next'
    control (page count - 1) times, collecting the page source after each
    click, then feeds every snapshot to step_first().
    """
    driver = PhantomJS()
    driver.get(url)
    time.sleep(2)
    driver.maximize_window()
    t = driver.find_element_by_css_selector('.page-txt').text
    res_t = []
    if t:
        t = int(t.split('/')[1][:-1]) - 1  # get the page count
        # the count of page turning should be i-1
        while t:
            t -= 1
            move_ele = driver.find_element_by_css_selector('#next')
            # Bug fix: ActionChains only queues actions — without an
            # explicit perform() the click never ran and every captured
            # page was identical.
            ActionChains(driver).move_to_element(move_ele).click().perform()
            time.sleep(1)
            res_t.append(driver.page_source)
    driver.quit()
    for item in res_t:
        self.step_first(item)
开发者ID:yangmingsong,项目名称:python,代码行数:19,代码来源:ps_meitun_spider.py
示例14: render
def render(gist_id, commit):
    """Screenshot a bl.ocks.org block and store it in the d3_block table.

    Stores a base64 PNG of the full page plus, when an iframe is found, a
    crop of the block itself.  On failure the error text is recorded and
    the process exits non-zero (10: page load failed, 11: no iframe crop).
    """
    block_url = 'http://bl.ocks.org/' + gist_id
    d3_block_rec = {'gist_id': gist_id}
    driver = None
    try:
        try:
            driver = PhantomJS()
            driver.get(block_url)
            time.sleep(RENDER_DELAY)  # let it render
            fullpage_im = Image.open(BytesIO(driver.get_screenshot_as_png()))
            fimb = BytesIO()
            fullpage_im.save(fimb, 'png')
            d3_block_rec['fullpage_base64'] = base64.b64encode(fimb.getvalue())
            d3_block_rec['block_url'] = driver.current_url
        except Exception as e:
            # we got nothing
            with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
                d3_block_rec['error'] = str(e)
                pg.insert('d3_block', values=d3_block_rec)
            exit(10)
        try:
            f = driver.find_element_by_xpath('//iframe')
            x, y = int(f.location['x']), int(f.location['y'])
            w, h = x + int(f.size['width']), y + int(f.size['height'])
            block_im = fullpage_im.crop((x, y, w, h))
            bimb = BytesIO()
            block_im.save(bimb, 'png')
            d3_block_rec['block_base64'] = base64.b64encode(bimb.getvalue())
            d3_block_rec['block_size'] = list(block_im.size)
        except Exception as e:
            # at least we got the fullpage im, save it
            with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
                d3_block_rec['error'] = str(e)
                pg.insert('d3_block', values=d3_block_rec)
            exit(11)
        # all good, save everything
        with LittlePGer('dbname=' + DB_NAME, commit=commit) as pg:
            pg.insert('d3_block', values=d3_block_rec)
    finally:
        # Bug fix: the original never quit the driver, leaking a PhantomJS
        # process on every call.  exit() raises SystemExit, so this finally
        # runs on the error paths too before the interpreter exits.
        if driver is not None:
            driver.quit()
开发者ID:cjauvin,项目名称:d3-blocks-thumbnailer,代码行数:38,代码来源:d3_blocks_downloader.py
示例15: __init__
def __init__(self, login, password, userAgent=LINUX_USER_AGENT):
    """Store credentials and start PhantomJS with the given User-Agent.

    :param login: account login
    :param password: account password
    :param userAgent: UA string PhantomJS sends (defaults to a Linux UA)
    """
    self.login = login
    self.password = password
    capabilities = dict(DesiredCapabilities.PHANTOMJS)
    capabilities["phantomjs.page.settings.userAgent"] = userAgent
    self.driver = PhantomJS(desired_capabilities=capabilities)
    self.driver.set_window_size(1366, 768)
开发者ID:AltarBeastiful,项目名称:rateItSeven,代码行数:17,代码来源:legacysenscritique.py
示例16: Premiumgeneratorlink
class Premiumgeneratorlink(object):
    """Resolve a premium download link via premiumgeneratorlink.com."""

    def __init__(self, url):
        self.url = url
        self.browser = PhantomJS()

    def get_link(self):
        """Drive the generator site; return the link, or False on failure."""
        try:
            self.browser.get('http://premiumgeneratorlink.com/')
            self.browser.find_element_by_name('link').send_keys(self.url)
            self.browser.find_element_by_xpath('//a[@class="input"]').click()
            waiter = WebDriverWait(self.browser, 10)
            waiter.until(EC.element_to_be_clickable((By.ID, 'check'))).click()
            waiter.until(EC.element_to_be_clickable((By.ID, 'generate'))).click()
            form = waiter.until(EC.visibility_of_element_located((By.XPATH, '//form[@class="center"]')))
            link = form.get_attribute('action')
        except (WebDriverException, NoSuchElementException, TimeoutException):
            return False
        finally:
            # Quit unconditionally — runs before either return.
            self.browser.quit()
        return link
开发者ID:inox9,项目名称:musicBot,代码行数:19,代码来源:premiumgeneratorlink.py
示例17: post
def post(self):
    """Snapshot a page (HTML + PNG screenshot) and append it to the Page.

    Returns JSON: {"id": "<snap id>"}.  On browser failure the snap's
    HTML records the error text and the screenshot stays None.
    """
    id = request.values['page']
    page = Page.objects.get_or_404(id=id)
    # html = requests.get(page.baseurl).text
    screenshot = None
    try:
        phantom = PhantomJS(desired_capabilities={'acceptSslCerts': True},
                            service_args=['--web-security=false',
                                          '--ssl-protocol=any',
                                          '--ignore-ssl-errors=true'], port=8888)
        phantom.set_window_size(1024, 768)
        phantom.get(page.baseurl)
        html = phantom.page_source
        screenshot = phantom.get_screenshot_as_png()
        phantom.close()
    except Exception as ex:
        html = "error when i snap your page ... %s" % ex
    # Bug fix: the snap was created and pushed twice (copy-pasted lines),
    # storing a duplicate document on every request; save exactly once.
    snap = Snap(html, datetime.datetime.now(), screenshot).save()
    page.update(push__snaps=snap)
    return jsonify({'id': "%s" % snap.id})
开发者ID:41px,项目名称:api.watcher.link,代码行数:22,代码来源:snap.py
示例18: Leecherus
class Leecherus(object):
    """Resolve a premium download link via leecher.us."""

    def __init__(self, url):
        self.url = url
        self.browser = PhantomJS()

    def get_link(self):
        """Drive leecher.us; return the extracted URL, or False on failure."""
        try:
            self.browser.get('http://leecher.us')
            waiter = WebDriverWait(self.browser, 10)
            waiter.until(EC.visibility_of_element_located((By.NAME, 'link'))).send_keys(self.url)
            waiter.until(EC.element_to_be_clickable((By.XPATH, '//button[@class="subscribe"]'))).click()
            waiter.until(EC.element_to_be_clickable((By.XPATH, '//input[@class="subscribe"]'))).click()
            # The result opens in a second window; switch to it.
            self.browser.switch_to_window(self.browser.window_handles[1])
            onclick = waiter.until(EC.element_to_be_clickable((By.ID, 'get_link'))).get_attribute('onclick')
        except (WebDriverException, NoSuchElementException, TimeoutException, IndexError):
            return False
        finally:
            self.browser.quit()
        # The target URL is embedded in the onclick handler as '...'.
        m = re.search("'(http://[^']+)'", onclick)
        return m.group(1) if m else False
开发者ID:inox9,项目名称:musicBot,代码行数:20,代码来源:leecherus.py
示例19: Client
class Client(object):
    """HTTP client for functional testing of Strass.

    Adapts the Selenium PhantomJS driver behind a fluent interface
    inspired by Nightwatch.js, with a few Strass-specific parameters.
    """

    def __init__(self):
        self.driver = PhantomJS()
        self.driver.set_window_size(1120, 550)

    def __del__(self):
        self.driver.quit()

    def get(self, query=None):
        """Navigate to *query* on the configured test server; return self."""
        server = os.environ.get('STRASS_TEST_SERVER', 'http://localhost:8000')
        self.driver.get(server + (query or '/'))
        return self

    def find(self, selector):
        """Return the first element matching the CSS *selector*."""
        return self.driver.find_element_by_css_selector(selector)

    def click(self, selector):
        self.find(selector).click()
        return self

    def fill(self, selector, value):
        """Type *value* into the control at *selector*.

        A datetime.date is split across the day/month/year sub-inputs.
        """
        if isinstance(value, datetime.date):
            self.fill(selector + ' input.day', str(value.day))
            self.fill(selector + ' input.month', str(value.month))
            self.fill(selector + ' input.year', str(value.year))
            return self
        control = self.find(selector)
        try:
            control.clear()
        except selexc.InvalidElementStateException:
            # Clearing an input[type=file] is not allowed; skip it.
            pass
        control.send_keys(value)
        return self

    def select(self, selector, value):
        Select(self.find(selector)).select_by_value(value)
        return self

    def submit(self, selector='#document button[type=submit]'):
        return self.click(selector)

    def close(self):
        """Close the current window and fall back to the first one."""
        self.driver.close()
        if self.driver.window_handles:
            self.driver.switch_to.window(self.driver.window_handles[0])
            self.driver.set_window_size(1120, 550)
        return self

    def screenshot(self, filename):
        self.driver.get_screenshot_as_file(filename)
        sys.stderr.write("Capture d'écran enregistrée dans %r\n" % (filename,))
        return self

    def save(self, filename):
        with open(filename, 'w') as fo:
            fo.write(self.driver.page_source)
        sys.stderr.write("HTML enregistré dans %r\n" % (filename,))
        return self

    def __getattr__(self, name):
        # Delegate anything unknown straight to the Selenium driver.
        return getattr(self.driver, name)
开发者ID:bersace,项目名称:strass,代码行数:68,代码来源:client.py
示例20: PagesCrawler
class PagesCrawler(BaseSpider):
name = 'pages'
link_extractor = RegexpLinkExtractor(canonicalize=False, deny_extensions=[])
ignored_exts = set(['.' + e for e in IGNORED_EXTENSIONS])
def __init__(self, **kw):
    """Merge crawl arguments over DEFAULT_INPUT and set up crawl state."""
    args = DEFAULT_INPUT.copy()
    args.update(kw)
    self.args = args
    self.start_urls = to_list(args['start_urls'])
    self.maxdepth = int(args['maxdepth'])
    self.follow_prefixes = to_list(args['follow_prefixes'])
    self.nofollow_prefixes = to_list(args['nofollow_prefixes'])
    # Normalise discover prefixes to LRUs, covering both http and https.
    self.discover_prefixes = [
        url_to_lru_clean("http%s://%s" % (https, u.replace('http://', '').replace('https://', '')))
        for u in to_list(args['discover_prefixes'])
        for https in ['', 's']
    ]
    self.resolved_links = {}
    self.user_agent = args['user_agent']
    # Phantom mode is on unless the arg is missing, falsy, or "false".
    self.phantom = 'phantom' in args and args['phantom'] and args['phantom'].lower() != "false"
    if self.phantom:
        self.ph_timeout = int(args.get('phantom_timeout', PHANTOM['TIMEOUT']))
        self.ph_idle_timeout = int(args.get('phantom_idle_timeout', PHANTOM['IDLE_TIMEOUT']))
        self.ph_ajax_timeout = int(args.get('phantom_ajax_timeout', PHANTOM['AJAX_TIMEOUT']))
    self.errors = 0
    # Hook spider lifecycle signals for cleanup and crash accounting.
    dispatcher.connect(self.closed, spider_closed)
    dispatcher.connect(self.crashed, spider_error)
def start_requests(self):
    """Log startup, boot PhantomJS when enabled, and yield seed requests."""
    self.log("Starting crawl task - jobid: %s" % self.crawler.settings['JOBID'], log.INFO)
    self.log("ARGUMENTS : " + str(self.args), log.INFO)
    if self.phantom:
        self.init_phantom()
    # Generator: requests are produced lazily as scrapy consumes them.
    for seed in self.start_urls:
        yield self._request(seed)
def init_phantom(self):
    """Launch the PhantomJS driver this spider uses for dynamic pages."""
    self.prefixfiles = os.path.join(
        scrapyd_config().get('logs_dir'),
        HYPHE_PROJECT,
        self.name,
        self.crawler.settings['JOBID']
    )
    self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)
    # Binary-level switches (proxy, cookie jar, SSL, no images).
    phantom_args = []
    if PROXY and not PROXY.startswith(':'):
        phantom_args.append('--proxy=%s' % PROXY)
    phantom_args.append('--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles)
    phantom_args.append('--ignore-ssl-errors=true')
    phantom_args.append('--load-images=false')
    # Page-level capabilities: custom UA, no screenshots, no JS window control.
    self.capabilities = dict(DesiredCapabilities.PHANTOMJS)
    self.capabilities.update({
        'phantomjs.page.settings.userAgent': self.user_agent,
        'takesScreenshot': False,
        'phantomjs.page.settings.javascriptCanCloseWindows': False,
        'phantomjs.page.settings.javascriptCanOpenWindows': False,
    })
    self.phantom = PhantomJS(
        executable_path=PHANTOM['PATH'],
        service_args=phantom_args,
        desired_capabilities=self.capabilities,
        service_log_path="%s-phantomjs.log" % self.prefixfiles
    )
    self.phantom.implicitly_wait(10)
    self.phantom.set_page_load_timeout(60)
    self.phantom.set_script_timeout(self.ph_timeout + 15)
def crashed(self, spider):
    # Invoked on the scrapy spider_error signal: record the crash and
    # run the normal shutdown path with a "CRASH" reason.
    self.errors += 1
    self.closed("CRASH")
def closed(self, reason):
    """Spider shutdown: report errors, stop PhantomJS, tidy scratch files."""
    if self.errors:
        self.log("%s error%s encountered during the crawl." %
                 (self.errors, 's' if self.errors > 1 else ''), log.ERROR)
    if self.phantom:
        self.phantom.quit()
        if not self.errors:
            # Clean run: drop the per-job PhantomJS cookie and log files.
            for suffix in ["phantomjs-cookie.txt", "phantomjs.log"]:
                path = "%s-%s" % (self.prefixfiles, suffix)
                if os.path.exists(path):
                    os.remove(path)
def handle_response(self, response):
lru = url_to_lru_clean(response.url)
if self.phantom:
self.phantom.get(response.url)
# Collect whole DOM of the webpage including embedded iframes
with open(os.path.join(PHANTOM["JS_PATH"], "get_iframes_content.js")) as js:
get_bod_w_iframes = js.read()
bod_w_iframes = self.phantom.execute_script(get_bod_w_iframes)
response._set_body(bod_w_iframes.encode('utf-8'))
# Try to scroll and unfold page
self.log("Start PhantomJS scrolling and unfolding", log.INFO)
with open(os.path.join(PHANTOM["JS_PATH"], "scrolldown_and_unfold.js")) as js:
try:
signal.signal(signal.SIGALRM, timeout_alarm)
signal.alarm(self.ph_timeout + 30)
timedout = self.phantom.execute_async_script(
js.read(), self.ph_timeout,
self.ph_idle_timeout, self.ph_ajax_timeout)
#.........这里部分代码省略.........
开发者ID:SciencesPoDRIS,项目名称:hyphe,代码行数:101,代码来源:pages.py
注:本文中的selenium.webdriver.PhantomJS类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论