Python user_agent.generate_user_agent function code examples


This article collects and summarizes typical usage examples of the Python function user_agent.generate_user_agent. If you are wondering exactly how to use generate_user_agent, how to call it, or what real-world examples of it look like, the hand-picked code examples below may help.



The following presents 20 code examples of the generate_user_agent function, sorted by popularity by default.
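Before working through the examples, here is a minimal usage sketch (written for this article, not taken from any of the projects below). generate_user_agent() returns a random User-Agent string; the keyword arguments os, navigator and device_type seen in the examples restrict the operating system, browser and device family. A few of the examples come from older forks that pass the keyword platform instead of os, so the accepted spelling depends on the installed version of the library.

# Minimal sketch of typical usage; assumes the user_agent and requests packages are installed
import requests
from user_agent import generate_user_agent

# A fully random User-Agent string
ua = generate_user_agent()

# Restrict the generated string: Chrome on Windows or Linux
ua = generate_user_agent(os=('win', 'linux'), navigator='chrome')

# Typical use: send it as the User-Agent header of an HTTP request
headers = {'User-Agent': ua}
# httpbin echoes the request's User-Agent back, handy for checking the header
response = requests.get('https://httpbin.org/user-agent', headers=headers)
print(response.text)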

Example 1: test_navigator_option

    def test_navigator_option(self):
        for x in range(100):
            ua = generate_user_agent(navigator='firefox')
            self.assertTrue('firefox' in ua.lower())

            ua = generate_user_agent(navigator='chrome')
            self.assertTrue('chrome' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 7, Source file: test.py


Example 2: test_platform_option_tuple

    def test_platform_option_tuple(self):
        for x in range(100):
            ua = generate_user_agent(platform=('win', 'linux'))
            ua = generate_user_agent(platform=('win', 'linux', 'mac'))
            ua = generate_user_agent(platform=('win',))
            ua = generate_user_agent(platform=('linux',))
            ua = generate_user_agent(platform=('mac',))
Developer: alexfalcucc, Project: user_agent, Lines: 7, Source file: test.py


Example 3: test_device_type_smartphone_chrome

def test_device_type_smartphone_chrome():
    for _ in range(50):
        agent = generate_user_agent(device_type='smartphone',
                                    navigator='chrome')
        assert 'Mobile' in agent
        agent = generate_user_agent(device_type='tablet', navigator='chrome')
        assert 'Mobile' not in agent
Developer: lorien, Project: user_agent, Lines: 7, Source file: user_agent.py


Example 4: test_platform_option_tuple

def test_platform_option_tuple():
    for _ in range(50):
        generate_user_agent(os=('win', 'linux'))
        generate_user_agent(os=('win', 'linux', 'mac'))
        generate_user_agent(os=('win',))
        generate_user_agent(os=('linux',))
        generate_user_agent(os=('mac',))
Developer: lorien, Project: user_agent, Lines: 7, Source file: user_agent.py


Example 5: test_platform_navigator_option

    def test_platform_navigator_option(self):
        for x in range(100):
            ua = generate_user_agent(platform='win', navigator='firefox')
            self.assertTrue('firefox' in ua.lower())
            self.assertTrue('windows' in ua.lower())

            ua = generate_user_agent(platform='win', navigator='chrome')
            self.assertTrue('chrome' in ua.lower())
            self.assertTrue('windows' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 9, Source file: test.py


Example 6: test_platform_option

def test_platform_option():
    for _ in range(50):
        agent = generate_user_agent(os='linux')
        assert 'linux' in agent.lower()

        agent = generate_user_agent(os='win')
        assert 'windows' in agent.lower()

        agent = generate_user_agent(os='mac')
        assert 'mac' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source file: user_agent.py


Example 7: test_navigator_option

def test_navigator_option():
    for _ in range(50):
        agent = generate_user_agent(navigator='firefox')
        assert 'firefox' in agent.lower()

        agent = generate_user_agent(navigator='chrome')
        assert 'chrome' in agent.lower()

        agent = generate_user_agent(navigator='ie')
        assert 'msie' in agent.lower() or 'rv:11' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source file: user_agent.py


Example 8: test_platform_option

    def test_platform_option(self):
        for x in range(100):
            ua = generate_user_agent(platform='linux')
            self.assertTrue('linux' in ua.lower())

            ua = generate_user_agent(platform='win')
            self.assertTrue('windows' in ua.lower())

            ua = generate_user_agent(platform='mac')
            self.assertTrue('mac' in ua.lower())

            self.assertRaises(UserAgentRuntimeError,
                              generate_user_agent,
                              platform=11)
Developer: alexfalcucc, Project: user_agent, Lines: 14, Source file: test.py


Example 9: getheadline

def getheadline(companyName, day, firstlink, prevdatelink):
    '''
    scrape headlines from finance.yahoo.com
    '''
    #date = '2016-02-'+str(day)
    searchUrl = 'http://finance.yahoo.com/q/h?s='+companyName+'&t=2016-04-'+str(day)
    #use fake useragent
    #ua = generate_user_agent()
    
    head = generate_user_agent().encode('ascii', 'ignore')
    headers = {'useragent':head}
    response = requests.get(searchUrl, headers=headers)
    
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.select('div.yfi_quote_headline ul > li > a')
    #write the search results in file, a new file for each day
    filename = 'links'+str(day)+'.txt'

    with io.open(filename, encoding='utf-8', mode='w+') as ns:
        count = 1
        for link in links:
            nextlinks = link.get('href')+'\n'
            if count == 1:
                ns.write(nextlinks)
                firstlink = nextlinks
            elif prevdatelink == nextlinks:
                print "All uniques headlines scraped"
                break
            else:
                ns.write(nextlinks)
            count += 1
        ns.close()
    return firstlink
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 33, Source file: get_headlines.py


Example 10: get_proxies

def get_proxies(proxy_type, ip_set, start_page, end_page):
    """extract proxies from page source code, store them in redis
    
    Args:
        proxy_type (str): base url for proxy type, like the global variables CHINA and OTHER
        ip_set (str): which set should the ips be stored in redis
        start_page (int):  which page to start crawling
        end_page (int): which page to stop crawling
    """
    try:
        conn = get_connection()
    except Exception:
        print 'Error while connecting to redis'
        return
    proxies, curr_proxy =[], None
    for page in xrange(start_page, end_page+1):
        if page % 2 == 0:
            time.sleep(20)
        # get page source code
        headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'}
        text = requests.get(proxy_type+str(page), headers = headers).text
        # extract ips from source code
        soup = BeautifulSoup(text, 'lxml')
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            #if u'美国' in tds[3].text:
            proxy = tds[1].text+':'+tds[2].text               
            if is_valid('https://www.amazon.com/', proxy):
                conn.sadd(ip_set, proxy)
                print '%s added to ip set %s' %(proxy, ip_set)
Developer: bdchinacs, Project: AmazonRobot, Lines: 30, Source file: GetProxy.py


Example 11: getBaiduDictCate

def getBaiduDictCate():
    """
    Fetch the categories of the Baidu input-method lexicon. There are three levels of categories; because the third level is too fine-grained and sparse, third-level categories are merged into their second-level parents.
    :return: two dicts -- the first maps each top-level category ID to its name, the second records all the sub-categories under each top-level category of the first dict
    """
    bigCateDict = {}
    smallCateDict ={}
    initPageURL = r'https://shurufa.baidu.com/dict'
    cateBaseURL = r'https://shurufa.baidu.com/dict_list?cid='

    # Prevent 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'  
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # Fetch the top-level categories
    try:
        request = urllib2.Request(url=initPageURL, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        print 'Error while getting the big category,error code:',e.code
        sys.exit()
Developer: WuLC, Project: ThesaurusSpider, Lines: 25, Source file: getCategory.py


Example 12: getCategoryPages

def getCategoryPages(caterotyID,downloadDIR):
    """通过类别的初始页面得到该类别的总页数,并将所有的页数放到 PAGE_QUEUE 中供所有线程下载

    :param caterotyID: 下载的词库类型的 ID,用于找到正确 url
    :param downloadDIR: 下载词库的存放目录
    :return:
    """
    global CATEID, DOWNLOAD_DIR, PAGE_BASE_URL, THREAD_LOCK
    CATEID = caterotyID
    DOWNLOAD_DIR = downloadDIR
    PAGE_BASE_URL = 'https://shurufa.baidu.com/dict_list?cid=%s' % CATEID
    pagePattern = re.compile(r'page=(\d+)#page')    # regex for finding the URLs of the other pages in the page source
    
    # Prevent 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'  
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer

    # Find the largest page number; the pages to crawl are then 1 through that maximum
    # The request may return 502/500 errors, so retry up to maxTry times
    maxTry = 8
    data = None
    for i in xrange(maxTry):
        try:
            request = urllib2.Request(url=PAGE_BASE_URL, headers=headers)
            response = urllib2.urlopen(request)
            data = response.read()
            break
        except urllib2.HTTPError, e:
            if i == maxTry-1:
                with io.open(DOWNLOAD_LOG.decode('utf8'), mode = 'a', encoding = 'utf8') as f:
                    f.write((str(e.code)+' error while parsing url '+PAGE_BASE_URL+'\n').decode('utf8'))
        except:
Developer: WuLC, Project: ThesaurusSpider, Lines: 35, Source file: multiThreadDownload.py


Example 13: getarticle

def getarticle(readfile):
    ''' get the article and save it in a different file '''
    try:
        fileopen = open(readfile)
    except IOError:
        print "file " + readfile + " not in the location specified"
        return

    i = 1
    for line in fileopen:
        try:
            ua = generate_user_agent()
            head = ua.encode('ascii', 'ignore')
            headers = {'useragent':head}

            print "reading article :"
            print line
            html = requests.get(line, headers = headers).text
            tex = fulltext(html)
            writefile = "201604"+str(j)+"_"+str(i)+".txt"
            with io.open(writefile, encoding='utf-8', mode='w+') as ns:
                strng = ' '.join(tex.split())
                ns.write(strng)
                ns.close()
            i = i + 1
        except:
            pass
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 27, Source file: getarticle.py


Example 14: get_address

def get_address(proxy):
    """fetch american address from https://fakena.me/random-real-address/
    
    Args:
        proxy (str): proxy to visit the target site, ip:port
    
    Returns:
        format_addr (str): american address in the form of "address_line # city # state # zip"
    """
    ignore_warnings()
    url = r'https://fakena.me/random-real-address/'
    referer = r'https://fakena.me'
    header = {'user-agent' : generate_user_agent() , 'referer':referer }
    curr_proxy ={
    'http': 'http://%s'%proxy
    }

    text = requests.get(url, headers = header, proxies = curr_proxy).text
    pattern = re.compile('<strong>(.+)<br>(.+)</strong>')
    result = re.findall(pattern, text)
    if result: # sometimes the result is empty
        print result[0][0], result[0][1]
        address_line = result[0][0]
        city, state_zip = result[0][1].split(',')
        state, zip = state_zip.split()
        format_addr = address_line+'#'+city+'#'+state+'#'+zip
        return format_addr
    else:
        return ''
Developer: bdchinacs, Project: AmazonRobot, Lines: 29, Source file: GetUserInfo.py


Example 15: send_query

    def send_query(self, query):
        # TODO: Randomize query, i.e. remove/change unused arguments to vary query signature
        self.queries_sent += 1
        if self.queries_sent % self.queries_change == 0:
            self.queries_change = randint(3, 13)
            ScholarConf.USER_AGENT = generate_user_agent()

        return super(BibDLQuerier, self).send_query(query)
Developer: igsor, Project: bibdl, Lines: 8, Source file: bibdl.py


Example 16: on_blocked

    def on_blocked(self):
        ScholarConf.USER_AGENT = generate_user_agent() # Randomize user agent
        self.timeout *= 2.0 # Increase timeout (exponential backoff)

        if self.blocked_cmd is not None:
            status, output = getstatusoutput(self.blocked_cmd)
            if status != 0:
                self.status.error(output)
Developer: igsor, Project: bibdl, Lines: 8, Source file: bibdl.py


Example 17: get_request

def get_request(url):
    """
    Takes in a url
    Outputs a list of html for each user's posts
    """

    headers = {"User-Agent": generate_user_agent()}
    response = requests.get(url, headers=headers)
    return response
Developer: millertracy, Project: g-project, Lines: 9, Source file: mhf_scrape.py


Example 18: invoke

  def invoke(self, url):
    headers = {'User-Agent': generate_user_agent()}
    req = requests.get(url, headers= headers)

    soup = BeautifulSoup(req.text, 'lxml') #from_encoding="gb2312")
    books = soup.select("div.book_list > ul > li")

    for book in books:
      self.parse_book(book)
Developer: sjtu-cs, Project: service-scraper, Lines: 9, Source file: book_parser.py


Example 19: download_images

def download_images(link_file_path, download_dir, log_dir):
    """download images whose links are in the link file
    
    Args:
        link_file_path (str): path of file containing links of images
        download_dir (str): directory to store the downloaded images
    
    Returns:
        None
    """
    print('Start downloading with link file {0}..........'.format(link_file_path))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    main_keyword = link_file_path.split('/')[-1]
    log_file = log_dir + 'download_selenium_{0}.log'.format(main_keyword)
    logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode="a+", format="%(asctime)-15s %(levelname)-8s  %(message)s")
    img_dir = download_dir + main_keyword + '/'
    count = 0
    headers = {}
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    # start to download images
    with open(link_file_path, 'r') as rf:
        for link in rf:
            try:
                o = urlparse(link)
                ref = o.scheme + '://' + o.hostname
                #ref = 'https://www.google.com'
                ua = generate_user_agent()
                headers['User-Agent'] = ua
                headers['referer'] = ref
                print('\n{0}\n{1}\n{2}'.format(link.strip(), ref, ua))
                req = urllib.request.Request(link.strip(), headers = headers)
                response = urllib.request.urlopen(req)
                data = response.read()
                file_path = img_dir + '{0}.jpg'.format(count)
                with open(file_path,'wb') as wf:
                    wf.write(data)
                print('Process-{0} download image {1}/{2}.jpg'.format(main_keyword, main_keyword, count))
                count += 1
                if count % 10 == 0:
                    print('Process-{0} is sleeping'.format(main_keyword))
                    time.sleep(5)

            except urllib.error.URLError as e:
                print('URLError')
                logging.error('URLError while downloading image {0}reason:{1}'.format(link, e.reason))
                continue
            except urllib.error.HTTPError as e:
                print('HTTPError')
                logging.error('HTTPError while downloading image {0}http code {1}, reason:{2}'.format(link, e.code, e.reason))
                continue
            except Exception as e:
                print('Unexpected Error')
                logging.error('Unexpected error while downloading image {0}error type:{1}, args:{2}'.format(link, type(e), e.args))
                continue
Developer: linhanquan, Project: GoogleImagesDownloader, Lines: 56, Source file: download_with_selenium.py


Example 20: download_with_time_limit

def download_with_time_limit(link_file_path, download_dir, log_dir, limit_time = 10):
    main_keyword = link_file_path.split('/')[-1]
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_file = log_dir + 'download_selenium_{0}.log'.format(main_keyword)
    logging.basicConfig(level = logging.DEBUG, filename = log_file, filemode = "a+", format = "%(asctime)-15s %(levelname)-8s  %(message)s")
    img_dir = download_dir + main_keyword + '/'
    count = 0
    headers = {}
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    signal.signal(signal.SIGALRM, handler)
    with open(link_file_path, 'r') as rf:
        for link in rf:
            try:
                ref = 'https://www.google.com'
                o = urlparse(link)
                ref = o.scheme + '://' + o.hostname
                ua = generate_user_agent()
                headers['User-Agent'] = ua
                headers['referer'] = ref

                # limit the time of downloading a image
                try:
                    signal.alarm(limit_time) # set a timeout(alarm)
                    req = urllib.request.Request(link.strip(), headers = headers)
                    response = urllib.request.urlopen(req)
                    data = response.read()
                except TimeLimitError as e:
                    print('TimeLimitError: process-{0} encounters {1}'.format(main_keyword, e.value))
                    logging.error('TimeLimitError while downloading image{0}'.format(link))
                    continue
                finally:
                    signal.alarm(0) # disable the alarm

                file_path = img_dir + '{0}.jpg'.format(count)
                with open(file_path,'wb') as wf:
                    wf.write(data)
                print('Process-{0} download image {1}/{2}.jpg'.format(main_keyword, main_keyword, count))
                count += 1
                if count % 10 == 0:
                    print('Process-{0} is sleeping'.format(main_keyword))
                    time.sleep(5)
            except urllib.error.HTTPError as e:
                print('HTTPError')
                logging.error('HTTPError while downloading image {0}http code {1}, reason:{2}'.format(link, e.code, e.reason))
                continue
            except urllib.error.URLError as e:
                print('URLError')
                logging.error('URLError while downloading image {0}reason:{1}'.format(link, e.reason))
                continue
            except Exception as e:
                print('Unexpected Error')
                logging.error('Unexpected error while downloading image {0}error type:{1}, args:{2}'.format(link, type(e), e.args))
                continue
Developer: WuLC, Project: GoogleImagesDownloader, Lines: 55, Source file: download_images_with_time_limit.py



Note: The user_agent.generate_user_agent function examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms. The code snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Please refer to the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.

