• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python config.realize函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中seesaw.config.realize函数的典型用法代码示例。如果您正苦于以下问题:Python realize函数的具体用法?Python realize怎么用?Python realize使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了realize函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: data

 def data(self, item):
     """Return the data dict built from this object's settings for ``item``.

     ``downloader`` is always resolved with ``realize``; a ``version`` entry
     is added (also resolved with ``realize``) only when ``self.version``
     is truthy. ``api_version`` is the fixed string "2".
     """
     payload = {"downloader": realize(self.downloader, item)}
     payload["api_version"] = "2"
     if not self.version:
         return payload
     payload["version"] = realize(self.version, item)
     return payload
开发者ID:ArchiveTeam,项目名称:seesaw-kit,代码行数:8,代码来源:tracker.py


示例2: stdin_data

 def stdin_data(self, item):
     """Return the UTF-8 encoded text written to the subprocess's stdin.

     Each entry of ``self.files`` (resolved with ``realize``) becomes one
     newline-terminated line holding its path relative to the resolved
     ``self.target_source_path``.
     """
     lines = []
     for entry in realize(self.files, item):
         relative = os.path.relpath(
             realize(entry, item),
             realize(self.target_source_path, item)
         )
         lines.append("%s\n" % relative)
     return "".join(lines).encode('utf-8')
开发者ID:VADemon,项目名称:seesaw-kit,代码行数:9,代码来源:externalprocess.py


示例3: process

	def process(self, item):
		"""Compute upload statistics for ``item`` and store them in ``item["stats"]``.

		For every group in ``self.file_groups`` the on-disk sizes of its
		(realized) files are summed. The resulting dict starts from
		``self.defaults`` and adds the item name, the per-group byte totals
		and, when ``self.id_function`` is set, an ``id`` computed from the item.
		The whole dict is passed through ``realize`` before being stored.
		"""
		total_bytes = {}
		# .items() works on both Python 2 and Python 3; .iteritems() exists
		# only on Python 2 dicts.
		for group, files in self.file_groups.items():
			total_bytes[group] = sum(os.path.getsize(f) for f in realize(files, item))

		stats = {}
		stats.update(self.defaults)
		stats["item"] = item["item_name"]
		stats["bytes"] = total_bytes

		if self.id_function:
			stats["id"] = self.id_function(item)

		item["stats"] = realize(stats, item)
开发者ID:chfoo,项目名称:isohunt-grab,代码行数:14,代码来源:pipeline.py


示例4: process

  def process(self, item):
    """Launch the external process for ``item`` and feed it its stdin data.

    Runs inside ``self.task_cwd()``. The output/end callbacks are attached
    before the process is started; afterwards the result of
    ``self.stdin_data(item)`` is written to the child's stdin, which is
    then closed.
    """
    with self.task_cwd():
      popen_options = dict(
          args=realize(self.args, item),
          env=realize(self.env, item),
          stdin=subprocess.PIPE,
          close_fds=True
      )
      proc = AsyncPopen(**popen_options)

      # Handlers must be registered before run() so no event is missed.
      proc.on_output += functools.partial(self.on_subprocess_stdout, proc, item)
      proc.on_end += functools.partial(self.on_subprocess_end, item)

      proc.run()

      child_stdin = proc.stdin
      child_stdin.write(self.stdin_data(item))
      proc.stdin.close()
开发者ID:daxelrod,项目名称:seesaw-kit,代码行数:16,代码来源:externalprocess.py


示例5: realize

 def realize(self, item):
     """Build the realized wget-lua argument list for a furaffinity item.

     ``item['item_name']`` must look like ``<type>:<value>`` with type
     ``image`` or ``imagelogin``. The type and value are stored back on the
     item as ``item_type`` / ``item_value``. One view URL is added for every
     two-character suffix drawn from digits + lowercase ASCII letters
     (36 * 36 URLs), followed by the cookie options for the item type:
     ``--no-cookies`` for ``image``, ``--load-cookies cookies.txt`` for
     ``imagelogin``. If a module-level ``bind_address`` exists it is passed
     to wget and announced on stdout.

     Raises AssertionError for a malformed name or unsupported type (and
     Exception('Unknown item') if assertions are disabled).
     """
     wget_args = [
         WGET_LUA,
         "-U", USER_AGENT,
         "-nv",
         "--lua-script", "furaffinity.lua",
         "-o", ItemInterpolation("%(item_dir)s/wget.log"),
         "--no-check-certificate",
         "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
         "--truncate-output",
         "-e", "robots=off",
         "--rotate-dns",
         "--recursive", "--level=inf",
         "--no-parent",
         "--page-requisites",
         "--timeout", "30",
         "--tries", "inf",
         "--domains", "furaffinity.net",
         "--span-hosts",
         "--waitretry", "30",
         "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
         "--warc-header", "operator: Archive Team",
         "--warc-header", "furaffinity-dld-script-version: " + VERSION,
         "--warc-header", ItemInterpolation("furaffinity-user: %(item_name)s"),
     ]

     item_name = item['item_name']
     assert ':' in item_name
     item_type, item_value = item_name.split(':', 1)

     item['item_type'] = item_type
     item['item_value'] = item_value

     assert item_type in ('image', 'imagelogin')

     # Both item types crawl the same URLs; only the cookie handling differs.
     if item_type == 'image':
         cookie_args = ["--no-cookies"]
     elif item_type == 'imagelogin':
         cookie_args = ["--load-cookies", "cookies.txt"]
     else:
         raise Exception('Unknown item')

     # ascii_lowercase is locale-independent and exists on Python 2 and 3,
     # unlike string.lowercase.
     suffixes = string.digits + string.ascii_lowercase
     wget_args.extend(
         'http://www.furaffinity.net/view/{0}{1}{2}/'.format(item_value, a, b)
         for a in suffixes
         for b in suffixes
     )
     wget_args.extend(cookie_args)

     if 'bind_address' in globals():
         wget_args.extend(['--bind-address', globals()['bind_address']])
         print('')
         print('*** Wget will bind address at {0} ***'.format(
             globals()['bind_address']))
         print('')

     return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:furaffinity-grab,代码行数:60,代码来源:pipeline.py


示例6: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a rutracker item.

        ``item['item_name']`` is split once on ':' into a type (``thread`` or
        ``forum``) and a value, both stored back on the item. For each digit
        0-9 appended to the value, four URLs (page plus three API endpoints)
        are added. A module-level ``bind_address``, if present, is passed to
        wget and printed.
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--lua-script", "rutracker.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
            "--recursive", "--level=inf",
            "--no-parent",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--domains", "rutracker.org",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "rutracker-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("rutracker-user: %(item_name)s"),
        ]

        full_name = item['item_name']
        assert ':' in full_name
        item_type, item_value = full_name.split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        assert item_type in ('thread', 'forum')

        # URL templates per item type; {0} is the item value, {1} the digit.
        url_templates = {
            'thread': (
                'http://rutracker.org/forum/viewtopic.php?t={0}{1}',
                'http://api.rutracker.org/v1/get_peer_stats?by=topic_id&val={0}{1}',
                'http://api.rutracker.org/v1/get_tor_hash?by=topic_id&val={0}{1}',
                'http://api.rutracker.org/v1/get_tor_topic_data?by=topic_id&val={0}{1}',
            ),
            'forum': (
                'http://rutracker.org/forum/viewforum.php?f={0}{1}',
                'http://api.rutracker.org/v1/get_forum_name?by=forum_id&val={0}{1}',
                'http://api.rutracker.org/v1/get_forum_data?by=forum_id&val={0}{1}',
                'http://api.rutracker.org/v1/static/pvc/f/{0}{1}',
            ),
        }
        if item_type not in url_templates:
            raise Exception('Unknown item')

        for digit in string.digits:
            for template in url_templates[item_type]:
                wget_args.append(template.format(item_value, digit))

        if 'bind_address' in globals():
            bind_to = globals()['bind_address']
            wget_args.extend(['--bind-address', bind_to])
            print('')
            print('*** Wget will bind address at {0} ***'.format(bind_to))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:rutracker-grab,代码行数:60,代码来源:pipeline.py


示例7: process_body

	def process_body(self, body, item):
		"""Handle the tracker's JSON reply and start the matching upload task.

		``body`` is parsed as JSON. Without an ``upload_target`` key the item
		is scheduled for retry. An ``rsync://`` target starts an RsyncUpload
		of all files; an ``http(s)://`` target starts a CurlUpload, which
		requires exactly one file. Any other target, or a wrong file count
		for curl, fails the item.
		"""
		data = json.loads(body)
		if "upload_target" not in data:
			item.log_output("Tracker did not provide an upload target.")
			self.schedule_retry(item)
			return

		files = realize(self.files, item)
		target = data["upload_target"]

		if re.match(r"^rsync://", target):
			item.log_output("Uploading with Rsync to %s" % target)
			inner_task = RsyncUpload(target, files, target_source_path=self.rsync_target_source_path, bwlimit=self.rsync_bwlimit, extra_args=self.rsync_extra_args, max_tries=1)
		elif re.match(r"^https?://", target):
			item.log_output("Uploading with Curl to %s" % target)
			if len(files) != 1:
				item.log_output("Curl expects to upload a single file.")
				self.fail_item(item)
				return
			inner_task = CurlUpload(target, files[0], self.curl_connect_timeout, self.curl_speed_limit, self.curl_speed_time, max_tries=1)
		else:
			item.log_output("Received invalid upload type.")
			self.fail_item(item)
			return

		# Forward the inner task's outcome to this task's own handlers.
		inner_task.on_complete_item += self._inner_task_complete_item
		inner_task.on_fail_item += self._inner_task_fail_item
		inner_task.enqueue(item)
开发者ID:chfoo,项目名称:isohunt-grab,代码行数:32,代码来源:pipeline.py


示例8: realize

 def realize(self, item):
     """Build the realized wget-lua argument list for a gamefront item.

     ``item['item_name']`` is split once on ':' into a type (``file`` or
     ``singlefile``) and a value, both stored back on the item. ``file``
     adds ten URLs (value followed by each digit). ``singlefile`` adds the
     single file URL and additionally performs HTTP requests via
     ``requests``: it fetches the file page, posts the ``plopMe`` token if
     one is found in the page, and finally requests the thank-you page.
     A module-level ``bind_address``, if present, is passed to wget.
     """
     wget_args = [
         WGET_LUA,
         "-U", USER_AGENT,
         "-nv",
         "--lua-script", "gamefront.lua",
         "-o", ItemInterpolation("%(item_dir)s/wget.log"),
         "--no-check-certificate",
         "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
         "--truncate-output",
         "-e", "robots=off",
         "--rotate-dns",
         "--recursive", "--level=inf",
         "--no-parent",
         "--page-requisites",
         "--timeout", "30",
         "--tries", "inf",
         "--domains", "gamefront.com",
         "--span-hosts",
         "--waitretry", "30",
         "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
         "--warc-header", "operator: Archive Team",
         "--warc-header", "gamefront-dld-script-version: " + VERSION,
         "--warc-header", ItemInterpolation("gamefront-user: %(item_name)s"),
     ]

     full_name = item['item_name']
     assert ':' in full_name
     item_type, item_value = full_name.split(':', 1)

     item['item_type'] = item_type
     item['item_value'] = item_value

     assert item_type in ('file', 'singlefile')

     if item_type == 'file':
         wget_args.extend(
             'http://www.gamefront.com/files/{0}{1}'.format(item_value, digit)
             for digit in string.digits
         )
     elif item_type == 'singlefile':
         wget_args.append('http://www.gamefront.com/files/{0}'.format(item_value))
         page_url = 'http://www.gamefront.com/files/' + item_value
         session = requests.Session()
         page_html = session.get(page_url).text
         # One search with a capture group both detects and extracts the token.
         token_match = re.search(r"plopMe\('[0-9]+',\s+'([^']+)'\)", page_html)
         if token_match:
             token = token_match.group(1)
             print('Received token ' + token + '.')
             reply = session.post(
                 'http://www.gamefront.com/files/service/request',
                 data={'token': token},
                 headers={'referer': page_url},
             )
             print('Received ' + reply.text + '.')
         session.get(
             'http://www.gamefront.com/files/service/thankyou?id=' + item_value,
             headers={'referer': page_url},
         )
     else:
         raise Exception('Unknown item')

     if 'bind_address' in globals():
         bind_to = globals()['bind_address']
         wget_args.extend(['--bind-address', bind_to])
         print('')
         print('*** Wget will bind address at {0} ***'.format(bind_to))
         print('')

     return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:gamefront-grab,代码行数:59,代码来源:pipeline.py


示例9: enqueue

 def enqueue(self, item):
     """Start ``item``, initialise its download bookkeeping and process it.

     Sets the try counter to 1, stores the realized URL list under
     ``WgetDownloadMany.urls`` and resets the URL index and current URL
     before calling ``self.process``.
     """
     self.start_item(item)
     item.log_output("Starting %s for %s\n" % (self, item.description()))
     item["tries"] = 1
     urls = realize(self.unrealized_urls, item)
     item['WgetDownloadMany.urls'] = urls
     for key, initial in (
         ('WgetDownloadMany.urls_index', 0),
         ('WgetDownloadMany.current_url', None),
     ):
         item[key] = initial
     self.process(item)
开发者ID:ArchiveTeam,项目名称:puush-grab,代码行数:8,代码来源:pipeline.py


示例10: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a musicbrainz item.

        ``item['item_name']`` is split on ':' into three parts; the second
        (stored as ``item['item_item']``) and third name a file under
        archive.org's download path. That file is fetched with ``requests``
        and every line of it is appended to the wget arguments. A non-200
        response raises Exception. A module-level ``bind_address``, if
        present, is passed to wget and printed.
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--lua-script", "musicbrainz.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
            "--no-parent",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "musicbrainz-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("musicbrainz-user: %(item_name)s"),
        ]

        full_name = item["item_name"]
        assert ":" in full_name
        # The first component is not used here.
        _unused_sort, item_item, item_file = full_name.split(":", 2)

        item["item_item"] = item_item

        listing = requests.get("http://archive.org/download/{0}/{1}".format(item_item, item_file))
        if listing.status_code != 200:
            reported_url = "https://archive.org/download/{0}/{1}".format(item_item, item_file)
            raise Exception(
                "You received status code %d with URL %s"
                % (listing.status_code, reported_url)
            )
        wget_args.extend("{0}".format(line) for line in listing.text.splitlines())

        if "bind_address" in globals():
            bind_to = globals()["bind_address"]
            wget_args.extend(["--bind-address", bind_to])
            print("")
            print("*** Wget will bind address at {0} ***".format(bind_to))
            print("")

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:musicbrainz-grab,代码行数:58,代码来源:pipeline.py


示例11: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a twitpic-api item.

        A user agent and an Accept-Language header are picked at random from
        ``USER_AGENTS`` / ``ACCEPT_LANGUAGE_HEADERS``. ``item['item_name']``
        is split once on ':' into a type and a value, both stored back on the
        item. Only the ``imageapi`` type is implemented: for every suffix in
        digits + lowercase ASCII letters, the media and first-page comments
        API URLs for ``<value><suffix>`` are added.

        Raises AssertionError for a malformed name or a type outside the
        accepted set, and Exception('Unknown item') for an accepted type that
        has no URL rules here.
        """
        wget_args = [
            WGET_LUA,
            "-U", random.choice(USER_AGENTS),
            "-nv",
            "--lua-script", "twitpic-api.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--no-cookies",
            "--rotate-dns",
            "--recursive", "--level=inf",
            "--no-parent",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--span-hosts",
            "--waitretry", "30",
            "--domains", "twitpic.com,cloudfront.net,twimg.com,amazonaws.com",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "twitpic-api-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("twitpic-api-user: %(item_name)s"),
            "--header", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "--header", "DNT: 1",
            "--header", random.choice(ACCEPT_LANGUAGE_HEADERS),
        ]

        item_name = item['item_name']
        assert ':' in item_name
        item_type, item_value = item_name.split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        # 'imageapi' was missing from this whitelist, which made the only
        # implemented branch below unreachable. The other names are kept so
        # they still fail with Exception('Unknown item') as before.
        assert item_type in ('imageapi', 'image', 'user', 'tag', 'event')

        if item_type == 'imageapi':
            # ascii_lowercase is locale-independent and exists on Python 2
            # and 3, unlike string.lowercase.
            for s in string.digits + string.ascii_lowercase:
                wget_args.append('http://api.twitpic.com/2/media/show.json?id={0}{1}'.format(item_value, s))
                wget_args.append('http://api.twitpic.com/2/comments/show.json?media_id={0}{1}&page=1'.format(item_value, s))
        else:
            raise Exception('Unknown item')

        if 'bind_address' in globals():
            wget_args.extend(['--bind-address', globals()['bind_address']])
            print('')
            print('*** Wget will bind address at {0} ***'.format(
                globals()['bind_address']))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:twitpic-api-grab,代码行数:58,代码来源:pipeline.py


示例12: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a portalgraphics item.

        ``item['item_name']`` is split once on ':' into a type (``image_id``
        or ``user_id``) and a value, both stored back on the item.
        ``image_id`` adds the illustration, movie-data and address URLs for
        that id; ``user_id`` adds the profile URL. A module-level
        ``bind_address``, if present, is passed to wget and printed.

        Raises AssertionError for a malformed name or unsupported type (and
        Exception('Unknown item') if assertions are disabled).
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--no-cookies",
            "--lua-script", "portalgraphics.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
            "--recursive", "--level=inf",
            "--no-parent",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--domains", "portalgraphics.net",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "portalgraphics-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("portalgraphics-user: %(item_name)s"),
        ]

        item_name = item['item_name']
        assert ':' in item_name
        # maxsplit must be 1 for a two-name unpack; with 2, a value that
        # itself contains ':' raised "too many values to unpack".
        item_type, item_value = item_name.split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        assert item_type in ('image_id', 'user_id')

        if item_type == 'image_id':
            wget_args.append('http://www.portalgraphics.net/pg/illust/?image_id={0}'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/illust/?image_id={0}&lang=ja'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/illust/?image_id={0}&lang=en'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/movie/pg_player/res_movie_data.php?mid={0}'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/movie/pg_player/res_movie_data.php?mid={0}&lang=ja'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/movie/pg_player/res_movie_data.php?mid={0}&lang=en'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/movie/address.php?image%5Fid={0}'.format(item_value))
            wget_args.append('http://www.portalgraphics.net/pg/movie/address.php?image_id={0}'.format(item_value))
        elif item_type == 'user_id':
            wget_args.append('http://portalgraphics.net/pg/profile/?user_id={0}'.format(item_value))
        else:
            raise Exception('Unknown item')

        if 'bind_address' in globals():
            wget_args.extend(['--bind-address', globals()['bind_address']])
            print('')
            print('*** Wget will bind address at {0} ***'.format(
                globals()['bind_address']))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:portalgraphics-grab,代码行数:58,代码来源:pipeline.py


示例13: realize

 def realize(self, item):
     """Build the realized wget-lua argument list for a sourceforge item.

     ``item['item_name']`` is split once on ':' into a type (only
     ``project`` is supported) and a value, both stored back on the item.
     The project's page variants, REST endpoints and ``<value>.sourceforge.net``
     site are added as start URLs. A module-level ``bind_address``, if
     present, is passed to wget and printed.

     Raises AssertionError for a malformed name or unsupported type (and
     Exception('Unknown item') if assertions are disabled).
     """
     wget_args = [
         WGET_LUA,
         "-U", USER_AGENT,
         "-nv",
         "--lua-script", "sourceforge.lua",
         "-o", ItemInterpolation("%(item_dir)s/wget.log"),
         "--no-check-certificate",
         "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
         "--truncate-output",
         "-e", "robots=off",
         "--rotate-dns",
         "--recursive", "--level=inf",
         "--no-parent",
         "--page-requisites",
         "--timeout", "30",
         "--tries", "inf",
         "--domains", "sourceforge.net",
         "--span-hosts",
         "--waitretry", "30",
         "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
         "--warc-header", "operator: Archive Team",
         "--warc-header", "sourceforge-dld-script-version: " + VERSION,
         "--warc-header", ItemInterpolation("sourceforge-user: %(item_name)s"),
     ]

     item_name = item['item_name']
     assert ':' in item_name
     item_type, item_value = item_name.split(':', 1)

     item['item_type'] = item_type
     item['item_value'] = item_value

     # The trailing comma matters: ('project') is just the string 'project',
     # which turned this into a substring test that accepted e.g. 'proj' or ''.
     assert item_type in ('project',)

     if item_type == 'project':
         # The first projects/ URL appears twice in the original list; the
         # sequence is kept unchanged.
         url_templates = (
             'http://sourceforge.net/projects/{0}/',
             'http://sourceforge.net/projects/{0}/?source=directory',
             'http://sourceforge.net/projects/{0}/?source=directory-featured',
             'http://sourceforge.net/projects/{0}/?source=frontpage&position=1',
             'http://sourceforge.net/projects/{0}/?source=frontpage',
             'http://sourceforge.net/projects/{0}/',
             'http://sourceforge.net/p/{0}/',
             'http://sourceforge.net/rest/p/{0}/',
             'http://sourceforge.net/rest/p/{0}?doap',
             'http://{0}.sourceforge.net/',
         )
         wget_args.extend(t.format(item_value) for t in url_templates)
     else:
         raise Exception('Unknown item')

     if 'bind_address' in globals():
         wget_args.extend(['--bind-address', globals()['bind_address']])
         print('')
         print('*** Wget will bind address at {0} ***'.format(
             globals()['bind_address']))
         print('')

     return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:sourceforge-grab,代码行数:57,代码来源:pipeline.py


示例14: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a panoramio item.

        ``item['item_name']`` is split once on ':' into a type (``photos``
        or ``users``) and a ``start-stop`` range, both stored back on the
        item. For every integer in the inclusive range a per-id WARC header
        and the matching photo/user URL are added. A module-level
        ``bind_address``, if present, is passed to wget and printed.
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--no-cookies",
            "--lua-script", "panoramio.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
            "--recursive", "--level=inf",
            "--no-parent",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--domains", "panoramio.com",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "panoramio-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("panoramio-item: %(item_name)s"),
        ]

        full_name = item['item_name']
        assert ':' in full_name
        item_type, item_value = full_name.split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        assert item_type in ('photos', 'users')

        # (WARC header template, URL template) for each supported item type.
        templates = {
            'photos': ('panoramio-photo: {i}', 'http://www.panoramio.com/photo/{i}'),
            'users': ('panoramio-user: {i}', 'http://www.panoramio.com/user/{i}'),
        }
        if item_type not in templates:
            raise Exception('Unknown item')

        header_template, url_template = templates[item_type]
        first, last = item_value.split('-')
        for i in range(int(first), int(last) + 1):
            wget_args.extend(['--warc-header', header_template.format(i=i)])
            wget_args.append(url_template.format(i=i))

        if 'bind_address' in globals():
            bind_to = globals()['bind_address']
            wget_args.extend(['--bind-address', bind_to])
            print('')
            print('*** Wget will bind address at {0} ***'.format(bind_to))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:panoramio-grab,代码行数:57,代码来源:pipeline.py


示例15: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a 500px item.

        ``item['item_name']`` is split once on ':' into a type and a value,
        both stored back on the item. ``photos`` takes a ';'-separated id
        list and adds, per id, a WARC header, the photo page and two API
        URLs. ``all`` takes a ``start-end`` range and adds a WARC header and
        the photo page per id. Any other type raises Exception. A
        module-level ``bind_address``, if present, is passed to wget.
        """
        wget_args = [
            WGET_LUA,
            '-U', USER_AGENT,
            '-nv',
            '--no-cookies',
            '--lua-script', '500px.lua',
            '-o', ItemInterpolation('%(item_dir)s/wget.log'),
            '--no-check-certificate',
            '--output-document', ItemInterpolation('%(item_dir)s/wget.tmp'),
            '--truncate-output',
            '-e', 'robots=off',
            '--rotate-dns',
            '--recursive', '--level=inf',
            '--no-parent',
            '--page-requisites',
            '--timeout', '30',
            '--tries', 'inf',
            '--domains', '500px.com',
            '--span-hosts',
            '--waitretry', '30',
            '--warc-file', ItemInterpolation('%(item_dir)s/%(warc_file_base)s'),
            '--warc-header', 'operator: Archive Team',
            '--warc-header', '500px-dld-script-version: ' + VERSION,
            '--warc-header', ItemInterpolation('500px-item: %(item_name)s'),
        ]

        full_name = item['item_name']
        assert ':' in full_name
        item_type, item_value = full_name.split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        if item_type == 'photos':
            for photo_id in item_value.split(';'):
                wget_args += ['--warc-header', '500px-photo: {}'.format(photo_id)]
                wget_args += [
                    'https://500px.com/photo/{}'.format(photo_id),
                    'https://api.500px.com/v1/photos/{}/comments?sort=created_at&include_subscription=1&include_flagged=1&nested=1&page=1&rpp=30'.format(photo_id),
                    'https://api.500px.com/v1/photos?image_size%5B%5D=1&image_size%5B%5D=2&image_size%5B%5D=32&image_size%5B%5D=31&image_size%5B%5D=33&image_size%5B%5D=34&image_size%5B%5D=35&image_size%5B%5D=36&image_size%5B%5D=2048&image_size%5B%5D=4&image_size%5B%5D=14&expanded_user_info=true&include_tags=true&include_geo=true&include_equipment_info=true&include_licensing=true&include_releases=true&liked_by=1&following_sample=100&ids={}'.format(photo_id),
                ]
                # Disabled in the original:
                #wget_args.append('https://api.500px.com/v1/photos/{}/navigation?from=user&formats=jpeg%2Clytro&image_size%5B%5D=1&image_size%5B%5D=2&image_size%5B%5D=32&image_size%5B%5D=31&image_size%5B%5D=33&image_size%5B%5D=34&image_size%5B%5D=35&image_size%5B%5D=36&image_size%5B%5D=2048&image_size%5B%5D=4&image_size%5B%5D=14'.format(id_))
        elif item_type == 'all':
            first, last = item_value.split('-')
            for photo_id in range(int(first), int(last) + 1):
                wget_args += ['--warc-header', '500px-photo: {}'.format(photo_id)]
                wget_args.append('https://500px.com/photo/{}'.format(photo_id))
        else:
            raise Exception('Unknown item')

        if 'bind_address' in globals():
            bind_to = globals()['bind_address']
            wget_args.extend(['--bind-address', bind_to])
            print('')
            print('*** Wget will bind address at {0} ***'.format(bind_to))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:500px-grab,代码行数:57,代码来源:pipeline.py


示例16: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a yuku item.

        ``item['item_name']`` has four ':'-separated parts, e.g.
        ``yuku:10threads:deltasforest29697:17``. The last three are stored on
        the item as ``item_type``, ``item_value`` and ``item_thread``.
        ``thread`` adds the single topic URL; ``10threads`` adds ten topic
        URLs, one for each digit appended to the thread number. A
        module-level ``bind_address``, if present, is passed to wget.
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--lua-script", "yuku.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
            "--recursive", "--level=inf",
            "--no-parent",
            "--no-cookies",
            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--domains", "yuku.com",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "yuku-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("yuku-user: %(item_name)s"),
        ]

        full_name = item['item_name']
        assert ':' in full_name
        # The leading component is not used further.
        _prefix, item_type, item_value, item_thread = full_name.split(':', 3)

        item['item_type'] = item_type
        item['item_value'] = item_value
        item['item_thread'] = item_thread

        assert item_type in ('thread', '10threads')

        if item_type == 'thread':
            topic_urls = ['http://%s.yuku.com/topic/%s/'%(item_value, item_thread)]
        elif item_type == '10threads':
            topic_urls = [
                'http://%s.yuku.com/topic/%s%s/'%(item_value, item_thread, digit)
                for digit in string.digits
            ]
        else:
            raise Exception('Unknown item')
        wget_args.extend(topic_urls)

        if 'bind_address' in globals():
            bind_to = globals()['bind_address']
            wget_args.extend(['--bind-address', bind_to])
            print('')
            print('*** Wget will bind address at {0} ***'.format(bind_to))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:yuku-grab,代码行数:56,代码来源:pipeline.py


示例17: realize

    def realize(self, item):
        """Build the realized wpull argument list for an FTP item.

        ``item['item_name']`` is split on ':' into three parts; the second
        (stored as ``item['item_item']``) and third name a listing file under
        archive.org's download path. The listing is fetched with
        ``requests``; it must contain an ``ITEM_TOTAL_SIZE: <bytes>`` marker
        not exceeding 10 GiB. Every ``ftp://`` line is cleaned up and added
        as a start URL. A module-level ``bind_address``, if present, is
        passed on and printed.

        Raises Exception when the listing cannot be fetched, has no size
        marker, is too large, or contains a URL with '#'.
        """
        wget_args = [
            WPULL_EXE,
            "-nv",
            "--python-script", "ftp.py",
            "-o", ItemInterpolation("%(item_dir)s/wpull.log"),
            "--no-check-certificate",
            "--database", ItemInterpolation("%(item_dir)s/wpull.db"),
            "--delete-after",
            "--no-robots",
            "--no-cookies",
            "--rotate-dns",
            "--timeout", "60",
            "--tries", "inf",
            "--wait", "0.5",
            "--random-wait",
            "--waitretry", "5",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "ftp-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("ftp-user: %(item_name)s"),
            ]

        item_name = item['item_name']
        assert ':' in item_name
        item_sort, item_item, item_file = item_name.split(':', 2)

        item['item_item'] = item_item

        MAX_SIZE = 10737418240  # 10 GiB

        list_url = 'http://archive.org/download/{0}/{1}'.format(item_item, item_file)
        item_list = requests.get(list_url)
        if item_list.status_code != 200:
            raise Exception('You received status code %d with URL %s'%(item_list.status_code, 'https://archive.org/download/{0}/{1}'.format(item_item, item_file)))

        # Fail with a clear message instead of an AttributeError on None
        # when the size marker is absent.
        size_match = re.search(r'ITEM_TOTAL_SIZE: ([0-9]+)', item_list.text)
        if size_match is None:
            raise Exception('No ITEM_TOTAL_SIZE found in %s' % list_url)
        itemsize = int(size_match.group(1))
        if itemsize > MAX_SIZE:
            raise Exception('Item is %d bytes. This is larger then %d bytes.'%(itemsize, MAX_SIZE))

        for url in item_list.text.splitlines():
            if not url.startswith('ftp://'):
                continue
            # The original second replace was the no-op '&' -> '&'; decoding
            # the HTML entity is the presumed intent.
            # NOTE(review): confirm the listing really contains '&amp;'.
            url = url.replace(' ', '%20').replace('&amp;', '&')
            url = urllib.unquote(url)
            if item_item == 'archiveteam_ftp_items_2015120102':
                # This item's listing has a doubled path segment.
                url = url.replace('ftp://ftp.research.microsoft.com/downloads/downloads/', 'ftp://ftp.research.microsoft.com/downloads/')
            if '#' in url:
                raise Exception('%s containes a bad character.'%(url))
            wget_args.append("{0}".format(url))

        if 'bind_address' in globals():
            wget_args.extend(['--bind-address', globals()['bind_address']])
            print('')
            print('*** Wget will bind address at {0} ***'.format(
                globals()['bind_address']))
            print('')

        return realize(wget_args, item)
开发者ID:tobbez,项目名称:ftp-grab,代码行数:56,代码来源:pipeline.py


示例18: realize

    def realize(self, item):
        """Build the realized wget-lua argument list for a canvas item.

        ``item['item_name']`` is split once on ':' into a type (``user`` or
        ``homepage``) and a value, both stored back on the item. ``user``
        adds the user's canv.as URL and turns on unlimited recursion;
        ``homepage`` adds only the site root. A module-level
        ``bind_address``, if present, is passed to wget and printed.
        """
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--lua-script", "canvas.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--no-cookies",
            "--rotate-dns",
            # Recursion is not enabled globally; it is added per item type below.
            "--no-parent",
            "--page-requisites",
            "--timeout", "60",
            "--tries", "inf",
            "--span-hosts",
            "--waitretry", "3600",
            "--domains", "canv.as,drawquest-export.s3-website-us-east-1.amazonaws.com",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "canvas-archive-dld-script-version: " + VERSION,
            "--warc-header", ItemInterpolation("canvas-user: %(item_name)s"),
            "--header", "Host: drawquest-export.s3-website-us-east-1.amazonaws.com",
        ]

        item_type, item_value = item['item_name'].split(':', 1)

        item['item_type'] = item_type
        item['item_value'] = item_value

        assert item_type in ('user', 'homepage')

        if item_type == 'user':
            wget_args += [
                'http://canv.as/{0}/'.format(item_value),
                "--recursive", "--level=inf",
            ]
        elif item_type == 'homepage':
            wget_args += ['http://canv.as/']
        else:
            raise Exception('Unknown item')

        if 'bind_address' in globals():
            bind_to = globals()['bind_address']
            wget_args.extend(['--bind-address', bind_to])
            print('')
            print('*** Wget will bind address at {0} ***'.format(bind_to))
            print('')

        return realize(wget_args, item)
开发者ID:ArchiveTeam,项目名称:canvas-archive-grab,代码行数:55,代码来源:pipeline.py


示例19: realize

    def realize(self, item):
        wget_args = [
            WGET_LUA,
            "-U", USER_AGENT,
            "-nv",
            "--lua-script", "yahoomaps.lua",
            "-o", ItemInterpolation("%(item_dir)s/wget.log"),
            "--no-check-certificate",
            "--output-document", ItemInterpolation("%(item_dir)s/wget.tmp"),
            "--truncate-output",
            "-e", "robots=off",
            "--rotate-dns",
#            "--recursive", "--level=inf",
            "--no-parent",
#            "--page-requisites",
            "--timeout", "30",
            "--tries", "inf",
            "--domains", "yahoo.com,here.com",
            "--span-hosts",
            "--waitretry", "30",
            "--warc-file", ItemInterpolation("%(item_dir)s/%(warc_file_base)s"),
            "--warc-header", "operator: Archive Team",
            "--warc-header", "sour 

鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python task.SimpleTask类代码示例发布时间:2022-05-27
下一篇:
Python seed_dialog.show_seed_box函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap