I am new to Python and am getting this error:
Traceback (most recent call last):
  File "/usr/local/bin/scrapy", line 4, in <module>
    execute()
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/cmdline.py", line 130, in execute
    _run_print_help(parser, _run_command, cmd, args, opts)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/cmdline.py", line 96, in _run_print_help
    func(*a, **kw)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/cmdline.py", line 136, in _run_command
    cmd.run(args, opts)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/commands/crawl.py", line 42, in run
    q = self.crawler.queue
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/command.py", line 31, in crawler
    self._crawler.configure()
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/crawler.py", line 36, in configure
    self.spiders = spman_cls.from_settings(self.settings)
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/spidermanager.py", line 33, in from_settings
    return cls(settings.getlist('SPIDER_MODULES'))
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/spidermanager.py", line 23, in __init__
    for module in walk_modules(name):
  File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/scrapy/utils/misc.py", line 65, in walk_modules
    submod = __import__(fullpath, {}, {}, [''])
  File "/my_crawler/empt/empt/spiders/empt_spider.py", line 59
    check_exists_sql = "SELECT * FROM LINKS WHERE link = '%s' LIMIT 1" % item['link']
    ^
IndentationError: unexpected indent
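For reference, "unexpected indent" means the interpreter reached a line that is indented more deeply than the statement above it allows. A contrived two-line illustration (not taken from the spider) that raises the same error:

# illustration only: the second line is indented although nothing opens a new block
x = 1
    y = 2

Mixing tabs and spaces is a common way to hit this even when the lines look perfectly aligned in an editor.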
On this bit of code:
def parse_item(self, response):
    hxs = HtmlXPathSelector(response)
    sites = hxs.select('//a[contains(@href, ".mp3")]/@href').extract()
    items = []
    #for site in sites:
    #    link = site.select('a/@href').extract()
    #    print site
    for site in sites:
        item = EmptItem()
        item['link'] = site  #site.select('a/@href').extract()
        #### DB INSERT ATTEMPT ###
        #MySQL test
        #open db connection
        db = MySQLdb.connect("localhost", "root", "str0ng", "TESTDB")
        #prepare a cursor object using cursor() method
        cursor = db.cursor()
        #see if any links in the DB match the crawled link
        check_exists_sql = "SELECT * FROM LINKS WHERE link = '%s' LIMIT 1" % item['link']
        cursor.execute(check_exists_sql)
        if cursor.rowcount == 0:
            #prepare SQL query to insert a record into the db
            sql = "INSERT INTO LINKS ( link ) VALUES ( '%s')" % item['link']
            try:
                #execute the sql command
                cursor.execute(sql)
                #commit your changes to the db
                db.commit()
            except:
                #roll back on error
                db.rollback()
        #fetch a single row using fetchone() method
        #data = cursor.fetchone()
        #print "Database version: %s " % data
        #disconnect from server
        db.close()
        ### end mysql
        items.append(item)
    return items
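Separately, building the SQL by interpolating item['link'] with % will break as soon as a link contains a quote character. Below is a minimal sketch of the same check-then-insert using MySQLdb's parameter binding instead of string formatting; the connection details and the LINKS table are carried over from the code above, and the save_link helper name is just for illustration:

import MySQLdb

def save_link(link):
    # same credentials as in the spider above (illustrative only)
    db = MySQLdb.connect("localhost", "root", "str0ng", "TESTDB")
    cursor = db.cursor()
    try:
        # the driver quotes/escapes the value, so no manual %-formatting of the SQL
        cursor.execute("SELECT 1 FROM LINKS WHERE link = %s LIMIT 1", (link,))
        if cursor.rowcount == 0:
            cursor.execute("INSERT INTO LINKS ( link ) VALUES ( %s )", (link,))
            db.commit()
    except MySQLdb.Error:
        # undo the pending insert on any database error
        db.rollback()
    finally:
        db.close()

Opening a fresh connection for every crawled link also gets expensive; a common Scrapy pattern is to move the database work into an item pipeline that opens the connection once.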