scrapy
latest
开始
Scrapy 概览
安装指南
Scrapy 教程
Examples
Basic concepts
Command line tool
Spiders
Selectors
Items
Item Loaders
Scrapy shell
Item Pipeline
Feed exports
Requests and Responses
Link Extractors
Settings
Exceptions
Built-in services
Logging
Stats Collection
Sending e-mail
Telnet Console
Web Service
Solving specific problems
Frequently Asked Questions
Debugging Spiders
Spiders Contracts
Common Practices
Broad Crawls
Using your browser’s Developer Tools for scraping
Debugging memory leaks
Downloading and processing files and images
Deploying Spiders
AutoThrottle extension
Benchmarking
Jobs: pausing and resuming crawls
Extending Scrapy
Architecture overview
Downloader Middleware
Spider Middleware
Extensions
Core API
Signals
Item Exporters
scrapy
Docs » Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
J
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
X
_
__nonzero__() (scrapy.selector.Selector method)
A
adapt_response() (scrapy.spiders.XMLFeedSpider method)
add_css() (scrapy.loader.ItemLoader method)
add_value() (scrapy.loader.ItemLoader method)
add_xpath() (scrapy.loader.ItemLoader method)
adjust_request_args() (scrapy.contracts.Contract method)
AjaxCrawlMiddleware (class in scrapy.downloadermiddlewares.ajaxcrawl)
allowed_domains (scrapy.spiders.Spider attribute)
B
BaseItemExporter (class in scrapy.exporters)
body (scrapy.http.Request attribute)
(scrapy.http.Response attribute)
body_as_unicode() (scrapy.http.TextResponse method)
C
clear_stats() (scrapy.statscollectors.StatsCollector method)
close_spider()
(scrapy.statscollectors.StatsCollector method)
closed() (scrapy.spiders.Spider method)
CloseSpider
Compose (class in scrapy.loader.processors)
context (scrapy.loader.ItemLoader attribute)
Contract (class in scrapy.contracts)
CookiesMiddleware (class in scrapy.downloadermiddlewares.cookies)
copy() (scrapy.http.Request method)
(scrapy.http.Response method)
CoreStats (class in scrapy.extensions.corestats)
crawl() (scrapy.crawler.Crawler method)
Crawler (class in scrapy.crawler)
crawler (scrapy.spiders.Spider attribute)
CrawlSpider (class in scrapy.spiders)
css() (scrapy.http.TextResponse method)
(scrapy.selector.Selector method)
(scrapy.selector.SelectorList method)
CSVFeedSpider (class in scrapy.spiders)
CsvItemExporter (class in scrapy.exporters)
custom_settings (scrapy.spiders.Spider attribute)
D
default_input_processor (scrapy.loader.ItemLoader attribute)
default_item_class (scrapy.loader.ItemLoader attribute)
default_output_processor (scrapy.loader.ItemLoader attribute)
default_selector_class (scrapy.loader.ItemLoader attribute)
DefaultHeadersMiddleware (class in scrapy.downloadermiddlewares.defaultheaders)
delimiter (scrapy.spiders.CSVFeedSpider attribute)
DepthMiddleware (class in scrapy.spidermiddlewares.depth)
DontCloseSpider
DownloaderMiddleware (class in scrapy.downloadermiddlewares)
DownloaderStats (class in scrapy.downloadermiddlewares.stats)
DownloadTimeoutMiddleware (class in scrapy.downloadermiddlewares.downloadtimeout)
DropItem
DummyStatsCollector (class in scrapy.statscollectors)
E
encoding (scrapy.exporters.BaseItemExporter attribute)
(scrapy.http.TextResponse attribute)
engine (scrapy.crawler.Crawler attribute)
engine_started() (in module scrapy.signals)
engine_stopped() (in module scrapy.signals)
export_empty_fields (scrapy.exporters.BaseItemExporter attribute)
export_item() (scrapy.exporters.BaseItemExporter method)
extensions (scrapy.crawler.Crawler attribute)
extract() (scrapy.selector.Selector method)
(scrapy.selector.SelectorList method)
F
Field (class in scrapy.item)
fields (scrapy.item.Item attribute)
fields_to_export (scrapy.exporters.BaseItemExporter attribute)
FilesPipeline (class in scrapy.pipelines.files)
find_by_request() (scrapy.loader.SpiderLoader method)
finish_exporting() (scrapy.exporters.BaseItemExporter method)
flags (scrapy.http.Response attribute)
FormRequest (class in scrapy.http)
from_crawler()
(scrapy.downloadermiddlewares.DownloaderMiddleware method)
(scrapy.spidermiddlewares.SpiderMiddleware method)
(scrapy.spiders.Spider method)
from_response() (scrapy.http.FormRequest class method)
from_settings() (scrapy.loader.SpiderLoader method)
(scrapy.mail.MailSender class method)
G
get_collected_values() (scrapy.loader.ItemLoader method)
get_css() (scrapy.loader.ItemLoader method)
get_input_processor() (scrapy.loader.ItemLoader method)
get_media_requests() (scrapy.pipelines.files.FilesPipeline method)
(scrapy.pipelines.images.ImagesPipeline method)
get_oldest() (in module scrapy.utils.trackref)
get_output_processor() (scrapy.loader.ItemLoader method)
get_output_value() (scrapy.loader.ItemLoader method)
get_stats() (scrapy.statscollectors.StatsCollector method)
get_value() (scrapy.loader.ItemLoader method)
(scrapy.statscollectors.StatsCollector method)
get_xpath() (scrapy.loader.ItemLoader method)
H
headers (scrapy.http.Request attribute)
(scrapy.http.Response attribute)
(scrapy.spiders.CSVFeedSpider attribute)
HtmlResponse (class in scrapy.http)
HttpAuthMiddleware (class in scrapy.downloadermiddlewares.httpauth)
HttpCacheMiddleware (class in scrapy.downloadermiddlewares.httpcache)
HttpCompressionMiddleware (class in scrapy.downloadermiddlewares.httpcompression)
HttpErrorMiddleware (class in scrapy.spidermiddlewares.httperror)
HttpProxyMiddleware (class in scrapy.downloadermiddlewares.httpproxy)
I
Identity (class in scrapy.loader.processors)
IgnoreRequest
ImagesPipeline (class in scrapy.pipelines.images)
inc_value() (scrapy.statscollectors.StatsCollector method)
indent (scrapy.exporters.BaseItemExporter attribute)
Item (class in scrapy.item)
item (scrapy.loader.ItemLoader attribute)
item_completed() (scrapy.pipelines.files.FilesPipeline method)
(scrapy.pipelines.images.ImagesPipeline method)
item_dropped() (in module scrapy.signals)
item_error() (in module scrapy.signals)
item_scraped() (in module scrapy.signals)
ItemLoader (class in scrapy.loader)
iter_all() (in module scrapy.utils.trackref)
iterator (scrapy.spiders.XMLFeedSpider attribute)
itertag (scrapy.spiders.XMLFeedSpider attribute)
J
Join (class in scrapy.loader.processors)
JsonItemExporter (class in scrapy.exporters)
JsonLinesItemExporter (class in scrapy.exporters)
L
list() (scrapy.loader.SpiderLoader method)
load() (scrapy.loader.SpiderLoader method)
load_item() (scrapy.loader.ItemLoader method)
log() (scrapy.spiders.Spider method)
logger (scrapy.spiders.Spider attribute)
LogStats (class in scrapy.extensions.logstats)
LxmlLinkExtractor (class in scrapy.linkextractors.lxmlhtml)
M
MailSender (class in scrapy.mail)
MapCompose (class in scrapy.loader.processors)
max_value() (scrapy.statscollectors.StatsCollector method)
MemoryStatsCollector (class in scrapy.statscollectors)
meta (scrapy.http.Request attribute)
(scrapy.http.Response attribute)
MetaRefreshMiddleware (class in scrapy.downloadermiddlewares.redirect)
method (scrapy.http.Request attribute)
min_value() (scrapy.statscollectors.StatsCollector method)
N
name (scrapy.spiders.Spider attribute)
namespaces (scrapy.spiders.XMLFeedSpider attribute)
nested_css() (scrapy.loader.ItemLoader method)
nested_xpath() (scrapy.loader.ItemLoader method)
NotConfigured
NotSupported
O
object_ref (class in scrapy.utils.trackref)
OffsiteMiddleware (class in scrapy.spidermiddlewares.offsite)
open_spider()
(scrapy.statscollectors.StatsCollector method)
P
parse() (scrapy.spiders.Spider method)
parse_node() (scrapy.spiders.XMLFeedSpider method)
parse_row() (scrapy.spiders.CSVFeedSpider method)
parse_start_url() (scrapy.spiders.CrawlSpider method)
PickleItemExporter (class in scrapy.exporters)
post_process() (scrapy.contracts.Contract method)
PprintItemExporter (class in scrapy.exporters)
pre_process() (scrapy.contracts.Contract method)
print_live_refs() (in module scrapy.utils.trackref)
process_exception() (scrapy.downloadermiddlewares.DownloaderMiddleware method)
process_item()
process_request() (scrapy.downloadermiddlewares.DownloaderMiddleware method)
process_response() (scrapy.downloadermiddlewares.DownloaderMiddleware method)
process_results() (scrapy.spiders.XMLFeedSpider method)
process_spider_exception() (scrapy.spidermiddlewares.SpiderMiddleware method)
process_spider_input() (scrapy.spidermiddlewares.SpiderMiddleware method)
process_spider_output() (scrapy.spidermiddlewares.SpiderMiddleware method)
process_start_requests() (scrapy.spidermiddlewares.SpiderMiddleware method)
Q
quotechar (scrapy.spiders.CSVFeedSpider attribute)
R
re() (scrapy.selector.Selector method)
(scrapy.selector.SelectorList method)
RedirectMiddleware (class in scrapy.downloadermiddlewares.redirect)
RefererMiddleware (class in scrapy.spidermiddlewares.referer)
register_namespace() (scrapy.selector.Selector method)
remove_namespaces() (scrapy.selector.Selector method)
replace() (scrapy.http.Request method)
(scrapy.http.Response method)
replace_css() (scrapy.loader.ItemLoader method)
replace_value() (scrapy.loader.ItemLoader method)
replace_xpath() (scrapy.loader.ItemLoader method)
Request (class in scrapy.http)
request (scrapy.http.Response attribute)
request_dropped() (in module scrapy.signals)
request_reached_downloader() (in module scrapy.signals)
request_scheduled() (in module scrapy.signals)
Response (class in scrapy.http)
response_downloaded() (in module scrapy.signals)
response_received() (in module scrapy.signals)
RetryMiddleware (class in scrapy.downloadermiddlewares.retry)
ReturnsContract (class in scrapy.contracts.default)
RobotsTxtMiddleware (class in scrapy.downloadermiddlewares.robotstxt)
Rule (class in scrapy.spiders)
rules (scrapy.spiders.CrawlSpider attribute)
S
ScrapesContract (class in scrapy.contracts.default)
scrapy.contracts (module)
scrapy.contracts.default (module)
scrapy.crawler (module)
scrapy.downloadermiddlewares (module)
scrapy.downloadermiddlewares.ajaxcrawl (module)
scrapy.downloadermiddlewares.cookies (module)
scrapy.downloadermiddlewares.defaultheaders (module)
scrapy.downloadermiddlewares.downloadtimeout (module)
scrapy.downloadermiddlewares.httpauth (module)
scrapy.downloadermiddlewares.httpcache (module)
scrapy.downloadermiddlewares.httpcompression (module)
scrapy.downloadermiddlewares.httpproxy (module)
scrapy.downloadermiddlewares.redirect (module)
scrapy.downloadermiddlewares.retry (module)
scrapy.downloadermiddlewares.robotstxt (module)
scrapy.downloadermiddlewares.stats (module)
scrapy.downloadermiddlewares.useragent (module)
scrapy.exceptions (module)
scrapy.exporters (module)
scrapy.extensions.closespider (module)
scrapy.extensions.closespider.CloseSpider (class in scrapy.extensions.closespider)
scrapy.extensions.corestats (module)
scrapy.extensions.debug (module)
scrapy.extensions.debug.Debugger (class in scrapy.extensions.debug)
scrapy.extensions.debug.StackTraceDump (class in scrapy.extensions.debug)
scrapy.extensions.logstats (module)
scrapy.extensions.memdebug (module)
scrapy.extensions.memdebug.MemoryDebugger (class in scrapy.extensions.memdebug)
scrapy.extensions.memusage (module)
scrapy.extensions.memusage.MemoryUsage (class in scrapy.extensions.memusage)
scrapy.extensions.statsmailer (module)
scrapy.extensions.statsmailer.StatsMailer (class in scrapy.extensions.statsmailer)
scrapy.extensions.telnet (module), [1]
scrapy.extensions.telnet.TelnetConsole (class in scrapy.extensions.telnet)
scrapy.http (module)
scrapy.item (module)
scrapy.linkextractors (module)
scrapy.linkextractors.lxmlhtml (module)
scrapy.loader (module), [1]
scrapy.loader.processors (module)
scrapy.mail (module)
scrapy.pipelines.files (module)
scrapy.pipelines.images (module)
scrapy.selector (module)
scrapy.settings (module)
scrapy.signals (module)
scrapy.spidermiddlewares (module)
scrapy.spidermiddlewares.depth (module)
scrapy.spidermiddlewares.httperror (module)
scrapy.spidermiddlewares.offsite (module)
scrapy.spidermiddlewares.referer (module)
scrapy.spidermiddlewares.urllength (module)
scrapy.spiders (module)
scrapy.statscollectors (module), [1]
scrapy.utils.log (module)
scrapy.utils.trackref (module)
SelectJmes (class in scrapy.loader.processors)
Selector (class in scrapy.selector)
selector (scrapy.http.TextResponse attribute)
(scrapy.loader.ItemLoader attribute)
SelectorList (class in scrapy.selector)
send() (scrapy.mail.MailSender method)
serialize_field() (scrapy.exporters.BaseItemExporter method)
set_stats() (scrapy.statscollectors.StatsCollector method)
set_value() (scrapy.statscollectors.StatsCollector method)
settings (scrapy.crawler.Crawler attribute)
(scrapy.spiders.Spider attribute)
SETTINGS_PRIORITIES (in module scrapy.settings)
signals (scrapy.crawler.Crawler attribute)
sitemap_alternate_links (scrapy.spiders.SitemapSpider attribute)
sitemap_follow (scrapy.spiders.SitemapSpider attribute)
sitemap_rules (scrapy.spiders.SitemapSpider attribute)
sitemap_urls (scrapy.spiders.SitemapSpider attribute)
SitemapSpider (class in scrapy.spiders)
Spider (class in scrapy.spiders)
spider (scrapy.crawler.Crawler attribute)
spider_closed() (in module scrapy.signals)
spider_error() (in module scrapy.signals)
spider_idle() (in module scrapy.signals)
spider_opened() (in module scrapy.signals)
spider_stats (scrapy.statscollectors.MemoryStatsCollector attribute)
SpiderLoader (class in scrapy.loader)
SpiderMiddleware (class in scrapy.spidermiddlewares)
start_exporting() (scrapy.exporters.BaseItemExporter method)
start_requests() (scrapy.spiders.Spider method)
start_urls (scrapy.spiders.Spider attribute)
stats (scrapy.crawler.Crawler attribute)
StatsCollector (class in scrapy.statscollectors)
status (scrapy.http.Response attribute)
T
TakeFirst (class in scrapy.loader.processors)
text (scrapy.http.TextResponse attribute)
TextResponse (class in scrapy.http)
U
update_telnet_vars() (in module scrapy.extensions.telnet)
url (scrapy.http.Request attribute)
(scrapy.http.Response attribute)
UrlContract (class in scrapy.contracts.default)
urljoin() (scrapy.http.Response method)
UrlLengthMiddleware (class in scrapy.spidermiddlewares.urllength)
UserAgentMiddleware (class in scrapy.downloadermiddlewares.useragent)
X
XMLFeedSpider (class in scrapy.spiders)
XmlItemExporter (class in scrapy.exporters)
XmlResponse (class in scrapy.http)
xpath() (scrapy.http.TextResponse method)
(scrapy.selector.Selector method)
(scrapy.selector.SelectorList method)
Read the Docs
v: latest
Versions
latest
Downloads
pdf
htmlzip
epub
On Read the Docs
Project Home
Builds
Free document hosting provided by Read the Docs.