Index _ | A | B | C | D | E | F | G | I | K | L | M | N | O | P | R | S | T | U | X _ __post_init__() (generic.items.ArticleItem method) __repr__() (generic.items.FileItem method) _clean() (generic.utils.text_parser.ArticleTextParser method) _find_and_request_sources() (generic.mixins.read_more.ReadMoreMixin method) _find_next_page_link() (generic.mixins.read_more.ReadMoreMixin method) _find_read_more_link() (generic.mixins.read_more.ReadMoreMixin method) _find_source_links() (generic.mixins.read_more.ReadMoreMixin method) _generate_feed() (generic.spiders.feed.FeedSpider method) _load_config() (generic.spiders.feed.FeedSpider method) _merge_article_body() (generic.mixins.read_more.ReadMoreMixin method) _remove_element() (generic.utils.text_parser.ArticleTextParser method) _request_next_source() (generic.mixins.read_more.ReadMoreMixin method) A acquired_time (generic.items.ArticleItem attribute) (generic.items.FileItem attribute) ADDONS (in module generic.settings) allowed_domains (generic.spiders.archive.ArchiveSpider attribute) (generic.spiders.base.GenericSpider attribute) (generic.spiders.read_more.ReadMoreSpider attribute) analyze_text_with_spacy() (in module generic.utils) archive_article_xpath (generic.spiders.archive.ArchiveSpiderConfig attribute) archive_next_xpath (generic.spiders.archive.ArchiveSpiderConfig attribute) ArchiveSpider (class in generic.spiders.archive) ArchiveSpiderConfig (class in generic.spiders.archive) args (generic.mixins.read_more.ReadMoreCompatible attribute) (generic.runner.SpiderRunnerConfig attribute) ArticleItem (class in generic.items) ArticleTextParser (class in generic.utils.text_parser) author (generic.items.ArticleItem attribute) B body (generic.items.ArticleItem attribute) BOT_NAME (in module generic.settings) C character_count (generic.items.ArticleItem attribute) CleanSentencesPipeline (class in generic.pipelines) close_spider() (generic.pipelines.SpacyTokenizePipeline method) CONCURRENT_ITEMS (in module generic.settings) CONCURRENT_REQUESTS_PER_DOMAIN (in module generic.settings) config (generic.spiders.feed.FeedSpiderConfig attribute) content (generic.items.FeedItem attribute) (generic.items.FileItem attribute) convert_to_string() (generic.spiders.base.GenericSpiderConfig class method) count_xml_character() (in module generic.utils) custom_settings (generic.spiders.directory.DirectorySpider attribute) (generic.spiders.feed.FeedSpider attribute) (generic.spiders.generic_sitemap.GenericSitemapSpider attribute) D description (generic.items.ArticleItem attribute) DirectorySpider (class in generic.spiders.directory) DOWNLOAD_DELAY (in module generic.settings) DOWNLOAD_HANDLERS (in module generic.settings) DOWNLOADER_MIDDLEWARES (in module generic.settings) DropMissingTextPipeline (class in generic.pipelines) E extract_article() (in module generic.utils) extract_file_download_hrefs() (generic.mixins.file_downloader.FileDownloaderMixin method) F Feed (class in generic.spiders.feed) feed_config (generic.spiders.feed.FeedSpiderConfig attribute) FEED_EXPORT_ENCODING (in module generic.settings) feed_type (generic.spiders.feed.FeedConfig attribute) FeedConfig (class in generic.spiders.feed) FeedEntry (class in generic.spiders.feed) FeedItem (class in generic.items) FeedSpider (class in generic.spiders.feed) FeedSpiderConfig (class in generic.spiders.feed) FeedStoragePipeline (class in generic.pipelines) file_name (generic.items.FeedItem attribute) (generic.spiders.feed.FeedConfig attribute) file_regexp (generic.mixins.file_downloader.FileDownloaderMixinConfig attribute) FileDownloaderMixin (class in generic.mixins.file_downloader) FileDownloaderMixinConfig (class in generic.mixins.file_downloader) FileDownloadSpider (class in generic.spiders.file_download) FileDownloadSpiderConfig (class in generic.spiders.file_download) FileItem (class in generic.items) FileItemPipeline (class in generic.pipelines) FileItemStoragePipeline (class in generic.pipelines) filename (generic.items.FileItem attribute) from_crawler() (generic.middlewares.GenericDownloaderMiddleware class method) (generic.middlewares.GenericSpiderMiddleware class method) (generic.pipelines.SpacyTokenizePipeline class method) from_response() (generic.items.ArticleItem class method) G generate_hashed_filename() (in module generic.utils) generated_at (generic.items.FeedItem attribute) generic module generic.items module generic.middlewares module generic.mixins module generic.mixins.file_downloader module generic.mixins.read_more module generic.pipelines module generic.runner module generic.settings module generic.spiders module generic.spiders.archive module generic.spiders.base module generic.spiders.directory module generic.spiders.feed module generic.spiders.file_download module generic.spiders.generic_sitemap module generic.spiders.read_more module generic.spiders.xml module generic.utils module generic.utils.text_parser module GenericDownloaderMiddleware (class in generic.middlewares) GenericPipeline (class in generic.pipelines) GenericSitemapSpider (class in generic.spiders.generic_sitemap) GenericSitemapSpiderConfig (class in generic.spiders.generic_sitemap) GenericSpider (class in generic.spiders.base) GenericSpiderConfig (class in generic.spiders.base) GenericSpiderMiddleware (class in generic.middlewares) get_config_class() (generic.spiders.archive.ArchiveSpider class method) (generic.spiders.base.GenericSpider class method) (generic.spiders.feed.FeedSpider class method) (generic.spiders.file_download.FileDownloadSpider class method) (generic.spiders.generic_sitemap.GenericSitemapSpider class method) (generic.spiders.read_more.ReadMoreSpider class method) (generic.spiders.xml.XmlSpider class method) get_json_ld() (generic.items.ArticleItem static method) get_meta_property() (in module generic.utils) get_metadata() (in module generic.utils) get_uniform_metadata() (in module generic.utils) get_url_without_fragment() (in module generic.utils) I id (generic.spiders.feed.Feed attribute) (generic.spiders.feed.FeedEntry attribute) idn2ascii() (in module generic.utils) is_file_url() (in module generic.utils) is_path_matched() (in module generic.utils) ITEM_PIPELINES (in module generic.settings) item_type (generic.items.ArticleItem attribute) K kind (generic.items.ArticleItem attribute) L lang (generic.items.ArticleItem attribute) (generic.spiders.feed.Feed attribute) link (generic.spiders.feed.FeedEntry attribute) logger (generic.mixins.read_more.ReadMoreCompatible attribute) M metadata (generic.items.FileItem attribute) modified_time (generic.items.ArticleItem attribute) module generic generic.items generic.middlewares generic.mixins generic.mixins.file_downloader generic.mixins.read_more generic.pipelines generic.runner generic.settings generic.spiders generic.spiders.archive generic.spiders.base generic.spiders.directory generic.spiders.feed generic.spiders.file_download generic.spiders.generic_sitemap generic.spiders.read_more generic.spiders.xml generic.utils generic.utils.text_parser N name (generic.spiders.archive.ArchiveSpider attribute) (generic.spiders.directory.DirectorySpider attribute) (generic.spiders.feed.FeedSpider attribute) (generic.spiders.file_download.FileDownloadSpider attribute) (generic.spiders.generic_sitemap.GenericSitemapSpider attribute) (generic.spiders.read_more.ReadMoreSpider attribute) (generic.spiders.xml.XmlSpider attribute) NEWSPIDER_MODULE (in module generic.settings) O output_dir (generic.items.FileItem attribute) (generic.mixins.file_downloader.FileDownloaderMixinConfig attribute) P parse() (generic.spiders.feed.FeedSpider method) (generic.spiders.generic_sitemap.GenericSitemapSpider method) (generic.spiders.read_more.ReadMoreSpider method) (generic.utils.text_parser.ArticleTextParser method) parse_archive_index() (generic.spiders.archive.ArchiveSpider method) parse_article() (generic.mixins.read_more.ReadMoreMixin method) parse_body() (generic.spiders.directory.DirectorySpider method) parse_content() (generic.spiders.xml.XmlSpider method) parse_file_download_file() (generic.mixins.file_downloader.FileDownloaderMixin method) parse_file_download_page() (generic.mixins.file_downloader.FileDownloaderMixin method) parse_page() (generic.spiders.file_download.FileDownloadSpider method) parse_source_only() (generic.mixins.read_more.ReadMoreMixin method) parse_summary_page() (generic.mixins.read_more.ReadMoreMixin method) parse_xml() (generic.spiders.xml.XmlSpider method) path_regexp (generic.spiders.file_download.FileDownloadSpiderConfig attribute) process_exception() (generic.middlewares.GenericDownloaderMiddleware method) process_item() (generic.pipelines.CleanSentencesPipeline method) (generic.pipelines.DropMissingTextPipeline method) (generic.pipelines.FeedStoragePipeline method) (generic.pipelines.FileItemPipeline method) (generic.pipelines.FileItemStoragePipeline method) (generic.pipelines.GenericPipeline method) (generic.pipelines.SpacyTokenizePipeline method) process_pdf_item() (generic.pipelines.FileItemPipeline method) process_request() (generic.middlewares.GenericDownloaderMiddleware method) process_response() (generic.middlewares.GenericDownloaderMiddleware method) process_spider_exception() (generic.middlewares.GenericSpiderMiddleware method) process_spider_input() (generic.middlewares.GenericSpiderMiddleware method) process_spider_output() (generic.middlewares.GenericSpiderMiddleware method) process_start() (generic.middlewares.GenericSpiderMiddleware method) published_time (generic.items.ArticleItem attribute) R read_more (generic.mixins.read_more.ReadMoreMixinConfig attribute) read_more_xpath (generic.mixins.read_more.ReadMoreMixinConfig attribute) read_next (generic.mixins.read_more.ReadMoreMixinConfig attribute) read_next_contains (generic.mixins.read_more.ReadMoreMixinConfig attribute) ReadMoreCompatible (class in generic.mixins.read_more) ReadMoreMixin (class in generic.mixins.read_more) ReadMoreMixinConfig (class in generic.mixins.read_more) ReadMoreSpider (class in generic.spiders.read_more) ReadMoreSpiderConfig (class in generic.spiders.read_more) ROBOTSTXT_OBEY (in module generic.settings) run() (generic.runner.SpiderRunner method) S segment() (generic.utils.text_parser.ArticleTextParser method) sentences (generic.items.ArticleItem attribute) site_name (generic.items.ArticleItem attribute) sitemap_filter() (generic.spiders.generic_sitemap.GenericSitemapSpider method) sitemap_filter_all() (generic.spiders.generic_sitemap.GenericSitemapSpider method) sitemap_filter_wordpress() (generic.spiders.generic_sitemap.GenericSitemapSpider method) sitemap_type (generic.spiders.generic_sitemap.GenericSitemapSpiderConfig attribute) sitemap_urls (generic.spiders.generic_sitemap.GenericSitemapSpider attribute) source_contains (generic.mixins.read_more.ReadMoreMixinConfig attribute) source_parent_contains (generic.mixins.read_more.ReadMoreMixinConfig attribute) sources (generic.items.ArticleItem attribute) SPACY_API_URL (in module generic.settings) SpacyTokenizePipeline (class in generic.pipelines) spider (generic.runner.SpiderRunnerConfig attribute) SPIDER_MODULES (in module generic.settings) spider_opened() (generic.middlewares.GenericDownloaderMiddleware method) (generic.middlewares.GenericSpiderMiddleware method) SpiderRunner (class in generic.runner) SpiderRunnerConfig (class in generic.runner) split_urls() (generic.spiders.base.GenericSpiderConfig class method) start() (generic.spiders.archive.ArchiveSpider method) (generic.spiders.feed.FeedSpider method) (generic.spiders.file_download.FileDownloadSpider method) (generic.spiders.read_more.ReadMoreSpider method) (generic.spiders.xml.XmlSpider method) start_urls (generic.spiders.archive.ArchiveSpider attribute) str_to_isoformat() (in module generic.utils) T T (in module generic.spiders.base) title (generic.items.ArticleItem attribute) (generic.spiders.feed.Feed attribute) (generic.spiders.feed.FeedEntry attribute) tokens (generic.items.ArticleItem attribute) tokens_include_predicate() (in module generic.utils) type (generic.spiders.feed.Feed attribute) U url (generic.items.ArticleItem attribute) (generic.items.FeedItem attribute) (generic.items.FileItem attribute) urls (generic.spiders.base.GenericSpiderConfig attribute) USER_AGENT (in module generic.settings) uuid (generic.items.ArticleItem attribute) X xml_link_xpath (generic.spiders.xml.XmlSpiderConfig attribute) XmlSpider (class in generic.spiders.xml) XmlSpiderConfig (class in generic.spiders.xml) xpath_href (generic.spiders.feed.FeedConfig attribute) xpath_title (generic.spiders.feed.FeedConfig attribute)