:py:mod:`generic.utils`
=======================

.. py:module:: generic.utils

.. autodoc2-docstring:: generic.utils
   :allowtitles:

Submodules
----------

.. toctree::
   :titlesonly:
   :maxdepth: 1

   generic.utils.text_parser

Package Contents
----------------

Functions
~~~~~~~~~

.. list-table::
   :class: autosummary longtable
   :align: left

   * - :py:obj:`get_meta_property <generic.utils.get_meta_property>`
     - .. autodoc2-docstring:: generic.utils.get_meta_property
          :summary:
   * - :py:obj:`extract_article <generic.utils.extract_article>`
     - .. autodoc2-docstring:: generic.utils.extract_article
          :summary:
   * - :py:obj:`idn2ascii <generic.utils.idn2ascii>`
     - .. autodoc2-docstring:: generic.utils.idn2ascii
          :summary:
   * - :py:obj:`get_uniform_metadata <generic.utils.get_uniform_metadata>`
     - .. autodoc2-docstring:: generic.utils.get_uniform_metadata
          :summary:
   * - :py:obj:`str_to_isoformat <generic.utils.str_to_isoformat>`
     - .. autodoc2-docstring:: generic.utils.str_to_isoformat
          :summary:
   * - :py:obj:`get_metadata <generic.utils.get_metadata>`
     - .. autodoc2-docstring:: generic.utils.get_metadata
          :summary:
   * - :py:obj:`count_xml_character <generic.utils.count_xml_character>`
     - .. autodoc2-docstring:: generic.utils.count_xml_character
          :summary:
   * - :py:obj:`generate_hashed_filename <generic.utils.generate_hashed_filename>`
     - .. autodoc2-docstring:: generic.utils.generate_hashed_filename
          :summary:
   * - :py:obj:`is_path_matched <generic.utils.is_path_matched>`
     - .. autodoc2-docstring:: generic.utils.is_path_matched
          :summary:
   * - :py:obj:`is_file_url <generic.utils.is_file_url>`
     - .. autodoc2-docstring:: generic.utils.is_file_url
          :summary:
   * - :py:obj:`get_url_without_fragment <generic.utils.get_url_without_fragment>`
     - .. autodoc2-docstring:: generic.utils.get_url_without_fragment
          :summary:
   * - :py:obj:`analyze_text_with_spacy <generic.utils.analyze_text_with_spacy>`
     - .. autodoc2-docstring:: generic.utils.analyze_text_with_spacy
          :summary:
   * - :py:obj:`tokens_include_predicate <generic.utils.tokens_include_predicate>`
     - .. autodoc2-docstring:: generic.utils.tokens_include_predicate
          :summary:

API
~~~

.. py:function:: get_meta_property(response: scrapy.http.Response, name: str) -> str
   :canonical: generic.utils.get_meta_property

   .. autodoc2-docstring:: generic.utils.get_meta_property

.. py:function:: extract_article(res: scrapy.http.Response) -> dict
   :canonical: generic.utils.extract_article

   .. autodoc2-docstring:: generic.utils.extract_article

.. py:function:: idn2ascii(url_str: str) -> str
   :canonical: generic.utils.idn2ascii

   .. autodoc2-docstring:: generic.utils.idn2ascii

.. py:function:: get_uniform_metadata(html: str, base_url: str)
   :canonical: generic.utils.get_uniform_metadata

   .. autodoc2-docstring:: generic.utils.get_uniform_metadata

.. py:function:: str_to_isoformat(string: str)
   :canonical: generic.utils.str_to_isoformat

   .. autodoc2-docstring:: generic.utils.str_to_isoformat

.. py:function:: get_metadata(res: scrapy.http.Response) -> dict
   :canonical: generic.utils.get_metadata

   .. autodoc2-docstring:: generic.utils.get_metadata

.. py:function:: count_xml_character(xml_string: str) -> int
   :canonical: generic.utils.count_xml_character

   .. autodoc2-docstring:: generic.utils.count_xml_character

.. py:function:: generate_hashed_filename(url, domain_size: int = 8, url_size: int = 32, max_len: int = 255) -> str
   :canonical: generic.utils.generate_hashed_filename

   .. autodoc2-docstring:: generic.utils.generate_hashed_filename

.. py:function:: is_path_matched(url: str, regexp: str) -> bool
   :canonical: generic.utils.is_path_matched

   .. autodoc2-docstring:: generic.utils.is_path_matched

.. py:function:: is_file_url(url: str, regexp: str = '(?:/|\\.html?|\\.php|\\.aspx?|/[^./]+)$') -> bool
   :canonical: generic.utils.is_file_url

   .. autodoc2-docstring:: generic.utils.is_file_url

.. py:function:: get_url_without_fragment(url_string: str) -> str
   :canonical: generic.utils.get_url_without_fragment

   .. autodoc2-docstring:: generic.utils.get_url_without_fragment

.. py:function:: analyze_text_with_spacy(client: httpx.AsyncClient, text: str, url: str)
   :canonical: generic.utils.analyze_text_with_spacy
   :async:

   .. autodoc2-docstring:: generic.utils.analyze_text_with_spacy

.. py:function:: tokens_include_predicate(tokens)
   :canonical: generic.utils.tokens_include_predicate

   .. autodoc2-docstring:: generic.utils.tokens_include_predicate