class ApifyDatasetLoader(BaseLoader, BaseModel):
    """Load datasets from `Apify` web scraping, crawling, and data extraction platform.

    For details, see https://docs.apify.com/platform/integrations/langchain

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import ApifyDatasetLoader
            from langchain_core.documents import Document

            loader = ApifyDatasetLoader(
                dataset_id="YOUR-DATASET-ID",
                dataset_mapping_function=lambda dataset_item: Document(
                    page_content=dataset_item["text"],
                    metadata={"source": dataset_item["url"]},
                ),
            )
            documents = loader.load()
    """  # noqa: E501

    apify_client: Any
    """An instance of the ApifyClient class from the apify-client Python package."""
    dataset_id: str
    """The ID of the dataset on the Apify platform."""
    dataset_mapping_function: Callable[[Dict], Document]
    """A custom function that takes a single dictionary (an Apify dataset item)
    and converts it to an instance of the Document class."""

    def __init__(
        self,
        dataset_id: str,
        dataset_mapping_function: Callable[[Dict], Document],
    ) -> None:
        """Initialize the loader with an Apify dataset ID and a mapping function.

        Args:
            dataset_id (str): The ID of the dataset on the Apify platform.
            dataset_mapping_function (Callable): A function that takes a single
                dictionary (an Apify dataset item) and converts it to an
                instance of the Document class.
        """
        # Forward as keyword arguments so the model's field validation (and the
        # pre root validator below) receives them by field name.
        super().__init__(
            dataset_id=dataset_id,
            dataset_mapping_function=dataset_mapping_function,
        )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Create the Apify client and store it under ``values["apify_client"]``.

        The ``apify_client`` package is imported lazily so that it is only
        required when this loader is actually instantiated.

        Args:
            values: The raw field values passed to the model.

        Returns:
            The same ``values`` dict with an ``"apify_client"`` entry added.

        Raises:
            ImportError: If the ``apify-client`` package cannot be imported.
        """
        # Keep the try body limited to the import: an ImportError raised while
        # constructing or configuring the client must not be misreported as
        # "package not installed".
        try:
            from apify_client import ApifyClient
        except ImportError as err:
            raise ImportError(
                "Could not import apify-client Python package. "
                "Please install it with `pip install apify-client`."
            ) from err

        client = ApifyClient()

        # Tagging the user agent is best-effort: supply defaults to getattr so
        # a client without these attributes skips the tweak instead of raising
        # AttributeError.
        http_client = getattr(client, "http_client", None)
        if httpx_client := getattr(http_client, "httpx_client", None):
            httpx_client.headers["user-agent"] += "; Origin/langchain"

        values["apify_client"] = client
        return values