# Source code for langchain_community.document_loaders.arcgis_loader
"""Document Loader for ArcGIS FeatureLayers."""

from __future__ import annotations

import json
import re
import warnings
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    # Only needed for annotations; the runtime import happens lazily inside
    # the loader so that arcgis stays an optional dependency.
    import arcgis

# Placeholder used when a layer or item description is missing or empty.
_NOT_PROVIDED = "(Not Provided)"
class ArcGISLoader(BaseLoader):
    """Load records from an ArcGIS FeatureLayer.

    Each feature returned by the layer query becomes one ``Document`` whose
    ``page_content`` is the JSON-encoded attribute mapping of the feature and
    whose ``metadata`` carries the layer name, URL, descriptions and raw
    layer properties.
    """

    def __init__(
        self,
        layer: Union[str, arcgis.features.FeatureLayer],
        gis: Optional[arcgis.gis.GIS] = None,
        where: str = "1=1",
        out_fields: Optional[Union[List[str], str]] = None,
        return_geometry: bool = False,
        result_record_count: Optional[int] = None,
        lyr_desc: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Configure the loader and read the layer's properties.

        Args:
            layer: Either the URL of a FeatureLayer or an already constructed
                ``arcgis.features.FeatureLayer``.
            gis: GIS connection to use. When falsy, a default
                ``arcgis.gis.GIS()`` is created.
            where: Value forwarded as the ``where`` query parameter.
            out_fields: Fields to request. A string is used verbatim, a list
                is joined with commas, and ``None`` becomes ``"*"``.
            return_geometry: When true, each document's metadata also gets a
                ``"geometry"`` entry (if the feature has one).
            result_record_count: Forwarded as ``result_record_count``. When it
                is not an ``int``, ``return_all_records`` is set to ``True``.
            lyr_desc: Layer description to use instead of the one stored in
                the layer's properties.
            **kwargs: Extra query parameters; they override the parameters
                derived from the arguments above.

        Raises:
            ImportError: If the ``arcgis`` package is not installed.
        """
        try:
            import arcgis
        except ImportError as e:
            raise ImportError(
                "arcgis is required to use the ArcGIS Loader. "
                "Install it with pip or conda."
            ) from e

        # BeautifulSoup is optional: without it descriptions are kept as-is.
        try:
            from bs4 import BeautifulSoup

            self.BEAUTIFULSOUP = BeautifulSoup
        except ImportError:
            warnings.warn("BeautifulSoup not found. HTML will not be parsed.")
            self.BEAUTIFULSOUP = None

        self.gis = gis or arcgis.gis.GIS()

        if isinstance(layer, str):
            self.url = layer
            # Use the resolved connection so the layer and the later item
            # lookup go through the same GIS object.
            self.layer = arcgis.features.FeatureLayer(layer, gis=self.gis)
        else:
            self.url = layer.url
            self.layer = layer

        self.layer_properties = self._get_layer_properties(lyr_desc)

        self.where = where

        if isinstance(out_fields, str):
            self.out_fields = out_fields
        elif out_fields is None:
            self.out_fields = "*"
        else:
            self.out_fields = ",".join(out_fields)

        self.return_geometry = return_geometry

        self.result_record_count = result_record_count
        self.return_all_records = not isinstance(result_record_count, int)

        # Caller-supplied kwargs come last so they win over the defaults.
        self.query_params = {
            "where": self.where,
            "out_fields": self.out_fields,
            "return_geometry": self.return_geometry,
            "return_all_records": self.return_all_records,
            "result_record_count": self.result_record_count,
            **kwargs,
        }

    def _html_to_text(self, raw: Any) -> Any:
        """Return ``raw`` with HTML markup removed when BeautifulSoup is available.

        Falsy input (``None`` or an empty string) is returned unchanged so a
        missing description never reaches the HTML parser; callers substitute
        the placeholder themselves.
        """
        if not raw or not self.BEAUTIFULSOUP:
            return raw
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        return self.BEAUTIFULSOUP(raw, "html.parser").text

    def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict:
        """Get the layer properties from the FeatureLayer.

        Args:
            lyr_desc: Layer description supplied by the caller. When ``None``
                the description is read from the layer's properties.

        Returns:
            A dict with the keys ``"layer_description"``,
            ``"item_description"`` and ``"layer_properties"``. Missing or
            empty descriptions are replaced by the ``_NOT_PROVIDED``
            placeholder.
        """
        import arcgis

        props = self.layer.properties

        if lyr_desc is None:
            # retrieve description from the FeatureLayer if not provided
            try:
                lyr_desc = self._html_to_text(props["description"]) or _NOT_PROVIDED
            except KeyError:
                lyr_desc = _NOT_PROVIDED

        try:
            item_id = props["serviceItemId"]
            # Fall back to the parent service (URL without the trailing
            # "/<layer number>") when the item lookup returns nothing.
            item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
                re.sub(r"/\d+$", "", self.url),
            )
            try:
                raw_desc = item.description
            except AttributeError:
                raw_desc = item.properties.description
            item_desc = self._html_to_text(raw_desc) or _NOT_PROVIDED
        except KeyError:
            item_desc = _NOT_PROVIDED

        return {
            "layer_description": lyr_desc,
            "item_description": item_desc,
            "layer_properties": props,
        }

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load records from FeatureLayer.

        Runs the configured query and yields one ``Document`` per returned
        feature.

        Yields:
            Documents whose ``page_content`` is the JSON dump of the feature's
            attributes. Metadata contains ``accessed`` (UTC timestamp in ISO
            8601 form with a ``Z`` suffix), ``name``, ``url``,
            ``layer_description``, ``item_description``, ``layer_properties``
            and, when ``return_geometry`` is set and the feature has one,
            ``geometry``.
        """
        query_response = self.layer.query(**self.query_params)

        for feature in query_response:
            record = feature.as_dict
            page_content = json.dumps(record["attributes"])

            # isoformat() on an aware UTC datetime already ends in "+00:00";
            # swap it for "Z" instead of appending a second zone designator.
            accessed = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

            metadata = {
                "accessed": accessed,
                "name": self.layer_properties["layer_properties"]["name"],
                "url": self.url,
                "layer_description": self.layer_properties["layer_description"],
                "item_description": self.layer_properties["item_description"],
                "layer_properties": self.layer_properties["layer_properties"],
            }

            if self.return_geometry:
                try:
                    metadata["geometry"] = record["geometry"]
                except KeyError:
                    warnings.warn(
                        "Geometry could not be retrieved from the feature layer."
                    )

            yield Document(page_content=page_content, metadata=metadata)