def __init__(self, data_frame: Any, page_content_column: str = "geometry"):
    """Initialize with geopandas Dataframe.

    Args:
        data_frame: geopandas DataFrame object.
        page_content_column: Name of the column containing the page
            content. Defaults to "geometry".

    Raises:
        ImportError: If the geopandas package cannot be imported.
        ValueError: If ``data_frame`` is not a ``gpd.GeoDataFrame``, if it
            has no column named ``page_content_column``, or if that column
            is not a ``gpd.GeoSeries``.
    """
    # Imported inside the method so a missing geopandas surfaces here,
    # together with an install hint, rather than at module import time.
    try:
        import geopandas as gpd
    except ImportError as exc:
        # Chain explicitly so the original import failure stays attached.
        raise ImportError(
            "geopandas package not found, please install it with "
            "`pip install geopandas`"
        ) from exc

    if not isinstance(data_frame, gpd.GeoDataFrame):
        raise ValueError(
            f"Expected data_frame to be a gpd.GeoDataFrame, got {type(data_frame)}"
        )

    if page_content_column not in data_frame.columns:
        raise ValueError(
            f"Expected data_frame to have a column named {page_content_column}"
        )

    # The column must hold geometries: lazy_load reads ``.bounds`` and
    # ``.wkt`` from every value in it.
    if not isinstance(data_frame[page_content_column], gpd.GeoSeries):
        raise ValueError(
            f"Expected data_frame[{page_content_column}] to be a GeoSeries"
        )

    self.data_frame = data_frame
    self.page_content_column = page_content_column
def lazy_load(self) -> Iterator[Document]:
    """Lazy load records from dataframe.

    Yields one ``Document`` per row. ``page_content`` is the WKT text of the
    geometry stored in ``self.page_content_column``; ``metadata`` holds the
    row's remaining columns plus ``crs``, ``geometry_type`` and the bounding
    box (``xmin``, ``ymin``, ``xmax``, ``ymax``) of that geometry.

    An empty dataframe yields no documents.

    Yields:
        Document: One document for each row of ``self.data_frame``.
    """
    # Describe the column that actually supplies the page content; it may
    # differ from the frame's active geometry column.
    # Assumes all geometries in that GeoSeries share the same CRS.
    content_series = self.data_frame[self.page_content_column]
    crs = content_series.crs
    crs_str = crs.to_string() if crs else None

    for _, row in self.data_frame.iterrows():
        geom = row[self.page_content_column]
        xmin, ymin, xmax, ymax = geom.bounds

        metadata = row.to_dict()
        metadata["crs"] = crs_str
        # Taken from the row's own geometry, so frames with mixed geometry
        # types are labelled correctly and an empty frame needs no
        # first-row lookup.
        metadata["geometry_type"] = geom.geom_type
        metadata["xmin"] = xmin
        metadata["ymin"] = ymin
        metadata["xmax"] = xmax
        metadata["ymax"] = ymax
        # The geometry itself becomes the page content, not metadata.
        metadata.pop(self.page_content_column)

        # using WKT instead of str() to help GIS system interoperability
        yield Document(page_content=geom.wkt, metadata=metadata)