PebbloLoaderAPIWrapper#

class langchain_community.utilities.pebblo.PebbloLoaderAPIWrapper[source]#

Bases: BaseModel

Wrapper for Pebblo Loader API.

Validate that the API key is present in the environment.

param anonymize_snippets: bool = False#: Whether to anonymize snippets going into VectorDB and the generated reports

param api_key: str | None [Required]#: API key for Pebblo Cloud

param classifier_location: str = 'local'#: Location of the classifier, either ‘local’ or ‘cloud’. Defaults to ‘local’.

param classifier_url: str | None [Required]#: URL of the Pebblo Classifier

param cloud_url: str | None [Required]#: URL of the Pebblo Cloud

static make_request( method: str, url: str, headers: dict, payload: dict | None = None, timeout: int = 20, ) → Response | None[source]#

Make a request to the Pebblo API

Parameters:

method (str) – HTTP method (GET, POST, PUT, DELETE, etc.).
url (str) – URL for the request.
headers (dict) – Headers for the request.
payload (Optional[dict]) – Payload for the request (for POST, PUT, etc.).
timeout (int) – Timeout for the request in seconds.

Returns:

Response object if the request is successful.

Return type:

Optional[Response]

static prepare_docs_for_classification( docs_with_id: List[IndexedDocument], source_path: str, loader_details: dict, ) → Tuple[List[dict], int][source]#

Prepare documents for classification.

Parameters:

docs_with_id (List[IndexedDocument]) – List of documents to be classified.
source_path (str) – Source path of the documents.
loader_details (dict) – Contains loader info.

Returns:

Documents and the aggregate size of the source.

Return type:

Tuple[List[dict], int]

static update_doc_data( docs: List[dict], classified_docs: dict, ) → None[source]#

Update the document data with classified information.

Parameters:

docs (List[dict]) – List of document data to be updated.
classified_docs (dict) – The dictionary containing classified documents.

Return type:

None

build_classification_payload( app: App, docs: List[dict], loader_details: dict, source_owner: str, source_aggregate_size: int, loading_end: bool, ) → dict[source]#

Build the payload for document classification.

Parameters:

app (App) – App instance.
docs (List[dict]) – List of documents to be classified.
loader_details (dict) – Loader details.
source_owner (str) – Owner of the source.
source_aggregate_size (int) – Aggregate size of the source.
loading_end (bool) – Boolean indicating that the loader has finished loading data.

Returns:

Payload for document classification.

Return type:

dict

classify_documents( docs_with_id: List[IndexedDocument], app: App, loader_details: dict, loading_end: bool = False, ) → dict[source]#

Send documents to the Pebblo server for classification, then send the classified documents to Daxa cloud (if api_key is present).

Parameters:

docs_with_id (List[IndexedDocument]) – List of documents to be classified.
app (App) – App instance.
loader_details (dict) – Loader details.
loading_end (bool) – Boolean indicating that the loader has finished loading data.

Return type:

dict

send_docs_to_pebblo_cloud( payload: dict, ) → None[source]#

Send documents to Pebblo cloud.

Parameters: payload (dict) – The payload containing documents to be sent.
Return type: None

send_loader_discover( app: App, ) → None[source]#

Send app discovery request to Pebblo server & cloud.

Parameters: app (App) – App instance to be discovered.
Return type: None