class MilvusCollectionHybridSearchRetriever(BaseRetriever):
    """Hybrid search retriever that uses Milvus Collection to retrieve documents based
    on multiple fields.

    For more information, please refer to:
    https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search
    """

    collection: Collection
    """Milvus Collection object."""
    rerank: BaseRanker
    """Milvus ranker object. Such as WeightedRanker or RRFRanker."""
    anns_fields: List[str]
    """The names of vector fields that are used for ANNS search."""
    field_embeddings: List[Union[Embeddings, BaseSparseEmbedding]]
    """The embedding function of each vector field, which can be either
    Embeddings or BaseSparseEmbedding. Must have one entry per ANNS field."""
    field_search_params: Optional[List[Dict]] = None
    """The search parameters of each vector field.
    If not specified, the default search parameters will be used."""
    field_limits: Optional[List[int]] = None
    """Limit number of results for each ANNS field.
    If not specified, the default top_k will be used."""
    field_exprs: Optional[List[Optional[str]]] = None
    """The boolean expression for filtering the search results."""
    top_k: int = 4
    """Final top-K number of documents to retrieve."""
    text_field: str = "text"
    """The text field name,
    which will be used as the `page_content` of a `Document` object."""
    output_fields: Optional[List[str]] = None
    """Final output fields of the documents.
    If not specified, all fields except the vector fields will be used as output
    fields, which will be the `metadata` of a `Document` object."""

    def __init__(self, **kwargs: Any):
        """Initialize the retriever, fill in per-field defaults, validate, and load.

        Args:
            **kwargs: Field values, forwarded unchanged to the base class
                initializer.

        Raises:
            AssertionError: If fewer than two ANNS fields are given, or if an
                ANNS field, the text field, or (when the schema does not enable
                dynamic fields) an output field is not in the collection schema.
            ValueError: If the per-field lists do not all have the same length
                as ``anns_fields``.
        """
        super().__init__(**kwargs)

        # If some parameters are not specified, set default values.
        num_fields = len(self.anns_fields)
        if self.field_search_params is None:
            # Build a fresh dict (and nested ``params`` dict) per field so that
            # mutating one field's parameters cannot leak into the others.
            self.field_search_params = [
                {"metric_type": "L2", "params": {"nprobe": 10}}
                for _ in range(num_fields)
            ]
        if self.field_limits is None:
            self.field_limits = [self.top_k] * num_fields
        if self.field_exprs is None:
            self.field_exprs = [None] * num_fields

        # Check the fields. Output fields are resolved before the name check
        # because ``_validate_fields_name`` iterates over them.
        self._validate_fields_num()
        self.output_fields = self._get_output_fields()
        self._validate_fields_name()

        # Load collection
        self.collection.load()

    def _validate_fields_num(self) -> None:
        """Check that there are enough ANNS fields and all per-field lists align.

        Raises:
            AssertionError: If fewer than two ANNS fields are configured.
            ValueError: If ``field_limits``, ``field_exprs``,
                ``field_search_params`` or ``field_embeddings`` differs in
                length from ``anns_fields``.
        """
        expected = len(self.anns_fields)
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if expected < 2:
            raise AssertionError("At least two fields are required for hybrid search.")

        optional_lists = (self.field_limits, self.field_exprs)
        if any(
            values is not None and len(values) != expected
            for values in optional_lists
        ):
            raise ValueError("All field-related lists must have the same length.")

        if len(self.field_search_params) != expected:  # type: ignore[arg-type]
            raise ValueError(
                "field_search_params must have the same length as anns_fields."
            )

        # The search requests are built with ``zip``, which would otherwise
        # silently drop ANNS fields that have no matching embedding function.
        if len(self.field_embeddings) != expected:
            raise ValueError(
                "field_embeddings must have the same length as anns_fields."
            )

    def _validate_fields_name(self) -> None:
        """Check that the configured field names exist in the collection schema.

        Output fields are only checked when the schema does not enable dynamic
        fields.

        Raises:
            AssertionError: If an ANNS field, the text field, or a checked
                output field is not a field of the collection schema.
        """
        schema = self.collection.schema
        collection_fields = {x.name for x in schema.fields}

        # Explicit raises keep the original exception type and message while
        # remaining active under ``python -O``.
        for field in self.anns_fields:
            if field not in collection_fields:
                raise AssertionError(
                    f"{field} is not a valid field in the collection."
                )

        if self.text_field not in collection_fields:
            raise AssertionError(
                f"{self.text_field} is not a valid field in the collection."
            )

        for field in self.output_fields:  # type: ignore[union-attr]
            if not schema.enable_dynamic_field and field not in collection_fields:
                raise AssertionError(
                    f"{field} is not a valid field in the collection."
                )

    def _get_output_fields(self) -> List[str]:
        """Resolve the list of fields to request for each search hit.

        If ``output_fields`` was supplied (and is non-empty) it is used as the
        starting point; otherwise every schema field that is not an ANNS field
        is used. In both cases ``text_field`` is included, because
        ``_parse_document`` pops it from each hit to build ``page_content``.

        Returns:
            A new list of field names; the caller-supplied list is not mutated.
        """
        if self.output_fields:
            output_fields = list(self.output_fields)
        else:
            vector_fields = set(self.anns_fields)
            output_fields = [
                x.name
                for x in self.collection.schema.fields
                if x.name not in vector_fields
            ]

        if self.text_field not in output_fields:
            output_fields.append(self.text_field)
        return output_fields

    def _build_ann_search_requests(self, query: str) -> List[AnnSearchRequest]:
        """Create one ``AnnSearchRequest`` per ANNS field for the given query.

        Args:
            query: Query text; it is embedded separately with each field's
                embedding function.

        Returns:
            The search requests, in the same order as ``anns_fields``.
        """
        per_field_settings = zip(
            self.anns_fields,
            self.field_embeddings,
            self.field_search_params,  # type: ignore[arg-type]
            self.field_limits,  # type: ignore[arg-type]
            self.field_exprs,  # type: ignore[arg-type]
        )
        return [
            AnnSearchRequest(
                data=[embedding.embed_query(query)],
                anns_field=ann_field,
                param=param,
                limit=limit,
                expr=expr,
            )
            for ann_field, embedding, param, limit, expr in per_field_settings
        ]

    def _parse_document(self, data: dict) -> Document:
        """Convert one hit's field mapping into a ``Document``.

        Args:
            data: Mapping of output field name to value. It is modified in
                place: ``text_field`` is popped out and the remaining mapping
                becomes the document metadata.

        Returns:
            A ``Document`` whose ``page_content`` is the value of ``text_field``.

        Raises:
            KeyError: If ``data`` does not contain ``text_field``.
        """
        return Document(
            page_content=data.pop(self.text_field),
            metadata=data,
        )

    def _process_search_result(
        self, search_results: List[SearchResult]
    ) -> List[Document]:
        """Convert the hits of the first search result into documents.

        Args:
            search_results: Search results; only the first element is read
                (presumably one entry per query, and a single query is issued
                -- confirm against the caller).

        Returns:
            One ``Document`` per hit, in result order; an empty list when
            ``search_results`` is empty.
        """
        if not search_results:
            return []

        output_fields = self.output_fields
        return [
            self._parse_document(
                {x: result.entity.get(x) for x in output_fields}  # type: ignore[union-attr]
            )
            for result in search_results[0]
        ]