def serialize_f32(vector: List[float]) -> bytes:
    """Pack a list of floats into a compact "raw bytes" blob.

    Every element is written as one C ``float`` in the platform's native
    byte order, back to back, with no header or separator.

    Source: https://github.com/asg017/sqlite-vec/blob/21c5a14fc71c83f135f5b00c84115139fd12c492/examples/simple-python/demo.py#L8-L10

    Args:
        vector: The values to serialize.

    Returns:
        The packed bytes; ``b""`` for an empty list.
    """
    # "<count>f" tells struct to emit exactly len(vector) floats.
    layout = f"{len(vector)}f"
    return struct.pack(layout, *vector)
class SQLiteVec(VectorStore):
    """SQLite with Vec extension as a vector database.

    To use, you should have the ``sqlite-vec`` python package installed.

    Texts, their JSON-encoded metadata and their serialized embeddings are
    kept in a regular table. An ``AFTER INSERT`` trigger copies each
    embedding into a companion ``vec0`` virtual table, which is the table
    queried for nearest neighbours.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import SQLiteVec
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            ...
    """
def __init__(
    self,
    table: str,
    connection: Optional[sqlite3.Connection],
    embedding: Embeddings,
    db_file: str = "vec.db",
):
    """Initialize the store on a SQLite connection with the vec extension.

    Args:
        table: Name of the table that holds texts, metadata and embeddings.
        connection: Connection to use. When falsy, a connection is obtained
            from ``create_connection(db_file)`` instead.
        embedding: Embedding model. A warning (not an error) is emitted if
            it is not an ``Embeddings`` instance.
        db_file: Database file handed to ``create_connection`` when no
            connection is supplied.

    Raises:
        ImportError: If the ``sqlite-vec`` python package cannot be imported.
    """
    try:
        import sqlite_vec  # noqa  # pylint: disable=unused-import
    except ImportError:
        raise ImportError(
            "Could not import sqlite-vec python package. "
            "Please install it with `pip install sqlite-vec`."
        )

    # Fall back to a connection of our own only when the caller gave none.
    active_connection = connection if connection else self.create_connection(db_file)

    if not isinstance(embedding, Embeddings):
        warnings.warn("embeddings input must be Embeddings object.")

    self._connection = active_connection
    self._table = table
    self._embedding = embedding

    # Needs the three attributes above, so it has to run last.
    self.create_table_if_not_exists()
def create_table_if_not_exists(self) -> None:
    """Create the storage table, its ``vec0`` index and the sync trigger.

    Three idempotent statements are executed and then committed:

    1. ``<table>`` with ``rowid``, ``text``, ``metadata`` and
       ``text_embedding`` columns.
    2. ``<table>_vec``, a ``vec0`` virtual table whose vector width comes
       from ``get_dimensionality()``.
    3. ``<table>_embed_text``, an ``AFTER INSERT`` trigger on ``<table>``
       that copies each new row's rowid and embedding into ``<table>_vec``.
    """
    store_table = self._table
    db = self._connection

    db.execute(
        f"""
        CREATE TABLE IF NOT EXISTS {store_table}
        (
            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
            text TEXT,
            metadata BLOB,
            text_embedding BLOB
        )
        ;
        """
    )

    # Probed only now, after the plain table exists, matching the order in
    # which the statements were originally evaluated.
    dimensions = self.get_dimensionality()
    db.execute(
        f"""
        CREATE VIRTUAL TABLE IF NOT EXISTS {store_table}_vec USING vec0(
            rowid INTEGER PRIMARY KEY,
            text_embedding float[{dimensions}]
        )
        ;
        """
    )

    db.execute(
        f"""
        CREATE TRIGGER IF NOT EXISTS {store_table}_embed_text
        AFTER INSERT ON {store_table}
        BEGIN
            INSERT INTO {store_table}_vec(rowid, text_embedding)
            VALUES (new.rowid, new.text_embedding)
            ;
        END;
        """
    )
    db.commit()
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    **kwargs: Any,
) -> List[str]:
    """Add more texts to the vectorstore index.

    Args:
        texts: Iterable of strings to add to the vectorstore. It is
            materialized once up front, so single-pass iterables such as
            generators are handled correctly.
        metadatas: Optional list of metadatas associated with the texts.
            When omitted or empty, every text is stored with ``{}``.
            NOTE: texts, metadatas and embeddings are combined with
            ``zip``, so a shorter ``metadatas`` list truncates the insert.
        kwargs: vectorstore specific parameters (currently unused).

    Returns:
        The rowids above the highest rowid that existed before this call,
        in ascending order. The values are returned as read from SQLite,
        with no conversion to ``str``. An empty input returns ``[]``
        without calling the embedding model or touching the database.
    """
    # ``texts`` is walked several times below (embedding, default metadata,
    # row building). A generator would be exhausted by the first pass and
    # nothing would be inserted, so take a private list first.
    texts = list(texts)
    if not texts:
        return []

    max_id = self._connection.execute(
        f"SELECT max(rowid) as rowid FROM {self._table}"
    ).fetchone()["rowid"]
    if max_id is None:  # no text added yet
        max_id = 0

    embeds = self._embedding.embed_documents(texts)
    if not metadatas:
        metadatas = [{} for _ in texts]

    data_input = [
        (text, json.dumps(metadata), serialize_f32(embed))
        for text, metadata, embed in zip(texts, metadatas, embeds)
    ]
    self._connection.executemany(
        f"INSERT INTO {self._table}(text, metadata, text_embedding) VALUES (?,?,?)",
        data_input,
    )
    self._connection.commit()

    # Pull every id we just inserted: anything above the pre-insert maximum.
    results = self._connection.execute(
        f"SELECT rowid FROM {self._table} WHERE rowid > ? ORDER BY rowid",
        (max_id,),
    )
    return [row["rowid"] for row in results]
def similarity_search_with_score_by_vector(
    self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
    """Return the stored documents nearest to ``embedding`` with distances.

    Args:
        embedding: Query vector. It is serialized with ``serialize_f32``
            and bound to the ``MATCH`` placeholder.
        k: Value bound to the ``k = ?`` constraint of the query.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``(Document, distance)`` pairs in the order produced by the
        query's ``ORDER BY distance``. Metadata is decoded from JSON, and
        a falsy decoded value is replaced by ``{}``.
    """
    knn_sql = f"""
        SELECT
            text,
            metadata,
            distance
        FROM {self._table} AS e
        INNER JOIN {self._table}_vec AS v on v.rowid = e.rowid
        WHERE
            v.text_embedding MATCH ?
            AND k = ?
        ORDER BY distance
    """
    cur = self._connection.cursor()
    cur.execute(knn_sql, [serialize_f32(embedding), k])

    return [
        (
            Document(
                page_content=hit["text"],
                metadata=json.loads(hit["metadata"]) or {},
            ),
            hit["distance"],
        )
        for hit in cur.fetchall()
    ]
def similarity_search(
    self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
    """Return docs most similar to query.

    The query is embedded with ``embed_query`` and the vector search is
    delegated to ``similarity_search_with_score_by_vector``. The scores
    are then dropped.

    Args:
        query: Text to search for.
        k: Number of documents requested from the vector search.
        kwargs: Accepted for interface compatibility; not forwarded.

    Returns:
        The matching documents, in the order the scored search returned them.
    """
    query_vector = self._embedding.embed_query(query)
    scored_hits = self.similarity_search_with_score_by_vector(
        embedding=query_vector, k=k
    )
    return [document for document, _score in scored_hits]
def similarity_search_with_score(
    self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query, together with their scores.

    Args:
        query: Text to search for; embedded with ``embed_query``.
        k: Number of documents requested from the vector search.
        kwargs: Accepted for interface compatibility; not forwarded.

    Returns:
        Whatever ``similarity_search_with_score_by_vector`` returns for the
        embedded query, unchanged.
    """
    query_vector = self._embedding.embed_query(query)
    return self.similarity_search_with_score_by_vector(
        embedding=query_vector, k=k
    )
@classmethod
def from_texts(
    cls: Type[SQLiteVec],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    table: str = "langchain",
    db_file: str = "vec.db",
    **kwargs: Any,
) -> SQLiteVec:
    """Return VectorStore initialized from texts and embeddings.

    Args:
        texts: Texts to insert into the new store.
        embedding: Embedding model handed to the constructor.
        metadatas: Optional metadatas passed through to ``add_texts``.
        table: Table name handed to the constructor.
        db_file: Database file used to open the connection.
        kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The store instance, after ``texts`` have been added to it.
    """
    store = cls(
        table=table,
        connection=cls.create_connection(db_file),
        db_file=db_file,
        embedding=embedding,
    )
    store.add_texts(texts=texts, metadatas=metadatas)
    return store
def get_dimensionality(self) -> int:
    """Report how many dimensions the embedding function produces.

    A fixed dummy sentence is embedded and the length of the resulting
    vector is returned. Needed for the virtual table DDL.

    Returns:
        Length of the vector returned by ``embed_query``.
    """
    probe_vector = self._embedding.embed_query("This is a dummy text")
    return len(probe_vector)