
    BThb                        d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZ er	  ej:                  e      Z ede      Z  G d de      Z!y)    )annotationsN)TYPE_CHECKINGAnyIterableListOptionalTupleTypeVar)Document)batch_iterate)VectorStore)SparseValuesVector)DistanceStrategy!sparse_maximal_marginal_relevance)PineconeSparseEmbeddings)PineconeVectorStoreVST)boundc                      e Zd ZdZddddej
                  fddd	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZedd       Z	 	 	 	 	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ		 	 	 	 	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z
	 	 	 d	 	 	 	 	 	 	 	 	 ddZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZd
ddd	 	 	 	 	 	 	 	 	 d dZd
ddd	 	 	 	 	 	 	 	 	 d dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d!dZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 d"dZ	 	 	 	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$dZ	 	 	 	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$dZ	 	 	 	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d%dZ	 	 	 	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&dZ	 	 	 	 d'	 	 	 	 	 	 	 	 	 	 	 d(dZ	 	 	 	 d'	 	 	 	 	 	 	 	 	 	 	 d(dZ xZS ))PineconeSparseVectorStoreaa  Pinecone sparse vector store integration.

    This class extends PineconeVectorStore to support sparse vector representations.
    It requires a Pinecone sparse index and PineconeSparseEmbeddings.

    Setup:
        ```python
        # Install required packages
        pip install langchain-pinecone pinecone-client
        ```

    Key init args - indexing params:
        text_key (str): The metadata key where the document text will be stored.
        namespace (str): Pinecone namespace to use.
        distance_strategy (DistanceStrategy): Strategy for computing distances.

    Key init args - client params:
        index (pinecone.Index): A Pinecone sparse index.
        embedding (PineconeSparseEmbeddings): A sparse embeddings model.
        pinecone_api_key (str): The Pinecone API key.
        index_name (str): The name of the Pinecone index.

    See full list of supported init args and their descriptions in the params section.

    Instantiate:
        ```python
        from pinecone import Pinecone
        from langchain_pinecone import PineconeSparseVectorStore
        from langchain_pinecone.embeddings import PineconeSparseEmbeddings

        # Initialize Pinecone client
        pc = Pinecone(api_key="your-api-key")

        # Get your sparse index
        index = pc.Index("your-sparse-index-name")

        # Initialize embedding function
        embeddings = PineconeSparseEmbeddings()

        # Create vector store
        vectorstore = PineconeSparseVectorStore(
            index=index,
            embedding=embeddings,
            text_key="content",
            namespace="my-namespace"
        )
        ```

    Add Documents:
        ```python
        from langchain_core.documents import Document

        docs = [
            Document(page_content="This is a sparse vector example"),
            Document(page_content="Another document for testing")
        ]

        # Option 1: Add from Document objects
        vectorstore.add_documents(docs)

        # Option 2: Add from texts
        texts = ["Text 1", "Text 2"]
        metadatas = [{"source": "source1"}, {"source": "source2"}]
        vectorstore.add_texts(texts, metadatas=metadatas)
        ```

    Update Documents:
        Update documents by re-adding them with the same IDs.
        ```python
        ids = ["id1", "id2"]
        texts = ["Updated text 1", "Updated text 2"]
        metadatas = [{"source": "updated_source1"}, {"source": "updated_source2"}]

        vectorstore.add_texts(texts, metadatas=metadatas, ids=ids)
        ```

    Delete Documents:
        ```python
        # Delete by IDs
        vectorstore.delete(ids=["id1", "id2"])

        # Delete by filter
        vectorstore.delete(filter={"source": "source1"})

        # Delete all documents in a namespace
        vectorstore.delete(delete_all=True, namespace="my-namespace")
        ```

    Search:
        ```python
        # Search for similar documents
        docs = vectorstore.similarity_search("query text", k=5)

        # Search with filters
        docs = vectorstore.similarity_search(
            "query text",
            k=5,
            filter={"source": "source1"}
        )

        # Maximal marginal relevance search for diversity
        docs = vectorstore.max_marginal_relevance_search(
            "query text",
            k=5,
            fetch_k=20,
            lambda_mult=0.5
        )
        ```

    Search with score:
        ```python
        # Search with relevance scores
        docs_and_scores = vectorstore.similarity_search_with_score(
            "query text",
            k=5
        )

        for doc, score in docs_and_scores:
            print(f"Score: {score}, Document: {doc.page_content}")
        ```

    Use as Retriever:
        ```python
        # Create a retriever
        retriever = vectorstore.as_retriever()

        # Customize retriever
        retriever = vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 5, "fetch_k": 20, "lambda_mult": 0.5},
            filter={"source": "source1"}
        )

        # Use the retriever
        docs = retriever.get_relevant_documents("query text")
        ```
    Ntextpinecone_api_key
index_namec          	     x    |r!|j                         d   dk7  rt        d      t        |   |||||||       y )Nvector_typesparsez>PineconeSparseVectorStore can only be used with Sparse Indexesr   )describe_index_stats
ValueErrorsuper__init__)	selfindex	embeddingtext_key	namespacedistance_strategyr   r   	__class__s	           x/var/www/catia.catastroantioquia-mas.com/valormas/lib/python3.12/site-packages/langchain_pinecone/vectorstores_sparse.pyr"   z"PineconeSparseVectorStore.__init__   sV     U//1-@HLP  	-! 	 	
    c                    | j                   st        d      t        | j                   t              st        d      | j                   S )NzHMust provide a PineconeSparseEmbeddings to the PineconeSparseVectorStorezHPineconeSparseVectorStore can only be used with PineconeSparseEmbeddings)
_embeddingr    
isinstancer   )r#   s    r*   
embeddingsz$PineconeSparseVectorStore.embeddings   sF    Z  $//+CDZ  r+   )	id_prefixc                  || j                   }t        |      }|xs+ |D 	cg c]  }	t        t        j                               ! c}	}|r|D 
cg c]  }
|dz   |
vr|dz   |
z   n|
 }}
|xs |D 	cg c]  }	i  c}	}t        ||      D ]  \  }}||| j                  <    t        dt        |      |      D ]  }||||z    }||||z    }||||z    }| j                  j                  |      }t        |||      D cg c]  \  }}}t        |||       }}}} | j                  j                  d||d|  |S c c}	w c c}
w c c}	w c c}}}w )N#r   idsparse_valuesmetadatavectorsr'    )
_namespaceliststruuiduuid4zip	_text_keyrangelenr/   embed_documentsr   r$   upsert)r#   texts	metadatasidsr'   
batch_sizeembedding_chunk_sizer0   kwargs_r4   r6   r   ichunk_texts	chunk_idschunk_metadatasr/   chunk_idvaluer8   s                        r*   	add_textsz#PineconeSparseVectorStore.add_texts   s    IU77Ac$**,'7TWNP	Cr(A	C"$rIC  4e!4"!4	!)U3 	,NHd'+HT^^$	,
 q#e*&:; 	AA(<$<=KA$8 89I'A0D,DEO88EJ 47z?4 /Xuh (%(KG  DJJ # 	  
7 8 "5s   $D=E1	E=E
c                 K   || j                   }t        |      }|xs+ |D 	cg c]  }	t        t        j                               ! c}	}|r|D 
cg c]  }
|dz   |
vr|dz   |
z   n|
 }}
|xs |D 	cg c]  }	i  c}	}t        ||      D ]  \  }}||| j                  <    t        dt        |      |      D ]  }||||z    }||||z    }||||z    }| j                  j                  |       d{   }t        |||      }| j                  4 d{   }g }t        ||      D ]J  } |j                  d|D cg c]  \  }}}t        |||       c}}}|d|}|j                  |       L t!        j"                  |  d{    ddd      d{     |S c c}	w c c}
w c c}	w 7 7 c c}}}w 7 27 $# 1 d{  7  sw Y   xY ww)a  Asynchronously run more texts through the embeddings and add to the vectorstore.

        Upsert optimization is done by chunking the embeddings and upserting them.
        This is done to avoid memory issues and optimize using HTTP based embeddings.
        For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index,
        embedding_chunk_size>1000 and batch_size~64 for best performance.
        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids to associate with the texts.
            namespace: Optional pinecone namespace to add the texts to.
            batch_size: Batch size to use when adding the texts to the vectorstore.
            embedding_chunk_size: Chunk size to use when embedding the texts.
            id_prefix: Optional string to use as an ID prefix when upserting vectors.

        Returns:
            List of ids from adding the texts into the vectorstore.

        Nr2   r   r3   r7   r9   )r:   r;   r<   r=   r>   r?   r@   rA   rB   r/   aembed_documentsasync_indexr   rD   r   appendasynciogather)r#   rE   rF   rG   r'   rH   rI   r0   rJ   rK   r4   r6   r   rL   rM   rN   rO   r/   vector_tuplesidxtasksbatch_vector_tuplesrP   r5   tasks                            r*   
aadd_textsz$PineconeSparseVectorStore.aadd_texts  s.    > IU77Ac$**,'7TWNP	Cr(A	C"$rIC  4e!4"!4	!)U3 	,NHd'+HT^^$	, q#e*&:; 	-AA(<$<=KA$8 89I'A0D,DEO#??LLJ	:GM'' - -3+8]+S ''%3::  FY! ! !B- ##+.;)1! #, !D LL&'  nne,,,'- - -	-8 
M 8 "5 M-
! -'- - - -s   "G$F!	GF&)
G3	F+<A5G1F02!GF2G%F?<F43F?F;	F?GF=G2G4F?=G?G	GG	G   c                ^    | j                  | j                  j                  |      |||      S )a  Return pinecone documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents most similar to the query and score for each
        kfilterr'   )&similarity_search_by_vector_with_scorer/   embed_queryr#   queryrb   rc   r'   s        r*   similarity_search_with_scorez6PineconeSparseVectorStore.similarity_search_with_scoreL  s4    $ ::OO''.!Fi ; 
 	
r+   c                   K   | j                  | j                  j                  |       d{   |||       d{   S 7 7 w)a  Asynchronously return pinecone documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents most similar to the query and score for each
        Nra   )'asimilarity_search_by_vector_with_scorer/   aembed_queryrf   s        r*   asimilarity_search_with_scorez7PineconeSparseVectorStore.asimilarity_search_with_scoreb  sM     $ AA??//66	 B 
 
 	
6
s   *AAAAAAra   c                  || j                   }g }| j                  j                  ||d||      }|d   D ]  }|d   }|j                  d      }	| j                  |v r@|j                  | j                        }
|d   }|j                  t        |	|
|      |f       gt        j                  d| j                   d	        |S )
zGReturn pinecone documents most similar to embedding, along with scores.Tsparse_vectortop_kinclude_metadatar'   rc   matchesr6   r4   scorer4   page_contentr6   Found document with no `` key. Skipping.)
r:   r$   rg   getr@   poprV   r   loggerwarning)r#   r%   rb   rc   r'   docsresultsresr6   r4   r   rs   s               r*   rd   z@PineconeSparseVectorStore.similarity_search_by_vector_with_score{  s     I**""#! # 
 9% 	C:HB~~)||DNN3G$JER .t~~.>>NO	 r+   c                 K   || j                   }g }| j                  4 d{   }|j                  ||d||       d{   }ddd      d{    d   D ]  }|d   }	|j                  d      }
| j                  |	v r@|	j                  | j                        }|d   }|j                  t        |
||	      |f       gt        j                  d	| j                   d
        |S 7 7 7 # 1 d{  7  sw Y   xY ww)zVReturn pinecone documents most similar to embedding, along with scores asynchronously.NTrn   rr   r6   r4   rs   rt   rv   rw   )
r:   rU   rg   rx   r@   ry   rV   r   rz   r{   )r#   r%   rb   rc   r'   r|   rZ   r}   r~   r6   r4   r   rs   s                r*   rj   zAPineconeSparseVectorStore.asimilarity_search_by_vector_with_score  s     I## 	 	sII'!%# &  G	 	 9% 	C:HB~~)||DNN3G$JER .t~~.>>NO	 -		 	 	 	sW   !DC)DC/C+C/DC-BD+C/-D/D5C86D=Dc                f     | j                   |f|||d|}|D cg c]  \  }}|	 c}}S c c}}w )a  Return pinecone documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents most similar to the query and score for each
        ra   )rh   	r#   rg   rb   rc   r'   rJ   docs_and_scoresdocrK   s	            r*   similarity_searchz+PineconeSparseVectorStore.similarity_search  sG    & <$;;
v
>D
 #22Q222s   -c                   K    | j                   |f|||d| d {   }|D cg c]  \  }}|	 c}}S 7 c c}}w w)Nra   )rl   r   s	            r*   asimilarity_searchz,PineconeSparseVectorStore.asimilarity_search  sX      !C B B!
v!
>D!
 
 #22Q22
 3s   ?7	?9??c           
        || j                   }| j                  j                  ||dd||      }t        ||d   D 	cg c]  }	t	        j
                  |	d          c}	||      }
|
D cg c]  }|d   |   d    }}|D cg c](  }t        |j                  | j                        |      * c}S c c}	w c c}w c c}w )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents selected by maximal marginal relevance.
        Tro   rp   include_valuesrq   r'   rc   rr   r5   query_embeddingembedding_listrb   lambda_multr6   ru   r6   )	r:   r$   rg   r   r   	from_dictr   ry   r@   )r#   r%   rb   fetch_kr   rc   r'   rJ   r}   itemmmr_selectedrL   selectedr6   s                 r*   'max_marginal_relevance_search_by_vectorzAPineconeSparseVectorStore.max_marginal_relevance_search_by_vector  s    : I**""#! # 
 9% $I. &&tO'<= #
 @LL!GI&q)*5LL %
 (,,"@8T
 	
 M
s   B2
*B7-B<c           
       K   || j                   }| j                  4 d{   }|j                  ||dd||       d{   }	ddd      d{    t        |	d   D 
cg c]  }
t	        j
                  |
d          c}
||      }|D cg c]  }|	d   |   d    }}|D cg c](  }t        |j                  | j                        |      * c}S 7 7 7 # 1 d{  7  sw Y   xY wc c}
w c c}w c c}w w)	a  Return docs selected using the maximal marginal relevance asynchronously.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents selected by maximal marginal relevance.
        NTr   rr   r5   r   r6   r   )	r:   rU   rg   r   r   r   r   ry   r@   )r#   r%   rb   r   r   rc   r'   rJ   rZ   r}   r   r   rL   r   r6   s                  r*   (amax_marginal_relevance_search_by_vectorzBPineconeSparseVectorStore.amax_marginal_relevance_search_by_vector  s,    : I## 	 	sII'#!%# &  G	 	 9% $I. &&tO'<= #
 @LL!GI&q)*5LL %
 (,,t~~">R
 	
)		 	 	 	 M
sz   DCDC CCDCD"C2
DC7!D'-C<DCDC/#C&$C/+Dc                d    | j                   j                  |      }| j                  ||||||      S )a  Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Default will search in '' namespace.

        Returns:
            List of Documents selected by maximal marginal relevance.
        )r/   re   r   	r#   rg   rb   r   r   rc   r'   rJ   r%   s	            r*   max_marginal_relevance_searchz7PineconeSparseVectorStore.max_marginal_relevance_searchW  s8    : OO//6	;;q';	
 	
r+   c                   K   | j                   j                  |       d {   }| j                  ||||||       d {   S 7 #7 w)N)rb   r   r   rc   r'   )r/   rk   r   r   s	            r*   amax_marginal_relevance_searchz8PineconeSparseVectorStore.amax_marginal_relevance_searchy  sZ      //66u==	BB# C 
 
 	
 >
s!   A	AA	 AA	A	c                J   || j                   }|r  | j                  j                  d	d|d| y|Ed}t        dt	        |      |      D ])  }||||z    } | j                  j                  d	||d| + y|  | j                  j                  d	||d| yt        d      )
a=  Delete by vector IDs or filter.
        Args:
            ids: List of ids to delete.
            delete_all: Whether delete all vectors in the index.
            filter: Dictionary of conditions to filter vectors to delete.
            namespace: Namespace to search in. Default will search in '' namespace.
        NT
delete_allr'     r   rG   r'   rc   r'   3Either ids, delete_all, or filter must be provided.r9   )r:   r$   deleterA   rB   r    )	r#   rG   r   r'   rc   rJ   
chunk_sizerL   chunks	            r*   r   z PineconeSparseVectorStore.delete  s      IDJJMMfM  _J1c#h
3 LAJ/!

!!KeyKFKL  DJJKVyKFK  RSSr+   c           
       K   || j                   }|rC| j                  4 d {   } |j                  dd|d| d {    d d d       d {    y |d}| j                  4 d {   }g }t        dt	        |      |      D ].  }	||	|	|z    }
|j                   |j                  d|
|d|       0 t        j                  |  d {    d d d       d {    y |C| j                  4 d {   } |j                  d||d| d {    d d d       d {    y t        d      7 7 7 # 1 d {  7  sw Y   y xY w7 7 7 u# 1 d {  7  sw Y   y xY w7 t7 Y7 K# 1 d {  7  sw Y   y xY ww)	NTr   r   r   r   r   r   r9   )	r:   rU   r   rA   rB   rV   rW   rX   r    )r#   rG   r   r'   rc   rJ   rZ   r   r[   rL   r   s              r*   adeletez!PineconeSparseVectorStore.adelete  s     I'' Q Q3 cjjPDIPPPPQ Q   _J'' - -3q#c(J7 WAA
N3ELL!U!Uf!UVW nne,,,- -  '' O O3 cjjN)NvNNNO O
  RSSQPQ Q Q Q  -
 -- - - - ONO O O O
 s   !FD3FD: D6D:FD8F*E+F.AEEEFEF5E*6F9E0E,E0F"E.#F6D:8F:E EEFEFE'EE'#F,E0.F0F6E97F>F)r$   zOptional[Any]r%   z"Optional[PineconeSparseEmbeddings]r&   Optional[str]r'   r   r(   zOptional[DistanceStrategy]r   r   r   r   )returnr   )NNN    r   )rE   Iterable[str]rF   Optional[List[dict]]rG   Optional[List[str]]r'   r   rH   intrI   r   r0   r   rJ   r   r   z	List[str])rE   r   rF   r   rG   r   r'   r   rH   r   rI   r   r0   r   rJ   r   r   z	list[str])r_   NN)
rg   r<   rb   r   rc   Optional[dict]r'   r   r   List[Tuple[Document, float]])
rg   r<   rb   r   rc   r   r'   r   r   zlist[tuple[Document, float]])
r%   r   rb   r   rc   r   r'   r   r   r   )rg   r<   rb   r   rc   r   r'   r   rJ   r   r   List[Document])rg   r<   rb   r   rc   r   r'   r   rJ   r   r   list[Document])r_      g      ?NN)r%   r   rb   r   r   r   r   floatrc   r   r'   r   rJ   r   r   r   )rg   r<   rb   r   r   r   r   r   rc   r   r'   r   rJ   r   r   r   )rg   r<   rb   r   r   r   r   r   rc   r   r'   r   rJ   r   r   r   )NNNN)rG   r   r   zOptional[bool]r'   r   rc   r   rJ   r   r   None)__name__
__module____qualname____doc__r   COSINEr"   propertyr/   rR   r^   rh   rl   rd   rj   r   r   r   r   r   r   r   r   __classcell__)r)   s   @r*   r   r   $   sL   HX  $8<"(#'8H8O8O
 +/$(

 6
  	

 !
 6
 (
 "
2 	 	 +/#'#'$(+ $(++ (+ !	+
 !+ + "+ !+ + 
+` +/#'#'$(I $(II (I !	I
 !I I "I !I I 
I\ !%#'

 
 	

 !
 
&
2 !%#'

 
 	

 !
 
&
: !%#'!! 	!
 ! !! 
&!N !%#'## 	#
 # !# 
&#P !%#'33 3 	3
 !3 3 
36 !%#'33 3 	3
 !3 3 
3   !%#'4
4
 4
 	4

 4
 4
 !4
 4
 
4
r  !%#'7
7
 7
 	7

 7
 7
 !7
 7
 
7
x  !%#' 
 
  
 	 

  
  
 ! 
  
 
 
J  !%#'

 
 	

 
 
 !
 
 

, $(%)#'!%  # !	
   
F $(%)#'!%  # !	
   
r+   r   )"
__future__r   rW   loggingr=   typingr   r   r   r   r   r	   r
   langchain_core.documentsr   langchain_core.utilsr   langchain_core.vectorstoresr   pineconer   r   langchain_pinecone._utilitiesr   r   langchain_pinecone.embeddingsr   langchain_pinecone.vectorstoresr   	getLoggerr   rz   r   r   r9   r+   r*   <module>r      so    "      . . 3 ) C ?			8	$e;'f
 3 f
r+   