Coherence
This notebook covers how to get started with the Coherence
vector store.
Coherence is an in-memory data grid that provides a distributed, fault-tolerant, and scalable platform for managing and accessing data. It is primarily used for high-performance, mission-critical enterprise applications that require low-latency access to large datasets. In addition to the commercially available product, Oracle also offers Coherence CE (Community Edition).
Setup
To access Coherence
vector stores you'll need to install the langchain-coherence
integration package.
pip install langchain_coherence
Usage
Before using LangChain's CoherenceVectorStore you must ensure that a Coherence server (Coherence CE 25.03+ or Oracle Coherence 14.1.2+) is running.
For local development, we recommend using the Coherence CE container image:
docker run -d -p 1408:1408 ghcr.io/oracle/coherence-ce:25.03.2
Add Documents and retrieve them:
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session
from langchain_core.vectorstores.coherence_store import CoherenceVectorStore
session: Session = await Session.create()
try:
named_map: NamedMap[str, Document] = await session.get_map("my-map")
embedding :Embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384)
d1 :Document = Document(id="1", page_content="apple")
d2 :Document = Document(id="2", page_content="orange")
documents = [d1, d2]
await cvs.aadd_documents(documents)
ids = [doc.id for doc in documents]
l = await cvs.aget_by_ids(ids)
assert len(l) == len(ids)
print("====")
for e in l:
print(e)
finally:
await session.close()
Delete Documents:
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session
from langchain_core.vectorstores.coherence_store import CoherenceVectorStore
session: Session = await Session.create()
try:
named_map: NamedMap[str, Document] = await session.get_map("my-map")
embedding :Embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-l6-v2")
# this embedding generates vectors of dimension 384
cvs :CoherenceVectorStore = await CoherenceVectorStore.create(
named_map,embedding,384)
d1 :Document = Document(id="1", page_content="apple")
d2 :Document = Document(id="2", page_content="orange")
documents = [d1, d2]
await cvs.aadd_documents(documents)
ids = [doc.id for doc in documents]
await cvs.adelete(ids)
finally:
await session.close()
Similarity Search:
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session
from langchain_core.vectorstores.coherence_store import CoherenceVectorStore
def test_data():
    """Return the ten sample documents used by the similarity-search example.

    Each document has a string id ("1" through "10") and a single word as
    its page content; the words are a mix of fruit and animal names.
    """
    words = [
        "apple",
        "orange",
        "tiger",
        "cat",
        "dog",
        "fox",
        "pear",
        "banana",
        "plum",
        "lion",
    ]
    # Ids are the 1-based position of each word, rendered as a string.
    return [
        Document(id=str(position), page_content=word)
        for position, word in enumerate(words, start=1)
    ]
async def test_asimilarity_search():
    """Store the sample documents and run a text similarity search.

    Adds the documents from ``test_data()`` to a vector store backed by the
    "my-map" named map, checks that all ten can be fetched by id, then
    searches for "fruit" and prints the hits. The session is closed on exit.
    """
    documents = test_data()
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        # this embedding generates vectors of dimension 384
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding, 384
        )
        await cvs.aadd_documents(documents)

        # Sanity check: every sample document can be read back by its id.
        stored = await cvs.aget_by_ids([doc.id for doc in documents])
        assert len(stored) == 10

        # No result count is passed; this example expects four hits back
        # (presumably the store's default -- confirm against the API docs).
        result = await cvs.asimilarity_search("fruit")
        assert len(result) == 4
        print("====")
        for hit in result:
            print(hit)
    finally:
        await session.close()
Similarity Search by vector:
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from coherence import NamedMap, Session
from langchain_core.vectorstores.coherence_store import CoherenceVectorStore
def test_data():
    """Return the ten sample documents used by the search-by-vector example.

    Each document has a string id ("1" through "10") and a single word as
    its page content; the words are a mix of fruit and animal names.
    """
    contents = (
        "apple",
        "orange",
        "tiger",
        "cat",
        "dog",
        "fox",
        "pear",
        "banana",
        "plum",
        "lion",
    )
    # Pair each word with its 1-based position, which becomes the string id.
    documents = [
        Document(id=str(number), page_content=text)
        for number, text in zip(range(1, len(contents) + 1), contents)
    ]
    return documents
async def test_asimilarity_search_by_vector():
    """Store the sample documents and search with a pre-computed vector.

    Adds the documents from ``test_data()`` to a vector store backed by the
    "my-map" named map, checks that all ten can be fetched by id, embeds the
    query "fruit" through the store's own embeddings object, and searches
    with the resulting vector. The session is closed on exit.
    """
    documents = test_data()
    session: Session = await Session.create()
    try:
        named_map: NamedMap[str, Document] = await session.get_map("my-map")
        # this embedding generates vectors of dimension 384
        embedding: Embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
        cvs: CoherenceVectorStore = await CoherenceVectorStore.create(
            named_map, embedding, 384
        )
        await cvs.aadd_documents(documents)

        # Sanity check: every sample document can be read back by its id.
        stored = await cvs.aget_by_ids([doc.id for doc in documents])
        assert len(stored) == 10

        # Embed the query text first, then search with the vector itself.
        query_vector = cvs.embeddings.embed_query("fruit")
        result = await cvs.asimilarity_search_by_vector(query_vector)
        # This example expects four hits back (presumably the store's
        # default result count -- confirm against the API docs).
        assert len(result) == 4
        print("====")
        for hit in result:
            print(hit)
    finally:
        await session.close()
Related
- Vector store conceptual guide
- Vector store how-to guides