Quantcast
Channel: Weaviate Community Forum - Latest posts
Viewing all articles
Browse latest Browse all 3605

Multimodal search with Bring your own vector

$
0
0

Hi, we are trying to create a schema that supports multimodal search: users issue text queries, but the semantic search needs to run across columns containing either text or vectors.

Below is the schema, where image_embeddings is a bring-your-own-vector column: we will generate the embeddings for an image ourselves and don't want Weaviate to create vectors for it, but it needs to be part of the multimodal search together with other fields such as filename, tags, and mime_type. What is the correct way to define the schema for this multimodal search with bring-your-own vectors?

# Create the "SemanticSchema" collection with one CLIP named vector per text
# field, one combined CLIP named vector over all text fields, and one
# bring-your-own named vector for the precomputed image embedding.
client.collections.create(
    name="SemanticSchema",
    properties=[
        wc.Property(name="lcid", data_type=wc.DataType.TEXT),
        wc.Property(name="checksum", data_type=wc.DataType.TEXT),
        wc.Property(name="filename", data_type=wc.DataType.TEXT),
        wc.Property(name="tags", data_type=wc.DataType.TEXT),
        wc.Property(name="mime_type", data_type=wc.DataType.TEXT),
        wc.Property(name="person_names", data_type=wc.DataType.TEXT_ARRAY),
        wc.Property(name="location", data_type=wc.DataType.TEXT),
        # No "image_embeddings" property here: a precomputed embedding is not
        # object data to be vectorized. It is stored as the named vector
        # "image_embeddings" declared below and supplied per object at insert
        # time, e.g. vector={"image_embeddings": [...]}.
    ],
    # Define & configure the named vectors
    vectorizer_config=[
        # One CLIP text vector per searchable text field.
        wc.Configure.NamedVectors.multi2vec_clip(
            name="filename", text_fields=["filename"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="tags", text_fields=["tags"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="mime_type", text_fields=["mime_type"]
        ),
        wc.Configure.NamedVectors.multi2vec_clip(
            name="location", text_fields=["location"]
        ),
        # Combined CLIP vector over all text fields. The original also listed
        # "image_embeddings" under image_fields, but multi2vec-clip image
        # fields must point at image (BLOB) properties that the module
        # vectorizes itself, not at an array of floats.
        # NOTE(review): the name is kept so existing queries that target
        # "image_filename_tags" keep working, although it is now text-only.
        wc.Configure.NamedVectors.multi2vec_clip(
            name="image_filename_tags",
            text_fields=[
                wc.Multi2VecField(name="filename"),
                wc.Multi2VecField(name="tags"),
                wc.Multi2VecField(name="mime_type"),
                wc.Multi2VecField(name="location"),
            ],
        ),
        # Bring-your-own vector: no vectorizer runs for this named vector;
        # the caller provides the image embedding on insert.
        # NOTE(review): for text queries to be comparable against it, the
        # embedding presumably has to come from the same CLIP model the
        # module uses - confirm against the deployment.
        wc.Configure.NamedVectors.none(name="image_embeddings"),
    ],
    # Define the generative module
    #generative_config=wc.Configure.Generative.openai(),

    # Add sharding configuration (via the same `wc` alias as everything else)
    sharding_config=wc.Configure.sharding(
        virtual_per_physical=128,
        desired_count=2,
        desired_virtual_count=128,
    ),
    replication_config=wc.Configure.replication(
        factor=2,
        async_enabled=True,
    ),
)

Viewing all articles
Browse latest Browse all 3605

Trending Articles