Quantcast
Channel: Weaviate Community Forum - Latest posts
Viewing all articles
Browse latest Browse all 3604

Error : text too long for vectorization

$
0
0

Description

We are moving from v3 to v4. Now, when I use batch upload or multi-insert, I get an error message saying the text is too long for vectorization.

Can anyone help me regarding this?

Below is my code.

def vectorize_tag_page_data(texts, class_name, layer_name):
    """Create a Weaviate collection and batch-insert ``texts`` into it.

    Each entry of ``texts`` becomes one object whose ``text`` property is the
    mapping value and whose ``metadata`` property is the mapping key.

    Args:
        texts: Mapping of metadata key -> text to store.
        class_name: Base collection name; the fixed suffix ``01pagedata`` is
            appended to it.
        layer_name: Not used by this function; kept so existing callers
            keep working.

    Raises:
        EnvironmentError: If ``URL``, ``AUTH_KEY`` or ``OPENAI_API_KEY`` is
            unset or empty.
    """
    # A fixed suffix is appended (layer_name does not take part in the name).
    class_name = f"{class_name}01pagedata"

    # Load configuration from environment variables
    weaviate_url = os.getenv("URL")
    weaviate_auth_key = os.getenv("AUTH_KEY")
    openai_key = os.getenv("OPENAI_API_KEY")

    if not weaviate_url or not weaviate_auth_key or not openai_key:
        raise EnvironmentError("One or more required environment variables are missing")

    # Prepare data objects for insertion
    data_objs = [{"text": text, "metadata": key} for key, text in texts.items()]
    total = len(data_objs)

    print(f"\n{total} data objects prepared for insertion.\n")
    print(f"Layer URL: {weaviate_url}")

    # One client serves both collection creation and insertion; it is closed
    # exactly once in the outer ``finally``.
    client = initialize_weaviate_client(weaviate_url, weaviate_auth_key, openai_key)
    try:
        # Create collection in Weaviate. Failure here is reported but not
        # fatal, so insertion is still attempted afterwards.
        try:
            response = client.collections.create(
                name=class_name,
                vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_openai(),
                properties=[
                    wvc.config.Property(name="text", data_type=wvc.config.DataType.TEXT),
                    wvc.config.Property(name="metadata", data_type=wvc.config.DataType.TEXT),
                ],
            )
            print(response.config.get(simple=False))
        except Exception as e:
            print(f"Error while creating collection: {e}")

        # Insert data using batching
        try:
            collection = client.collections.get(class_name)
            with collection.batch.dynamic() as batch:
                print("Batch insertion started.")
                for i, data_obj in enumerate(data_objs, 1):
                    batch.add_object(properties=data_obj)
                    print(f"Uploaded Tag Data: {i}/{total}")

            # Errors are only complete once the context manager has exited
            # and flushed the remaining objects, so check them here rather
            # than inside the ``with`` block.
            failed_objects = collection.batch.failed_objects
            if failed_objects:
                print(f"Number of errors during batch insertion: {len(failed_objects)}")
                for failed in failed_objects:
                    # Surface the server-side reason for each rejected object.
                    print(failed.message)
            else:
                print("Batch insertion completed successfully.")

            # Optional: Verify insertion by querying the collection
            try:
                result = collection.query.bm25(query="genAI", limit=10)
                print("\nQuery Results:", result)
            except Exception as e:
                print(f"Error while querying: {e}")
        except Exception as e:
            print(f"An exception occurred: {e}")
    finally:
        client.close()

def initialize_weaviate_client(url, auth_key, openai_key):
    """Build and return a Weaviate client for ``url``.

    Args:
        url: Address of the Weaviate instance.
        auth_key: API key used to authenticate against Weaviate.
        openai_key: OpenAI API key, forwarded in the ``X-OpenAI-Api-Key``
            header.

    Returns:
        The client object produced by ``wvc.Client``. The caller is
        responsible for closing it.
    """
    # NOTE(review): these keyword names (``auth_client_secret``,
    # ``additional_headers``) look like the v3-style constructor, while the
    # callers use the v4 ``client.collections`` API — confirm what ``wvc`` is
    # bound to and whether a v4 connection helper should be used instead.
    return wvc.Client(
        url=url,
        auth_client_secret=wvc.AuthApiKey(api_key=auth_key),
        additional_headers={
            "X-OpenAI-Api-Key": openai_key,
        },
    )

Server Setup Information

  • Weaviate Server Version: 4.8.1
  • Deployment Method: I am using it directly from Python
  • Multi Node? Number of Running Nodes: 1
  • Client Language and Version: Python 3.12.3
  • Multitenancy?:

Any additional Information

[ErrorObject(message="WeaviateInsertManyAllFailedError('Every object failed during insertion. Here is the set of all errors: text too long for vectorization')", object_=_BatchObject(collection='Sswhhsdflesdfesssssr01pagedata', vector=None, uuid='fb5fc0a6-f652-4e64-bc36-d1bcc0536e0b', properties={'text': 'a', 'metadata': 'name1'}, tenant=None, references=None, index=0, retry_count=0), original_uuid=None), ErrorObject(message="WeaviateInsertManyAllFailedError('Every object failed during insertion. Here is the set of all errors: text too long for vectorization')", object_=_BatchObject(collection='Sswhhsdflesdfesssssr01pagedata', vector=None, uuid='e21581f3-c7dd-447b-b240-bf566314eea7', properties={'text': '1', 'metadata': 'value1'}, tenant=None, references=None, index=1, retry_count=0), original_uuid=None)]

Data I am trying to insert: [{'text': 'a', 'metadata': 'name1'}, {'text': '1', 'metadata': 'value1'}]


Viewing all articles
Browse latest Browse all 3604

Trending Articles