Finally solved it by setting the embedding model explicitly:
Settings.embed_model = CohereEmbedding(cohere_api_key=os.getenv("COHERE_APIKEY"))
Below is the revised code. Any suggestions to improve it further? Am I doing something wrong?
import asyncio
import os
from typing import List, Tuple

import weaviate
import weaviate.classes.config as wc
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.schema import BaseNode, Document
from llama_index.core.storage import StorageContext
from llama_index.embeddings.cohere import CohereEmbedding
from llama_index.readers.web import SimpleWebPageReader
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from weaviate.classes.init import Auth, AdditionalConfig, Timeout
from weaviate.exceptions import WeaviateBaseError


# Get the data from the web
def AddData() -> Tuple[List[BaseNode], List[Document]]:
    """Load the source web page(s) and split them into nodes.

    Prints the number of loaded documents and the content of every node.

    Returns:
        A ``(nodes, docs)`` tuple: the parsed nodes and the documents they
        were parsed from.
    """
    docs = SimpleWebPageReader(html_to_text=True).load_data(
        # NOTE(review): this looks like a link title rather than a URL
        # (probably rewritten when the code was pasted) -- confirm and put
        # the real page address here.
        ["LlamaIndex and Weaviate | Weaviate - Vector Database"]
    )
    print(f"Loaded {len(docs)} documents")
    parser = SimpleNodeParser()
    nodes = parser.get_nodes_from_documents(docs, show_progress=True)
    for n in nodes:
        print(n.get_content())
    return nodes, docs


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*; raise if it is unset or empty."""
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


def _ensure_collection(client, name: str) -> None:
    """Create collection *name* with the Cohere modules unless it already exists."""
    if client.collections.exists(name):
        print(f"Collection '{name}' already exists")
        return
    client.collections.create(
        name=name,
        description="A collection of blog posts",
        properties=[
            wc.Property(
                name="content",
                data_type=wc.DataType.TEXT,
                description="The content of the blog post",
            ),
        ],
        # Define the vectorizer module
        vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(),
        # Define the generative module
        generative_config=wc.Configure.Generative.cohere(),
    )


async def main(do_insert: bool = False) -> None:
    """Connect to Weaviate Cloud, optionally index the web data, and run one query.

    Reads ``COHERE_APIKEY``, ``WEAVIATE_URL`` and ``WEAVIATE_APIKEY`` from the
    environment (after loading a ``.env`` file), makes sure the ``BlogPosts``
    collection exists, and prints the top retrieval hit for a sample question.

    Nothing in this coroutine is awaited; it is ``async`` only so it can be
    started with ``asyncio.run``.

    Args:
        do_insert: When true, fetch the web data with ``AddData`` and index it
            into the collection before querying. Defaults to ``False``, in
            which case no page is fetched.

    Raises:
        SystemExit: With status 1 on any failure, after printing the reason.
    """
    load_dotenv()
    client = None
    try:
        # Fail early with a clear message instead of passing None downstream.
        cohere_key = _require_env("COHERE_APIKEY")
        client = weaviate.connect_to_weaviate_cloud(
            cluster_url=_require_env("WEAVIATE_URL"),
            auth_credentials=Auth.api_key(_require_env("WEAVIATE_APIKEY")),
            additional_config=AdditionalConfig(
                timeout=Timeout(init=30, query=60, insert=30),  # Values in seconds
            ),
            headers={"X-Cohere-Api-Key": cohere_key},
            skip_init_checks=False,
        )

        # NOTE(review): the original author was unsure whether this is needed,
        # since the key is also passed explicitly to CohereEmbedding below.
        # Kept so the environment stays as before.
        os.environ["COHERE_API_KEY"] = cohere_key

        if not client.is_ready():
            print("Failed to connect to Weaviate Cloud")
            raise SystemExit(1)
        print(f"Weaviate is ready! Successfully connected to {client.get_meta()}")

        # Show the existing collections.
        collections = client.collections.list_all()
        if collections:
            print(f"Found {len(collections)} collections:")
            for c in collections:
                print(c)
        else:
            print("No collections found")

        _ensure_collection(client, "BlogPosts")

        vector_store = WeaviateVectorStore(
            weaviate_client=client, index_name="BlogPosts", text_key="content"
        )
        Settings.embed_model = CohereEmbedding(cohere_api_key=cohere_key)

        if do_insert:
            # Only fetch and parse the page when it is actually going to be indexed.
            _, documents = AddData()
            storage_context = StorageContext.from_defaults(vector_store=vector_store)
            VectorStoreIndex.from_documents(
                documents=documents,
                storage_context=storage_context,
                show_progress=True,
            )

        retriever = VectorStoreIndex.from_vector_store(vector_store).as_retriever(
            similarity_top_k=1
        )
        results = retriever.retrieve("What is weaviate?")
        if results:
            print(results[0])
        else:
            # An empty collection yields no hits; avoid an IndexError.
            print("No matching nodes found")
    except WeaviateBaseError as e:
        # This handler covers every Weaviate call above, not only the connection.
        print(f"Weaviate error: {e.message}")
        raise SystemExit(1) from e
    except Exception as e:
        print(f"An error occurred: {e}")
        raise SystemExit(1) from e
    finally:
        if client is not None:
            client.close()


if __name__ == "__main__":
    asyncio.run(main())