Quantcast
Channel: Weaviate Community Forum - Latest posts
Viewing all articles
Browse latest Browse all 3604

nearText operation doesn't work

$
0
0

thanks for the help!
image

# Schema with three classes. Article and Author point at each other through
# the `hasAuthors` / `wroteArticles` properties; Article also points at
# Category through `hasCategory`.
schema = {
    "classes": [
        # --- Article -------------------------------------------------------
        {
            "class": "Article",
            "description": "An Article class to store the article summary and its authors",
            "properties": [
                {
                    "name": "title",
                    "dataType": ["string"],
                    "description": "The title of the article",
                },
                {
                    "name": "summary",
                    "dataType": ["text"],
                    "description": "The summary of the article",
                },
                {
                    "name": "wordCount",
                    "dataType": ["int"],
                    "description": "The number of words in the article's summary",
                },
                {
                    # dataType names another class of this schema
                    "name": "hasAuthors",
                    "dataType": ["Author"],
                    "description": "The authors this article has",
                },
                {
                    # dataType names another class of this schema
                    "name": "hasCategory",
                    "dataType": ["Category"],
                    "description": "The category of this article",
                },
            ],
        },
        # --- Author --------------------------------------------------------
        {
            "class": "Author",
            "description": "An Author class to store the author's name and the articles who wrote",
            "properties": [
                {
                    "name": "name",
                    "dataType": ["string"],
                    "description": "The name of the author",
                },
                {
                    # back-reference to the Article class
                    "name": "wroteArticles",
                    "dataType": ["Article"],
                    "description": "The articles this author has",
                },
            ],
        },
        # --- Category ------------------------------------------------------
        {
            "class": "Category",
            "description": "A Category class to store the category that article belongs to",
            "properties": [
                {
                    "name": "name",
                    "dataType": ["string"],
                    "description": "the name of the category",
                },
            ],
        },
    ],
}

This is the schema. I downloaded news articles from cnn.com to use as my data:

import newspaper
import uuid
import json
from tqdm import tqdm

def get_articles_from_newspaper(
        news_url: str,
        max_articles: int = 100
    ) -> list:
    """
    Download articles from a news site and return them as plain dicts.

    Each candidate article is downloaded, parsed and run through ``nlp()``.
    It is kept only when it has a non-empty title, a non-empty summary and
    at least one author. Articles that raise while being processed are
    skipped (best effort).

    Parameters
    ----------
    news_url : str
        URL of the news site, passed to ``newspaper.build``.
    max_articles : int
        Upper bound on the number of articles returned. It is clamped to
        the number of articles the built source reports.

    Returns
    -------
    list
        One dict per kept article with the keys ``id``, ``title``,
        ``summary``, ``authors`` and ``word_count``.
    """
    objects = []

    # Build the source without memoization so every call sees all articles.
    news_builder = newspaper.build(news_url, memoize_articles=False)

    available = news_builder.size()
    max_articles = min(max_articles, available)

    # The context manager closes the progress bar even if an error escapes.
    with tqdm(total=max_articles) as pbar:
        pbar.set_description(f"{news_url}")

        for i in range(available):
            if len(objects) >= max_articles:
                break

            article = news_builder.articles[i]
            try:
                article.download()
                article.parse()
                article.nlp()

                # Truthiness covers both the empty-string and the None case.
                if not (article.title and article.summary and article.authors):
                    continue

                # Name-based UUID derived from the article URL.
                article_id = uuid.uuid3(uuid.NAMESPACE_DNS, article.url)

                objects.append({
                    'id': str(article_id),
                    'title': article.title,
                    'summary': article.summary,
                    'authors': article.authors,
                    'word_count': len(article.summary.split())
                })

                pbar.update(1)

            except Exception:
                # Something went wrong with this article; skip it. Catching
                # Exception (not a bare except) lets Ctrl-C / SystemExit
                # still interrupt a long download run.
                continue

    return objects
# Accumulate the scraped articles into one list; more sources can be appended
# with further extend() calls.
data = []
data.extend(get_articles_from_newspaper('http://cnn.com'))

Then I upload my data:

from weaviate.batch import Batch # for the typing purposes
from weaviate.util import generate_uuid5


def add_article(batch: Batch, article_data: dict) -> str:
    """
    Queue one article in the batch as an ``Article`` object.

    Reads the ``title``, ``word_count``, ``summary`` and ``id`` keys of
    ``article_data``. Newline characters are stripped from the summary
    before it is stored. The article's own ``id`` is used as the object
    UUID and is returned to the caller.
    """
    title = article_data['title']
    word_count = article_data['word_count']
    # Drop every newline character from the summary.
    flat_summary = ''.join(article_data['summary'].split('\n'))
    article_id = article_data['id']

    batch.add_data_object(
        data_object={
            'title': title,
            'wordCount': word_count,
            'summary': flat_summary,
        },
        class_name='Article',
        uuid=article_id,
    )

    return article_id

def add_author(batch: Batch, author_name: str) -> str:
    """
    Queue one author in the batch as an ``Author`` object.

    The object UUID is generated from the author's name with
    ``generate_uuid5`` and is returned to the caller.
    """
    # UUID derived from the author's name
    author_id = generate_uuid5(author_name)

    batch.add_data_object(
        data_object={'name': author_name},
        class_name='Author',
        uuid=author_id,
    )

    return author_id

def add_references(batch: Batch, article_id: str, author_id: str)-> None:
    """
    Queue the two cross-references linking an author and an article.

    The Author -> Article reference (``wroteArticles``) is added first,
    followed by the Article -> Author reference (``hasAuthors``).
    """
    # (source uuid, source class, source property, target uuid)
    links = (
        (author_id, 'Author', 'wroteArticles', article_id),   # Author -> Article
        (article_id, 'Article', 'hasAuthors', author_id),     # Article -> Author
    )

    for source_id, source_class, source_property, target_id in links:
        batch.add_reference(
            from_object_uuid=source_id,
            from_object_class_name=source_class,
            from_property_name=source_property,
            to_object_uuid=target_id,
        )
# Configure batching, then queue every article together with its authors and
# the references between them.
# NOTE(review): leaving the `with` block is presumably what sends any
# still-queued items — confirm against the weaviate client in use.
client.batch.configure(batch_size=50, dynamic=True, callback=None)
with client.batch as batch:
    for article_data in data:
        # the article itself
        article_id = add_article(batch, article_data)

        # each author of the article, plus the links in both directions
        for author in article_data['authors']:
            author_id = add_author(batch, author)
            add_references(batch, article_id=article_id, author_id=author_id)

Viewing all articles
Browse latest Browse all 3604

Trending Articles