import json

from elasticsearch import Elasticsearch
from elasticsearch import helpers
def read_and_write_index():
    """Bulk-index the poem records from the local JSON file into Elasticsearch.

    Loads a JSON array from the hard-coded input path, turns every record
    into a bulk action whose ``_id`` is the record's ``id`` and whose fields
    are ``contents``, ``type``, ``author`` and ``title``, then sends all
    actions to the ``poem_index`` index with one ``helpers.bulk()`` call.

    Records that cannot be indexed by key, or that lack one of the required
    keys, are reported on stdout and skipped. Errors raised while opening or
    parsing the input file propagate to the caller.

    Raises:
        SystemExit: with exit status 1 when the ``helpers.bulk()`` step fails.
    """
    # NOTE(review): the `port=` keyword here and `doc_type=` below are
    # presumably written for an older elasticsearch-py client -- confirm
    # against the installed client version.
    es = Elasticsearch(hosts='http://localhost', port=9200)

    # The context manager closes the file even if json.load() raises.
    with open('/Users/ligaofeng/Java/Resource/300/300.json',
              encoding="utf8", errors='ignore') as input_file:
        json_array = json.load(input_file)

    # define an empty list for the Elasticsearch docs
    doc_list = []

    # enumerate() supplies a position to report for records that have no
    # usable 'id' of their own
    for num, item in enumerate(json_array):
        try:
            dict_doc = {
                # the "_id" key specifies the ID Elasticsearch uses for the doc
                "_id": item['id'],
                "contents": item['contents'],
                "type": item['type'],
                "author": item['author'],
                "title": item['title'],
            }
        except (KeyError, TypeError) as err:
            # Key lookups fail with KeyError (missing field) or TypeError
            # (record is not a mapping), never JSONDecodeError. Report the
            # raw record, since a partially built dict would be misleading.
            print("ERROR for num:", num, "--", type(err).__name__ + ":", err,
                  "for doc:", item)
        else:
            # append the dict object to the list
            doc_list.append(dict_doc)

        # running count of the docs collected so far
        print("Dict docs length:", len(doc_list))

    try:
        print("\nAttempting to index the list of docs using helpers.bulk()")

        # use the helpers library's Bulk API to index list of Elasticsearch docs
        resp = helpers.bulk(
            es,
            doc_list,
            index="poem_index",
            doc_type="_doc",
        )

        # print the response returned by Elasticsearch
        print("helpers.bulk() RESPONSE:", resp)
        print("helpers.bulk() RESPONSE:", json.dumps(resp, indent=4))
    except Exception as err:
        # print any errors returned while making the helpers.bulk() API call
        print("Elasticsearch helpers.bulk() ERROR:", err)
        # quit() is the interactive-shell helper and exits with status 0;
        # raise SystemExit directly so the failure gives a non-zero status.
        raise SystemExit(1) from err