spaCy¶
spaCy is a natural language processing library that provides tokenization, part-of-speech tagging, lemmatization, named entity recognition, noun phrase extraction, and other functions.
Install¶
# Install spaCy 3 for CUDA 11.2; replace the extra in [] to match the CUDA version of your environment
pip install spacy[cuda112]==3.0.6
# Install spaCy 2 for CUDA 11.2; replace the extra in [] to match the CUDA version of your environment
pip install spacy[cuda112]==2.3.5
# Downloading the model through the spacy module may fail if the download server is not reachable from your network; in that case, install the model directly with pip as shown below
python -m spacy download en_core_web_sm
# install 3.0.0 en_core_web_sm
pip install https://mirror.ghproxy.com/https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl --no-cache
# install 2.3.1 en_core_web_sm
pip install https://mirror.ghproxy.com/https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz --no-cache
Usage¶
import spacy

# Load the small English pipeline (tokenizer, tagger, parser and NER).
# The "en_core_web_sm" model must already be installed.
nlp = spacy.load("en_core_web_sm")

# Sample document. Adjacent string literals are concatenated by Python;
# the fragments that contain a literal double quote are written with single
# quotes so the embedded quotation marks do not terminate the string early.
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        'seriously. "I can tell you very senior CEOs of major American '
        "car companies would shake my hand and turn away because I wasn't "
        'worth talking to," said Thrun, in an interview with Recode earlier '
        "this week.")

# Run the whole pipeline over the text.
doc = nlp(text)

# Analyze syntax: noun chunks and the lemmas of all tokens tagged as verbs.
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts: print each entity with its label.
for entity in doc.ents:
    print(entity.text, entity.label_)