在 spaCy 中可以通过以下三种方法过滤文本：按词性过滤（例如去除动词）、去除停用词，以及使用自定义过滤函数。
import spacy
# Load the `en_core_web_sm` pipeline and parse the example sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sample text for filtering.")
# Keep the text of every token whose coarse part-of-speech tag is not VERB,
# then join the surviving tokens with single spaces.
filtered_text = " ".join(
    token.text
    for token in doc
    if token.pos_ != "VERB"
)
print(filtered_text)
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
# Load the `en_core_web_sm` pipeline and parse the example sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sample text for filtering.")
# Drop stop words: a token is kept only when its lower-cased text does not
# appear in the English STOP_WORDS collection imported from spaCy.
filtered_text = " ".join(
    token.text
    for token in doc
    if token.text.lower() not in STOP_WORDS
)
print(filtered_text)
import spacy
# Load the `en_core_web_sm` pipeline; `nlp` is called later to parse the example text.
nlp = spacy.load("en_core_web_sm")
def custom_filter(doc, excluded=("sample", "filtering")):
    """Return the token texts of *doc* joined by spaces, minus excluded words.

    Args:
        doc: Iterable of tokens that expose a ``text`` attribute (for
            example a spaCy ``Doc``).
        excluded: Words to drop. Matching is case-insensitive. The default
            keeps the two words that were previously hard-coded.

    Returns:
        The remaining token texts joined by single spaces; an empty string
        when nothing is left.
    """
    # Normalize once and use a set so each membership test is O(1).
    blocked = {word.lower() for word in excluded}
    return " ".join(
        token.text for token in doc if token.text.lower() not in blocked
    )
# Parse the example sentence, then pass the result through custom_filter.
doc = nlp("This is a sample text for filtering.")
filtered_text = custom_filter(doc)
# Print the filtered sentence.
print(filtered_text)