python – UserWarning: Your stop_words may be inconsistent with your preprocessing

I am following this tutorial to make a chatbot with the following code.

import nltk
import numpy as np
import random
import string
import bs4 as bs
import urllib.request
import re

# Creating the Corpus
raw_html = urllib.request.urlopen('')
raw_html =
article_html = bs.BeautifulSoup(raw_html, 'lxml')
article_paragraphs = article_html.find_all('p')

article_text = ''

for para in article_paragraphs:
    article_text += para.text

article_text = article_text.lower()

# Text Preprocessing
article_text = re.sub(r'((0-9)*)', ' ', article_text)
article_text = re.sub(r's+', ' ', article_text)
article_sentences = nltk.sent_tokenize(article_text)
article_words = nltk.word_tokenize(article_text)

wnlemmatizer = nltk.stem.WordNetLemmatizer()

# Helper Function
def perform_lemmatization(tokens):
    return (wnlemmatizer.lemmatize(token) for token in tokens)

punctuation_removal = dict((ord(punctuation), None) for punctuation in string.punctuation)

def get_processed_text(document):
    return perform_lemmatization(nltk.word_tokenize(document.lower().translate(punctuation_removal)))

# Responding to Greetings
greeting_inputs = ("hey", "good morning", "good evening", "morning", "evening", "hi", "whatsup")
greeting_responses = ("hey", "hey hows you?", "*nods*", "hello, how you doing", "hello", "Welcome, I am good and you")

def generate_greeting_response(greeting):
    for token in greeting.split():
        if token.lower() in greeting_inputs:
            return random.choice(greeting_responses)
            return 'Try again'

# Responding to User Queries
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def generate_response(user_input):
    tennisrobo_response = ''

    word_vectorizer = TfidfVectorizer(tokenizer=get_processed_text, stop_words='english')
    all_word_vectors = word_vectorizer.fit_transform(article_sentences)
    similar_vector_values = cosine_similarity(all_word_vectors(-1), all_word_vectors)
    similar_sentence_number = similar_vector_values.argsort()(0)(-2)

    matched_vector = similar_vector_values.flatten()
    vector_matched = matched_vector(-2)

    if vector_matched == 0:
        tennisrobo_response = tennisrobo_response + "I am sorry, I could not understand you"
        return tennisrobo_response
        tennisrobo_response = tennisrobo_response + article_sentences(similar_sentence_number)
        return tennisrobo_response

Running the code, I get the following error:

UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ('ha', 'le', 'u', 'wa') not in stop_words.
  warnings.warn('Your stop_words may be inconsistent with '

After searching google I got linked to this answer saying that there may be an inconsistency between my stop words and tokenizer. However, I am very new to python and NLTK and cannot find where the inconsistency is.

Where is the part of the code that is causing this error?