import numpy as np
import unittest
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from compshs.text.frequency import FrequencyCounter
[docs]class TestFrequency(unittest.TestCase):
[docs] def setUp(self):
self.corpus = ['The quick brown fox.', 'The dog is lazier.']
[docs] def test_init_vectorizer(self):
counter = FrequencyCounter(vectorizer_name='tf')
self.assertIsInstance(counter.vectorizer, CountVectorizer)
counter = FrequencyCounter(vectorizer_name='tfidf')
self.assertIsInstance(counter.vectorizer, TfidfVectorizer)
with self.assertRaises(ValueError):
FrequencyCounter(vectorizer_name='toto')
[docs] def test_get_token_names(self):
counter = FrequencyCounter(vectorizer_name='tf')
_ = counter.fit(self.corpus)
token_names = counter.get_token_names()
self.assertIsInstance(token_names, np.ndarray)
self.assertTrue(len(token_names) >= 0)