# Source code for compshs.text.tests.test_topic_modelling

import numpy as np
from sklearn.decomposition import LatentDirichletAllocation, NMF
from scipy import sparse
import unittest

from compshs.text.topic_modelling import TopicModeler


class TestTopicModelling(unittest.TestCase):
    """Unit tests for ``TopicModeler``, run on a small sparse identity matrix."""

    def setUp(self):
        """Create the 3x3 sparse identity matrix used as input by every test."""
        self.matrix = sparse.identity(3)

    def _check_topic_output(self, topic_names, topic_distribution):
        """Assert that a ``(topic_names, topic_distribution)`` pair is consistent.

        The distribution must have one column per topic name and one row per
        row of the input matrix.
        """
        self.assertEqual(len(topic_names), topic_distribution.shape[1])
        self.assertEqual(topic_distribution.shape[0], self.matrix.shape[0])

    def test_init_topic_modeler(self):
        """``model_name`` selects the wrapped estimator; unknown names raise."""
        topic_modeler = TopicModeler(model_name='LDA')
        self.assertIsInstance(topic_modeler.modeler, LatentDirichletAllocation)

        topic_modeler = TopicModeler(model_name='NMF')
        self.assertIsInstance(topic_modeler.modeler, NMF)

        with self.assertRaises(ValueError):
            TopicModeler(model_name='toto')

    def test_fit_transform(self):
        """``fit``, ``transform`` and ``fit_transform`` give consistent shapes."""
        topic_modeler = TopicModeler(model_name='LDA')
        topic_modeler.fit(self.matrix)
        # After fitting, the wrapped estimator must expose non-trivial components.
        self.assertTrue(np.any(topic_modeler.modeler.components_ != 0))

        topic_names, topic_distribution = topic_modeler.transform(self.matrix)
        self._check_topic_output(topic_names, topic_distribution)

        topic_names, topic_distribution = topic_modeler.fit_transform(self.matrix)
        self._check_topic_output(topic_names, topic_distribution)

    def test_get_word_contributions(self):
        """A fitted modeler returns its word contributions as a non-empty array."""
        topic_modeler = TopicModeler(model_name='LDA')
        topic_modeler.fit(self.matrix)

        word_contributions = topic_modeler.get_word_contributions()
        self.assertIsInstance(word_contributions, np.ndarray)
        # The previous check, ``len(...) >= 0``, holds for any sized object and
        # could never fail; require at least one entry instead.
        self.assertGreater(len(word_contributions), 0)