Source code for compshs.data.tests.test_parse

import os
import tempfile
import unittest
import sqlite3
import json

from compshs.data.base import Dataset
from compshs.data.parse import from_directory, from_sql


[docs]class TestFromDirectory(unittest.TestCase):
[docs] def setUp(self): self.test_dir = tempfile.TemporaryDirectory() self.dir_path = self.test_dir.name self.file_contents = {'file1.txt': 'The quick brown fox.', 'file2.txt': 'The lazier dog.'} for filename, content in self.file_contents.items(): with open(os.path.join(self.dir_path, filename), 'w', encoding='utf-8') as f: f.write(content)
[docs] def test_from_directory_with_names(self): dataset_name = 'xyz' dataset = from_directory(self.dir_path, dataset_name) self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, 'xyz') self.assertEqual(len(dataset.corpus), len(self.file_contents)) self.assertCountEqual(dataset.corpus, list(self.file_contents.values()))
[docs] def test_from_directory_without_names(self): dataset = from_directory(self.dir_path) self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, os.path.basename(self.dir_path)) self.assertEqual(len(dataset.corpus), len(self.file_contents)) self.assertCountEqual(dataset.corpus, list(self.file_contents.values()))
[docs] def test_from_directory_empty(self): empty_dir = tempfile.TemporaryDirectory() dataset = from_directory(empty_dir.name, 'empty') self.assertIsInstance(dataset, Dataset) self.assertEqual(dataset.name, 'empty') self.assertEqual(len(dataset.corpus), 0) empty_dir.cleanup()
[docs]class TestFromSQL(unittest.TestCase): cursor = None connection = None test_db_path = None
[docs] @classmethod def setUpClass(cls): cls.test_db_fd, cls.test_db_path = tempfile.mkstemp(suffix='.db') cls.connection = sqlite3.connect(cls.test_db_path) cls.cursor = cls.connection.cursor() cls.cursor.execute(''' CREATE TABLE test_table( id INTEGER PRIMARY KEY, data TEXT ) ''') data = [ json.dumps({'title': 'doc1', 'content': 'The quick brown fox.'}), json.dumps({'title': 'doc2', 'content': 'The lazier dog.'}) ] cls.cursor.executemany('INSERT INTO test_table (data) VALUES (?)', [(d,) for d in data]) cls.connection.commit()
[docs] @classmethod def tearDownClass(cls): cls.connection.close() os.close(cls.test_db_fd) os.remove(cls.test_db_path)
[docs] def test_01_from_sql_valid(self): dataset = from_sql(self.test_db_path, 'my_dataset_name', 'test_table', 'data', 'title', 'content') self.assertEqual(dataset.name, 'my_dataset_name') self.assertEqual(len(dataset.corpus), 2) self.assertEqual(dataset.corpus[0], ('doc1', 'The quick brown fox.')) self.assertEqual(dataset.corpus[1], ('doc2', 'The lazier dog.')) self.assertIsInstance(dataset, Dataset)
[docs] def test_02_from_sql_default_dataset_name(self): dataset = from_sql(self.test_db_path, None, 'test_table', 'data', 'title', 'content') self.assertEqual(dataset.name, 'test_table')
[docs] def test_03_from_sql_empty_table(self): self.cursor.execute('DROP TABLE test_table') self.cursor.execute('CREATE TABLE test_table (id INTEGER PRIMARY KEY, data TEXT)') self.connection.commit() dataset = from_sql(self.test_db_path, 'empty_dataset', 'test_table', 'data', 'title', 'content') self.assertEqual(dataset.name, 'empty_dataset') self.assertEqual(len(dataset.corpus), 0)