Source code for compshs.utils.check

"""
Created in 2025
@author: Simon Delarue <simon.delarue@telecom-paris.fr>
"""

import re
import spacy
import sqlite3
from typing import Tuple


def load_lang(lang: str = 'en_core_web_sm'):
    """Load a (trained) Spacy pipeline, downloading it when it is missing.

    The function tries, in order:

    1. ``spacy.load(lang)``;
    2. ``spacy.cli.download(lang)`` followed by a second ``spacy.load(lang)``;
    3. a blank (tokenizer-only) pipeline for the language the pipeline name
       starts with.

    Parameters
    ----------
    lang: str
        Spacy pipeline name (default is the english pipeline ``'en_core_web_sm'``).

    Returns
    -------
    Trained spacy pipeline, otherwise blank minimal pipeline.
    """
    try:
        return spacy.load(lang)
    except OSError as e:
        print(f'Error: {e} Failed to load Spacy pipeline {lang}.')
        print('Downloading the Spacy pipeline...')

    try:
        spacy.cli.download(lang)
        return spacy.load(lang)
    except (OSError, SystemExit) as e:
        # NOTE(review): spaCy's download helper is expected to signal a failed
        # download by exiting the interpreter (SystemExit) - confirm against
        # the installed spaCy version. Catching it keeps the documented
        # fallback reachable when the download itself fails (e.g. offline).
        print(f'Error: {e} Failed to load Spacy pipeline after download.')
        print('Fall back to minimal blank pipeline, i.e. tokenizer only.')

    # ``spacy.blank`` takes a language code ('en'), not a pipeline name
    # ('en_core_web_sm'); pipeline names start with the language code followed
    # by an underscore, so keep only that leading part.
    lang_code = lang.split('_', 1)[0]
    return spacy.blank(lang_code)
def check_sql_identifier(identifier: str) -> str:
    """Validate an SQL identifier (table or column name) before use in a query.

    An identifier is accepted when it is non-empty and made only of ASCII
    letters, digits, underscores and spaces.

    Parameters
    ----------
    identifier: str
        Identifier name to check (column or table name).

    Returns
    -------
    str
        The identifier, unchanged, when it is valid.

    Raises
    ------
    ValueError
        If the identifier is empty or contains any other character.
    """
    is_safe = re.fullmatch(r"[A-Za-z0-9_ ]+", identifier) is not None
    if is_safe:
        return identifier
    raise ValueError(f'Invalid SQL identifier: {identifier}')
def check_sql_identifiers(identifiers: Tuple[str, ...]) -> Tuple[str, ...]:
    """Validate several SQL identifiers (table or column names) at once.

    Each identifier is passed through ``check_sql_identifier`` in order; the
    first invalid one makes that call raise.

    Parameters
    ----------
    identifiers: Tuple
        Identifier names to check (column or table names).

    Returns
    -------
    tuple
        The identifiers, in the same order, if all of them are valid.
    """
    checked = map(check_sql_identifier, identifiers)
    return tuple(checked)
def check_exist_table_name(connection: sqlite3.Connection, table_name: str) -> bool:
    """Check whether a table exists in a database.

    Parameters
    ----------
    connection: sqlite3.Connection
        Open connection to the SQLite database.
    table_name: str
        Name of the table to look for.

    Returns
    -------
    bool
        ``True`` if ``sqlite_master`` lists an entry of type ``'table'`` with
        this name, ``False`` otherwise.
    """
    # The name is bound as a query parameter, never interpolated.
    query = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    match = connection.cursor().execute(query, (table_name,)).fetchone()
    return match is not None
def check_exist_column_name(connection: sqlite3.Connection, table_name: str, column_name: str) -> bool:
    """Check whether a column exists in a table.

    Parameters
    ----------
    connection: sqlite3.Connection
        Open connection to the SQLite database.
    table_name: str
        Name of the table to inspect (raw name, without surrounding quotes).
    column_name: str
        Name of the column to look for (compared exactly, case-sensitive).

    Returns
    -------
    bool
        ``True`` if the table has a column with this name, ``False`` otherwise
        (including when the table itself does not exist).
    """
    # PRAGMA arguments cannot be bound with ``?``, so the table name is written
    # as a double-quoted identifier with embedded quotes doubled. This makes
    # names containing spaces or reserved words work and prevents the name
    # from being interpreted as SQL.
    quoted_table = '"' + table_name.replace('"', '""') + '"'
    cursor = connection.cursor()
    cursor.execute(f"PRAGMA table_info({quoted_table})")
    # Each row of table_info describes one column; index 1 is its name.
    return any(row[1] == column_name for row in cursor.fetchall())