#!/usr/bin/env python3
"""
Health Check Script for Entomological Label Information Extraction
Validates system requirements and provides diagnostic information.
"""
import sys
import os
import subprocess
import shutil
from pathlib import Path
def check_python_version():
    """Report the running interpreter version and whether it is usable.

    Prints the detected version followed by one status line. Python 3.9 is
    accepted with a warning and a platform-specific upgrade hint; anything
    older is reported as an error.

    Returns:
        bool: ``False`` when the interpreter is older than 3.9, otherwise
        ``True``.
    """
    info = sys.version_info
    print(f"Python: {info.major}.{info.minor}.{info.micro}")

    # Guard clause: reject unsupported interpreters first.
    if info < (3, 9):
        print("[ERROR] Python version too old. 3.10+ required")
        return False

    if info >= (3, 11):
        print("[OK] Excellent! Python 3.11+ detected")
    elif info >= (3, 10):
        print("[OK] Good! Python 3.10+ detected")
    else:
        # Exactly the 3.9 series: usable, but suggest how to upgrade.
        print("[WARNING] Python 3.9 detected. 3.10+ recommended for full compatibility")
        os_name = sys.platform
        if os_name == "darwin":  # macOS
            hint = "[HINT] macOS: brew install python@3.11"
        elif os_name.startswith("linux"):
            hint = "[HINT] Linux: sudo apt install python3.11 (Ubuntu/Debian) or equivalent"
        elif os_name.startswith("win"):
            hint = "[HINT] Windows: Download from python.org or use Microsoft Store"
        else:
            hint = "[HINT] Visit python.org for installation instructions"
        print(hint)

    return True
def check_docker(timeout=30.0):
    """Check that the Docker CLI is installed and its daemon responds.

    The check runs in three steps: locate ``docker`` on ``PATH``, print the
    output of ``docker --version`` when that command succeeds, then run
    ``docker info`` to find out whether the daemon is reachable.

    Args:
        timeout: Maximum number of seconds to wait for each ``docker``
            invocation. Without a limit, an unresponsive daemon would hang
            the whole health check.

    Returns:
        bool: ``True`` only when ``docker info`` exits with status 0;
        ``False`` when Docker is missing, the daemon is not running, or a
        command could not be executed / timed out.
    """
    docker_path = shutil.which("docker")
    if not docker_path:
        print("Docker: [ERROR] Not installed")
        platform = sys.platform
        if platform == "darwin":  # macOS
            print("[HINT] macOS: Install Docker Desktop from https://docker.com")
        elif platform.startswith("linux"):
            print("[HINT] Linux: sudo apt install docker.io (Ubuntu/Debian) or visit https://docker.com")
        elif platform.startswith("win"):
            print("[HINT] Windows: Install Docker Desktop from https://docker.com")
        else:
            print("[HINT] Visit https://docker.com for installation instructions")
        return False

    try:
        # Invoke the resolved executable rather than the bare name so that
        # the binary found by shutil.which is the one actually run.
        version_result = subprocess.run(
            [docker_path, "--version"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        if version_result.returncode == 0:
            print(f"Docker: {version_result.stdout.strip()}")

        # `docker info` talks to the daemon, so a non-zero exit status means
        # the CLI is present but the daemon is not reachable.
        info_result = subprocess.run(
            [docker_path, "info"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: executable could not be started.
        # SubprocessError: includes TimeoutExpired.
        # ValueError: includes undecodable command output.
        print(f"Docker: [ERROR] Error checking Docker: {e}")
        return False

    if info_result.returncode == 0:
        print("[OK] Docker daemon is running")
        return True

    print("[WARNING] Docker installed but daemon not running")
    platform = sys.platform
    if platform == "darwin" or platform.startswith("win"):  # macOS or Windows
        print("[HINT] Start Docker Desktop application")
    elif platform.startswith("linux"):
        print("[HINT] Linux: sudo systemctl start docker")
    else:
        print("[HINT] Start Docker service for your platform")
    return False
def check_project_structure():
    """Check that the working directory contains the expected project layout.

    The current working directory is inspected; if it cannot be determined,
    the grandparent directory of this script is used instead. Each required
    entry must exist with the right type: required files must be regular
    files and required directories must be directories.

    Returns:
        bool: ``True`` when every required file and directory is present,
        ``False`` otherwise (the missing entries are printed).
    """
    try:
        current_dir = Path.cwd()
    except OSError as e:
        # Path.cwd() fails with OSError, e.g. when the working directory
        # has been removed.
        print(f"Project: [WARNING] Could not determine current directory: {e}")
        # Try to get script directory instead
        current_dir = Path(__file__).parent.parent
        print(f"Using script directory: {current_dir.name}")
    else:
        print(f"Project: {current_dir.name}")

    required_files = (
        "launch_gui.py",
        "pyproject.toml",
        "environment.yml",
        "README.md",
    )
    required_dirs = (
        "data",
        "docs",
        "scripts",
        "tools",
        "models",
        "pipelines",
    )

    # is_file()/is_dir() instead of exists(): a directory named like a
    # required file (or a file named like a required directory) must not
    # satisfy the check.
    missing_files = [f for f in required_files if not (current_dir / f).is_file()]
    missing_dirs = [d for d in required_dirs if not (current_dir / d).is_dir()]

    if not missing_files and not missing_dirs:
        print("[OK] All required files and directories present")
        return True

    print("[WARNING] Missing components:")
    for f in missing_files:
        print(f"- {f}")
    for d in missing_dirs:
        print(f"- {d}/")
    return False
def check_system_resources(min_free_gb=5):
    """Report free disk space and warn when it is below a threshold.

    Free space is measured for the current directory; if that cannot be
    queried, the directory containing this script is used instead.
    ``shutil.disk_usage`` is used for the measurement because it works on
    both Windows and Unix-like systems.

    Args:
        min_free_gb: Recommended minimum free space in GiB. A warning is
            printed when less than this is available.

    Returns:
        bool: Always ``True``. Low disk space, or a failure to measure it,
        is reported as a warning and does not fail the health check.
    """
    try:
        try:
            free_bytes = shutil.disk_usage(".").free
        except OSError:
            # Current directory not accessible: fall back to the script's
            # own directory.
            script_dir = os.path.dirname(os.path.abspath(__file__))
            free_bytes = shutil.disk_usage(script_dir).free
    except (OSError, NameError) as e:
        # NameError covers environments where __file__ is not defined.
        print(f"Disk Space: [WARNING] Could not check disk space: {e}")
        return True

    free_gb = free_bytes / (1024**3)
    print(f"Disk Space: {free_gb:.1f}GB available")
    if free_gb < min_free_gb:
        print(f"[WARNING] Low disk space. {min_free_gb:g}GB+ recommended")
    else:
        print("[OK] Sufficient disk space")
    return True
def _probe_version(executable, timeout=15.0):
    """Return the first line of ``<executable> --version``, or '' if unavailable."""
    try:
        result = subprocess.run(
            [executable, "--version"],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # The tool was found on PATH, so a failed or slow version query is
        # not treated as an error; the caller reports it as installed.
        return ""
    if result.returncode != 0:
        return ""
    # Prefer stdout, but fall back to stderr for tools that print their
    # version banner there.
    output = result.stdout.strip() or result.stderr.strip()
    return output.splitlines()[0] if output else ""


def check_dependencies():
    """Check for external command-line tools used alongside the project.

    Looks up ``git``, ``conda`` and ``tesseract`` on ``PATH`` and prints one
    status line per tool. For tools that are found, the first line of their
    ``--version`` output is shown when it can be obtained, otherwise
    ``Installed``.

    Returns:
        bool: ``False`` when ``git`` is missing. A missing ``conda`` is
        reported as informational and a missing ``tesseract`` as a warning
        with an install hint; neither changes the result.
    """
    deps = {
        "git": "Version control",
        "conda": "Package manager (optional)",
        "tesseract": "OCR engine (for local processing)",
    }
    print("Dependencies:")
    all_good = True
    for dep, description in deps.items():
        path = shutil.which(dep)
        if path:
            # Run the resolved path so wrappers found by shutil.which
            # (e.g. .bat/.cmd shims on Windows) can actually be executed.
            version = _probe_version(path)
            print(f"[OK] {dep}: {version or 'Installed'}")
            continue

        if dep == "tesseract":
            print(f"[WARNING] {dep}: Not found ({description})")
            platform = sys.platform
            if platform == "darwin":  # macOS
                print("[HINT] macOS: brew install tesseract")
            elif platform.startswith("linux"):
                print("[HINT] Linux: sudo apt install tesseract-ocr (Ubuntu/Debian)")
            elif platform.startswith("win"):
                print("[HINT] Windows: Download from https://github.com/tesseract-ocr/tesseract")
            else:
                print("[HINT] Visit https://github.com/tesseract-ocr/tesseract")
        elif dep == "conda":
            print(f"[INFO] {dep}: Not found ({description})")
        else:
            print(f"[WARNING] {dep}: Not found ({description})")
            all_good = False
    return all_good
def main():
    """Run every health check in order and print a pass/fail summary.

    Each check is executed inside its own ``try`` so that an unexpected
    exception in one check is reported and counted as a failure instead of
    aborting the remaining checks. The closing verdict depends on how many
    checks passed: all of them, all but one, or fewer.
    """
    print("Health Check - Entomological Label Information Extraction")
    print("=" * 60)

    checks = (
        ("Python Version", check_python_version),
        ("Docker", check_docker),
        ("Project Structure", check_project_structure),
        ("System Resources", check_system_resources),
        ("Dependencies", check_dependencies),
    )

    outcomes = []
    for label, run_check in checks:
        print()
        try:
            outcome = run_check()
        except Exception as err:
            print(f"[ERROR] Error during {label} check: {err}")
            outcome = False
        outcomes.append((label, outcome))

    print()
    print("=" * 60)
    print("Health Check Summary:")

    passed = 0
    for label, outcome in outcomes:
        if outcome:
            passed += 1
            print(f"{label}: [PASS]")
        else:
            print(f"{label}: [FAIL]")
    total = len(outcomes)

    # The same launch instructions follow both successful verdicts.
    launch_hints = (
        "Ready to launch GUI: python3 launch_gui.py",
        "Or run command line: ./tools/run_mli_pipeline.sh or ./tools/run_sli_pipeline.sh",
    )

    print()
    if passed >= total - 1:
        if passed == total:
            print("[SUCCESS] All checks passed! Your system is ready to run the extraction tool.")
        else:
            print("[MOSTLY OK] Most checks passed. You should be able to run the tool with Docker.")
        for hint in launch_hints:
            print(hint)
    else:
        print("[ERROR] Several checks failed. Please resolve the issues above before running.")
        print("See README.md for detailed setup instructions.")

    print()
    print("For help:")
    print("README.md - Complete installation and usage guide")
    print("python launch_gui.py - Start the graphical interface")
# Run the health check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()