From 0a1e871cdbbe5baebd87b3a4b95fae41a594ab50 Mon Sep 17 00:00:00 2001 From: cauvang32 Date: Fri, 3 Oct 2025 13:05:07 +0700 Subject: [PATCH] feat: Implement package cleanup and usage tracking in Docker and non-Docker environments --- src/utils/code_interpreter.py | 133 ++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 5 deletions(-) diff --git a/src/utils/code_interpreter.py b/src/utils/code_interpreter.py index 67a2818..b316c21 100644 --- a/src/utils/code_interpreter.py +++ b/src/utils/code_interpreter.py @@ -20,6 +20,7 @@ import shutil import traceback import contextlib import venv +import ast from typing import Dict, Any, Optional, List, Tuple from pathlib import Path from datetime import datetime, timedelta @@ -538,19 +539,87 @@ class PackageManager: except Exception: return True + async def _cleanup_old_packages(self): + """Clean up packages not used in PACKAGE_CLEANUP_DAYS days.""" + try: + cache = self._load_cache() + packages = cache.get("packages", {}) + current_time = datetime.now() + packages_to_remove = [] + + # Find packages older than PACKAGE_CLEANUP_DAYS + for package_lower, info in packages.items(): + last_used = info.get("last_used", info.get("installed_at")) + if last_used: + try: + last_used_date = datetime.fromisoformat(last_used) + days_unused = (current_time - last_used_date).days + if days_unused > PACKAGE_CLEANUP_DAYS: + packages_to_remove.append(info.get("name", package_lower)) + except Exception as e: + logger.warning(f"Error parsing date for {package_lower}: {e}") + + if packages_to_remove: + logger.info(f"Found {len(packages_to_remove)} packages unused for >{PACKAGE_CLEANUP_DAYS} days: {packages_to_remove}") + + # In Docker: Uninstall individual packages from system + if self.is_docker: + successfully_removed = [] + for package in packages_to_remove: + try: + logger.info(f"Uninstalling unused package from system: {package}") + process = await asyncio.create_subprocess_exec( + str(self.pip_path), "uninstall", "-y", package, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=60) + + if process.returncode == 0: + logger.info(f"✓ Successfully uninstalled: {package}") + successfully_removed.append(package.lower()) + else: + error_msg = stderr.decode() + logger.warning(f"Failed to uninstall {package}: {error_msg}") + except asyncio.TimeoutError: + logger.warning(f"Timeout uninstalling {package}") + except Exception as e: + logger.error(f"Error uninstalling {package}: {e}") + + # Remove successfully uninstalled packages from cache + for package_lower in successfully_removed: + cache["packages"].pop(package_lower, None) + + cache["last_cleanup"] = current_time.isoformat() + self._save_cache(cache) + logger.info(f"Docker cleanup completed. Removed {len(successfully_removed)}/{len(packages_to_remove)} packages.") + else: + # In non-Docker: Recreate entire venv (existing behavior) + logger.info("Non-Docker environment: recreating venv for cleanup") + await self._recreate_venv() + else: + logger.info("No old packages to clean up") + # Update cleanup timestamp even if nothing to clean + cache["last_cleanup"] = current_time.isoformat() + self._save_cache(cache) + + except Exception as e: + logger.error(f"Error during package cleanup: {e}") + async def ensure_venv_ready(self) -> bool: """Ensure virtual environment is ready.""" try: + # Check if cleanup is needed (both Docker and non-Docker) + if self._needs_cleanup(): + logger.info("Performing periodic package cleanup...") + await self._cleanup_old_packages() + # In Docker, we use system Python directly (no venv needed) if self.is_docker: logger.info("Docker environment detected - using system Python, skipping venv checks") return True # Non-Docker: full validation - if self._needs_cleanup(): - logger.info("Performing periodic venv cleanup...") - await self._recreate_venv() - return True if not self.venv_dir.exists() or not self.python_path.exists(): logger.info("Creating virtual environment...") await self._recreate_venv() @@ -625,12 +694,22 @@ class PackageManager: def mark_package_installed(self, package: str): """Mark package as installed.""" cache = self._load_cache() + now = datetime.now().isoformat() cache["packages"][package.lower()] = { - "installed_at": datetime.now().isoformat(), + "installed_at": now, + "last_used": now, "name": package } self._save_cache(cache) + def update_package_usage(self, package: str): + """Update last used timestamp for a package.""" + cache = self._load_cache() + package_lower = package.lower() + if package_lower in cache.get("packages", {}): + cache["packages"][package_lower]["last_used"] = datetime.now().isoformat() + self._save_cache(cache) + def is_package_approved(self, package: str) -> Tuple[bool, str]: """Check if package is approved for installation.""" package_lower = package.lower().strip() @@ -699,6 +778,38 @@ class CodeExecutor: return False, f"Blocked unsafe operation: {pattern}" return True, "Code passed security validation" + def _extract_imports_from_code(self, code: str) -> List[str]: + """ + Extract imported module names from code using AST parsing. + + Args: + code: Python code to analyze + + Returns: + List of top-level module names (e.g., ['numpy', 'pandas', 'matplotlib']) + """ + modules = set() + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + # Get top-level module (e.g., 'numpy' from 'numpy.random') + module_name = alias.name.split('.')[0] + modules.add(module_name) + elif isinstance(node, ast.ImportFrom): + if node.module: + # Get top-level module (e.g., 'sklearn' from 'sklearn.metrics') + module_name = node.module.split('.')[0] + modules.add(module_name) + except SyntaxError: + # If code has syntax errors, we'll catch them during execution + logger.debug("Syntax error while parsing imports, will be caught during execution") + except Exception as e: + logger.warning(f"Error extracting imports: {e}") + + return list(modules) + def _extract_missing_modules(self, error_output: str) -> List[str]: """ Extract missing module names from error output. @@ -1091,6 +1202,18 @@ def load_file(file_id): if failed_packages: result["failed_packages"] = failed_packages + # Track package usage if execution was successful + if return_code == 0: + try: + imported_modules = self._extract_imports_from_code(code) + for module in imported_modules: + # Update usage timestamp for installed packages + if self.package_manager.is_package_installed(module): + self.package_manager.update_package_usage(module) + logger.debug(f"Updated usage timestamp for package: {module}") + except Exception as e: + logger.warning(f"Failed to track package usage: {e}") + return result except asyncio.TimeoutError: