Enhance file handling instructions in code interpreter and message handler; clarify usage of load_file() and print() for user-uploaded files

2025-12-01 17:38:53 +07:00
parent 668f56020d
commit d549c43844
5 changed files with 138 additions and 26 deletions
--- a/src/config/code_interpreter_prompts.py
+++ b/src/config/code_interpreter_prompts.py
@@ -12,6 +12,7 @@ You have access to a powerful code interpreter environment that allows you to:
 - Execute Python code in a secure, isolated environment
 - Maximum execution time: 60 seconds
 - Output limit: 100KB
 - ⚠️ **IMPORTANT: Use print() to display results!** Only printed output is captured and shown to the user.
 ## 📦 **Package Management (Auto-Install)**
 The code interpreter can AUTOMATICALLY install missing packages when needed!
@@ -43,18 +44,64 @@ import seaborn as sns  # Will auto-install if missing
 import pandas as pd    # Will auto-install if missing
 df = pd.DataFrame({'x': [1,2,3], 'y': [4,5,6]})
 print(df)  # ⚠️ Use print() to show output!
 sns.scatterplot(data=df, x='x', y='y')
 plt.savefig('plot.png')
 print("Chart saved!")  # Confirm completion
 ```
 ⚠️ **REMINDER: Only printed output is visible!** Always use print() for any data you want the user to see.
 ## 📁 **File Management (48-Hour Lifecycle)**
 ### **User-Uploaded Files**
 - Users can upload files (CSV, Excel, JSON, images, etc.)
 - Files are stored with unique `file_id`
 - Access files using: `df = load_file('file_id_here')`
 - Files expire after 48 hours automatically
 ### **CRITICAL: How to Load Files**
 **Option 1: load_file() - Returns data directly (RECOMMENDED)**
 ```python
 # For CSV files - returns DataFrame directly, DO NOT pass to pd.read_csv()!
 # ⚠️ Use the ACTUAL file_id from the upload message, NOT this example!
 df = load_file('<file_id_from_upload_message>')
 print(df.head())  # Works immediately!
 ```
 **Option 2: get_file_path() - Returns path for manual loading**
 ```python
 # If you need the actual file path:
 path = get_file_path('<file_id_from_upload_message>')
 df = pd.read_csv(path)
 ```
 ### **COMMON MISTAKES TO AVOID**
 ```python
 # ❌ WRONG - load_file() returns a DataFrame, NOT a path!
 file_path = load_file('<file_id>')
 df = pd.read_csv(file_path)  # ERROR: Cannot read DataFrame as CSV!
 # ❌ WRONG - file_id is NOT a file path!
 df = pd.read_csv('<file_id>')  # ERROR: File not found!
 # ❌ WRONG - Using example IDs from documentation!
 df = load_file('example_from_docs')  # ERROR: Use REAL file_id from upload!
 # ✅ CORRECT - use load_file() with the ACTUAL file_id from upload message
 df = load_file('<file_id_from_upload_message>')  # Copy exact ID from 📁 FILE UPLOADED
 print(df.head())  # ⚠️ Use print() to show output!
 print(df.describe())
 # ✅ CORRECT - use get_file_path() if you need the path
 path = get_file_path('<file_id_from_upload_message>')
 df = pd.read_csv(path)
 print(df.info())  # Always print results!
 ```
 ⚠️ CRITICAL: The file_id is shown in the conversation when a file is uploaded.
 Look for: "📁 FILE UPLOADED" or "df = load_file('...')" in recent messages!
 ### **Generated Files**
 - ANY file you create is captured and saved
 - Supported types: images, CSVs, text, JSON, HTML, PDFs, etc. (80+ formats)
@@ -94,10 +141,14 @@ plt.savefig('plot.png')
 **Load uploaded file:**
 ```python
-# User uploaded 'sales_data.csv' with file_id: 'user_123_1234567890_abc123'
+# ⚠️ Find the ACTUAL file_id in the conversation's "📁 FILE UPLOADED" message!
-df = load_file('user_123_1234567890_abc123')
+# DO NOT copy this example - use the real file_id shown when the user uploaded!
-print(df.head())
+df = load_file('<paste_actual_file_id_here>')
-print(f"Loaded {len(df)} rows")
+
 # ⚠️ CRITICAL: Always use print() to display results!
 print(df.head())  # Show first rows
 print(df.describe())  # Show statistics
 print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
 ```
 **Create multiple output files:**
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -195,9 +195,35 @@ NORMAL_CHAT_PROMPT = """You're ChatGPT for Discord. Be concise, helpful, safe. R
 TOOLS:
 1. google_search(query) - Web search for current info
 2. scrape_webpage(url) - Extract webpage content
-3. execute_python_code(code) - Run Python, packages auto-install. Use load_file('file_id') for user files. Save outputs to files.
+3. execute_python_code(code) - Run Python, packages auto-install. **FILE ACCESS: See critical instructions below!**
 4. set_reminder(content, time) / get_reminders() - Manage reminders
 ═══════════════════════════════════════════════════════════════
 ⚠️ CRITICAL: FILE ACCESS IN CODE INTERPRETER
 ═══════════════════════════════════════════════════════════════
 When users upload files, you will see a message like:
   📁 FILE UPLOADED - USE THIS FILE_ID:
   Filename: data.csv
   ⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE:
      df = load_file('<THE_ACTUAL_FILE_ID_FROM_CONTEXT>')
 **IMPORTANT: Copy the EXACT file_id from the file upload message - do NOT use examples!**
 ✅ CORRECT:
   df = load_file('<file_id_from_upload_message>')
   print(df.head())  # Use print() to show output!
 ⚠️ IMPORTANT: Always use print() to display results - code output is only captured via print()!
 ❌ WRONG - Using filename:
   df = pd.read_csv('data.csv')  # FAILS - file not found!
 ❌ WRONG - Using example file_id from prompts:
   df = load_file('example_id_from_docs')  # FAILS - use the REAL ID!
 ⚠️ CRITICAL: Look for the 📁 FILE UPLOADED message in this conversation and copy the EXACT file_id shown there!
 ═══════════════════════════════════════════════════════════════
 IMAGE GENERATION & EDITING TOOLS
 ═══════════════════════════════════════════════════════════════
--- a/src/module/message_handler.py
+++ b/src/module/message_handler.py
@@ -1222,11 +1222,20 @@ print("\\n=== Correlation Analysis ===")
            user_message = message.content.strip() if message.content else ""
            file_context = (
-                f"\n\n[User uploaded file: {filename}]\n"
+                f"\n\n══════════════════════════════════════════════\n"
-                f"[File ID: {file_id}]\n"
+                f"📁 FILE UPLOADED - USE THIS FILE_ID:\n"
-                f"[File Type: {file_type}]\n"
+                f"══════════════════════════════════════════════\n"
-                f"[Size: {size_str}]\n"
+                f"Filename: {filename}\n"
-                f"[Available in code_interpreter via: load_file('{file_id}')]\n"
+                f"File Type: {file_type}\n"
                f"Size: {size_str}\n"
                f"\n"
                f"⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE:\n"
                f"   df = load_file('{file_id}')\n"
                f"\n"
                f"❌ DO NOT use the filename directly (e.g., pd.read_csv('{filename}'))\n"
                f"❌ DO NOT use file_id as a path (e.g., pd.read_csv('{file_id}'))\n"
                f"✅ ONLY use: load_file('{file_id}')\n"
                f"══════════════════════════════════════════════\n"
            )
            if user_message:
--- a/src/utils/code_interpreter.py
+++ b/src/utils/code_interpreter.py
@@ -197,7 +197,7 @@ BLOCKED_PATTERNS = [
    r'gc\.',
    r'sys\.getsizeof',
    r'sys\.getrefcount',
-    r'id\s*\(',  # Block id() which can leak memory addresses
+    r'\bid\s*\(',  # Block id() which can leak memory addresses (\b ensures word boundary)
 ]
 # Additional patterns that log warnings but don't block
@@ -1050,31 +1050,57 @@ import os
 FILES = {json.dumps(file_paths_map)}
 def get_file_path(file_id):
    '''
    Get the actual file path for a given file ID.
    Use this to get the path for pd.read_csv(), open(), etc.
    Args:
        file_id: The file ID provided when the file was uploaded
    Returns:
        str: The actual file path on disk
    Example:
        path = get_file_path('<file_id_from_upload_message>')
        df = pd.read_csv(path)
    Available files: Use list(FILES.keys()) to see available files
    '''
    if file_id not in FILES:
        raise ValueError(f"File '{{file_id}}' not found. Available: {{list(FILES.keys())}}")
    return FILES[file_id]
 def load_file(file_id):
    '''
-    Load a file automatically based on its extension.
+    Load a file automatically based on its extension and return the data directly.
-    Supports 200+ file types with smart auto-detection.
+    DO NOT pass the result to pd.read_csv() - it already returns a DataFrame!
    Args:
        file_id: The file ID provided when the file was uploaded
    Returns:
        Loaded file data (varies by file type):
-        - CSV/TSV: pandas DataFrame
+        - CSV/TSV: pandas DataFrame (ready to use!)
        - Excel (.xlsx, .xls): pandas ExcelFile object
        - JSON: pandas DataFrame or dict
        - Parquet/Feather: pandas DataFrame
        - Text files: string content
        - Images: PIL Image object
        - And 200+ more formats...
-    Excel file usage examples:
+    CORRECT usage for CSV:
-        excel_file = load_file('file_id')
+        df = load_file('file_id')  # Returns DataFrame directly
-        sheet_names = excel_file.sheet_names
+        print(df.head())
    Usage for Excel (returns an ExcelFile object, not a DataFrame):
        excel_file = load_file('file_id')
        df = excel_file.parse('Sheet1')
        df2 = pd.read_excel(excel_file, sheet_name='Sheet1')
-    Available files: {{', '.join(FILES.keys()) if FILES else 'None'}}
+    WRONG usage (DO NOT DO THIS):
        file_path = load_file('file_id')  # WRONG! This is a DataFrame, not a path
        df = pd.read_csv(file_path)       # This will FAIL!
    If you need the file path instead, use get_file_path():
        path = get_file_path('file_id')
        df = pd.read_csv(path)
    Available files: Use list(FILES.keys()) to see available files
    '''
    if file_id not in FILES:
        available_files = list(FILES.keys())
--- a/src/utils/monitoring.py
+++ b/src/utils/monitoring.py
@@ -190,7 +190,7 @@ def capture_exception(
    """
    logger.exception(f"Captured exception: {exception}")
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        with sentry_sdk.push_scope() as scope:
            if context:
                for key, value in context.items():
@@ -219,7 +219,7 @@ def capture_message(
    log_method = getattr(logger, level, logger.info)
    log_method(message)
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        with sentry_sdk.push_scope() as scope:
            if context:
                for key, value in context.items():
@@ -242,7 +242,7 @@ def set_user_context(
        username: Discord username
        guild_id: Discord guild ID
    """
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        sentry_sdk.set_user({
            "id": str(user_id),
            "username": username,
@@ -325,7 +325,7 @@ async def measure_async(name: str, **metadata):
    # Start Sentry transaction if available
    transaction = None
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        transaction = sentry_sdk.start_transaction(
            op="task",
            name=name