Enhance file handling instructions in code interpreter and message handler; clarify usage of load_file(), get_file_path() and print() for user-uploaded files; tighten the id() block pattern and use sentry_sdk.is_initialized() in monitoring

2025-12-01 17:38:53 +07:00
parent 668f56020d
commit d549c43844
5 changed files with 138 additions and 26 deletions
--- a/src/config/code_interpreter_prompts.py
+++ b/src/config/code_interpreter_prompts.py
@@ -12,6 +12,7 @@ You have access to a powerful code interpreter environment that allows you to:
 - Execute Python code in a secure, isolated environment
 - Maximum execution time: 60 seconds
 - Output limit: 100KB
+- ⚠️ **IMPORTANT: Use print() to display results!** Values that are not printed are never shown to the user; only printed output and files you save are captured.

 ## 📦 **Package Management (Auto-Install)**
 The code interpreter can AUTOMATICALLY install missing packages when needed!
@@ -43,18 +44,64 @@ import seaborn as sns  # Will auto-install if missing
 import pandas as pd    # Will auto-install if missing

 df = pd.DataFrame({'x': [1,2,3], 'y': [4,5,6]})
+print(df)  # ⚠️ Use print() to show output!
 sns.scatterplot(data=df, x='x', y='y')
 plt.savefig('plot.png')
+print("Chart saved!")  # Confirm completion
 ```

+⚠️ **REMINDER: Unprinted values are not visible!** Always use print() for any data you want the user to see (saved files such as plots are captured separately).
+
 ## 📁 **File Management (48-Hour Lifecycle)**

 ### **User-Uploaded Files**
 - Users can upload files (CSV, Excel, JSON, images, etc.)
 - Files are stored with unique `file_id`
- Access files using: `df = load_file('file_id_here')`
 - Files expire after 48 hours automatically

+### **CRITICAL: How to Load Files**
+
+**Option 1: load_file() - Returns data directly (RECOMMENDED)**
+```python
+# For CSV files - returns DataFrame directly, DO NOT pass to pd.read_csv()!
+# ⚠️ Use the ACTUAL file_id from the upload message, NOT this example!
+df = load_file('<file_id_from_upload_message>')
+print(df.head())  # Works immediately!
+```
+
+**Option 2: get_file_path() - Returns path for manual loading**
+```python
+# If you need the actual file path:
+path = get_file_path('<file_id_from_upload_message>')
+df = pd.read_csv(path)
+```
+
+### **COMMON MISTAKES TO AVOID**
+```python
+# ❌ WRONG - load_file() returns the loaded data (a DataFrame for CSV), NOT a path!
+file_path = load_file('<file_id>')
+df = pd.read_csv(file_path)  # ERROR: Cannot read DataFrame as CSV!
+
+# ❌ WRONG - file_id is NOT a file path!
+df = pd.read_csv('<file_id>')  # ERROR: File not found!
+
+# ❌ WRONG - Using example IDs from documentation!
+df = load_file('example_from_docs')  # ERROR: Use REAL file_id from upload!
+
+# ✅ CORRECT - use load_file() with the ACTUAL file_id from upload message
+df = load_file('<file_id_from_upload_message>')  # Copy exact ID from 📁 FILE UPLOADED
+print(df.head())  # ⚠️ Use print() to show output!
+print(df.describe())
+
+# ✅ CORRECT - use get_file_path() if you need the path
+path = get_file_path('<file_id_from_upload_message>')
+df = pd.read_csv(path)
+print(df.info())  # Always print results!
+```
+
+⚠️ CRITICAL: The file_id is shown in the conversation when a file is uploaded.
+Look for: "📁 FILE UPLOADED" or "df = load_file('...')" in recent messages!
+
 ### **Generated Files**
 - ANY file you create is captured and saved
 - Supported types: images, CSVs, text, JSON, HTML, PDFs, etc. (80+ formats)
@@ -94,10 +141,14 @@ plt.savefig('plot.png')

 **Load uploaded file:**
 ```python
-# User uploaded 'sales_data.csv' with file_id: 'user_123_1234567890_abc123'
-df = load_file('user_123_1234567890_abc123')
-print(df.head())
-print(f"Loaded {len(df)} rows")
+# ⚠️ Find the ACTUAL file_id in the conversation's "📁 FILE UPLOADED" message!
+# DO NOT copy this example - use the real file_id shown when the user uploaded!
+df = load_file('<paste_actual_file_id_here>')
+
+# ⚠️ CRITICAL: Always use print() to display results!
+print(df.head())  # Show first rows
+print(df.describe())  # Show statistics
+print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
 ```

 **Create multiple output files:**
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -195,9 +195,35 @@ NORMAL_CHAT_PROMPT = """You're ChatGPT for Discord. Be concise, helpful, safe. R
 TOOLS:
 1. google_search(query) - Web search for current info
 2. scrape_webpage(url) - Extract webpage content
-3. execute_python_code(code) - Run Python, packages auto-install. Use load_file('file_id') for user files. Save outputs to files.
+3. execute_python_code(code) - Run Python, packages auto-install. **FILE ACCESS: See critical instructions below!**
 4. set_reminder(content, time) / get_reminders() - Manage reminders

+═══════════════════════════════════════════════════════════════
+⚠️ CRITICAL: FILE ACCESS IN CODE INTERPRETER
+═══════════════════════════════════════════════════════════════
+
+When users upload files, you will see a message like:
+   📁 FILE UPLOADED - USE THIS FILE_ID:
+   Filename: data.csv
+   ⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE:
+      df = load_file('<THE_ACTUAL_FILE_ID_FROM_CONTEXT>')
+
+**IMPORTANT: Copy the EXACT file_id from the file upload message - do NOT use examples!**
+
+✅ CORRECT:
+   df = load_file('<file_id_from_upload_message>')
+   print(df.head())  # Use print() to show output!
+   
+⚠️ IMPORTANT: Always use print() to display results - code output is only captured via print()!
+
+❌ WRONG - Using filename:
+   df = pd.read_csv('data.csv')  # FAILS - file not found!
+   
+❌ WRONG - Using example file_id from prompts:
+   df = load_file('example_id_from_docs')  # FAILS - use the REAL ID!
+
+⚠️ CRITICAL: Look for the 📁 FILE UPLOADED message in this conversation and copy the EXACT file_id shown there!
+
 ═══════════════════════════════════════════════════════════════
 IMAGE GENERATION & EDITING TOOLS
 ═══════════════════════════════════════════════════════════════
--- a/src/module/message_handler.py
+++ b/src/module/message_handler.py
@@ -1222,11 +1222,20 @@ print("\\n=== Correlation Analysis ===")
            user_message = message.content.strip() if message.content else ""
            
            file_context = (
-                f"\n\n[User uploaded file: {filename}]\n"
-                f"[File ID: {file_id}]\n"
-                f"[File Type: {file_type}]\n"
-                f"[Size: {size_str}]\n"
-                f"[Available in code_interpreter via: load_file('{file_id}')]\n"
+                f"\n\n══════════════════════════════════════════════\n"
+                f"📁 FILE UPLOADED - USE THIS FILE_ID:\n"
+                f"══════════════════════════════════════════════\n"
+                f"Filename: {filename}\n"
+                f"File Type: {file_type}\n"
+                f"Size: {size_str}\n"
+                f"\n"
+                f"⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE:\n"
+                f"   df = load_file('{file_id}')\n"
+                f"\n"
+                f"❌ DO NOT use the filename directly (e.g., pd.read_csv('{filename}'))\n"
+                f"❌ DO NOT use file_id as a path (e.g., pd.read_csv('{file_id}'))\n"
+                f"✅ ONLY use: load_file('{file_id}')\n"
+                f"══════════════════════════════════════════════\n"
            )
            
            if user_message:
--- a/src/utils/code_interpreter.py
+++ b/src/utils/code_interpreter.py
@@ -197,7 +197,7 @@ BLOCKED_PATTERNS = [
    r'gc\.',
    r'sys\.getsizeof',
    r'sys\.getrefcount',
-    r'id\s*\(',  # Block id() which can leak memory addresses
+    r'\bid\s*\(',  # Block id(), which can leak memory addresses; \b keeps names like valid( or get_id( from matching
 ]

 # Additional patterns that log warnings but don't block
@@ -1050,31 +1050,57 @@ import os

 FILES = {json.dumps(file_paths_map)}

+def get_file_path(file_id):
+    '''
+    Get the actual file path for a given file ID.
+    Use this to get the path for pd.read_csv(), open(), etc.
+    
+    Args:
+        file_id: The file ID provided when the file was uploaded
+    
+    Returns:
+        str: The actual file path on disk
+    
+    Example:
+        path = get_file_path('<file_id_from_upload_message>')
+        df = pd.read_csv(path)
+    
+    To see which file IDs are available, call list(FILES.keys()).
+    '''
+    if file_id not in FILES:
+        raise ValueError(f"File '{{file_id}}' not found. Available: {{list(FILES.keys())}}")
+    return FILES[file_id]
+
 def load_file(file_id):
    '''
-    Load a file automatically based on its extension.
-    Supports 200+ file types with smart auto-detection.
+    Load a file automatically based on its extension and return the data directly.
+    DO NOT pass the result to pd.read_csv() - for CSV/TSV files it already returns a DataFrame!
    
    Args:
        file_id: The file ID provided when the file was uploaded
    
    Returns:
        Loaded file data (varies by file type):
-        - CSV/TSV: pandas DataFrame
+        - CSV/TSV: pandas DataFrame (ready to use!)
        - Excel (.xlsx, .xls): pandas ExcelFile object
        - JSON: pandas DataFrame or dict
        - Parquet/Feather: pandas DataFrame
        - Text files: string content
        - Images: PIL Image object
-        - And 200+ more formats...
    
-    Excel file usage examples:
-        excel_file = load_file('file_id')
-        sheet_names = excel_file.sheet_names
-        df = excel_file.parse('Sheet1')
-        df2 = pd.read_excel(excel_file, sheet_name='Sheet1')
+    CORRECT usage for CSV:
+        df = load_file('file_id')  # Returns DataFrame directly
+        print(df.head())
    
-    Available files: {{', '.join(FILES.keys()) if FILES else 'None'}}
+    WRONG usage (DO NOT DO THIS):
+        file_path = load_file('file_id')  # WRONG! This is a DataFrame, not a path
+        df = pd.read_csv(file_path)       # This will FAIL!
+    
+    If you need the file path instead, use get_file_path():
+        path = get_file_path('file_id')
+        df = pd.read_csv(path)
+    
+    To see which file IDs are available, call list(FILES.keys()).
    '''
    if file_id not in FILES:
        available_files = list(FILES.keys())
--- a/src/utils/monitoring.py
+++ b/src/utils/monitoring.py
@@ -190,7 +190,7 @@ def capture_exception(
    """
    logger.exception(f"Captured exception: {exception}")
    
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        with sentry_sdk.push_scope() as scope:
            if context:
                for key, value in context.items():
@@ -219,7 +219,7 @@ def capture_message(
    log_method = getattr(logger, level, logger.info)
    log_method(message)
    
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        with sentry_sdk.push_scope() as scope:
            if context:
                for key, value in context.items():
@@ -242,7 +242,7 @@ def set_user_context(
        username: Discord username
        guild_id: Discord guild ID
    """
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        sentry_sdk.set_user({
            "id": str(user_id),
            "username": username,
@@ -325,7 +325,7 @@ async def measure_async(name: str, **metadata):
    
    # Start Sentry transaction if available
    transaction = None
-    if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client:
+    if SENTRY_AVAILABLE and sentry_sdk.is_initialized():
        transaction = sentry_sdk.start_transaction(
            op="task",
            name=name