From d549c43844c18c2857e8f405c4b86a410a7b8b7c Mon Sep 17 00:00:00 2001 From: cauvang32 Date: Mon, 1 Dec 2025 17:38:53 +0700 Subject: [PATCH] Enhance file handling instructions in code interpreter and message handler; clarify usage of load_file() and print() for user-uploaded files --- src/config/code_interpreter_prompts.py | 61 +++++++++++++++++++++++--- src/config/config.py | 28 +++++++++++- src/module/message_handler.py | 19 +++++--- src/utils/code_interpreter.py | 48 +++++++++++++++----- src/utils/monitoring.py | 8 ++-- 5 files changed, 138 insertions(+), 26 deletions(-) diff --git a/src/config/code_interpreter_prompts.py b/src/config/code_interpreter_prompts.py index aad89b9..bfc4348 100644 --- a/src/config/code_interpreter_prompts.py +++ b/src/config/code_interpreter_prompts.py @@ -12,6 +12,7 @@ You have access to a powerful code interpreter environment that allows you to: - Execute Python code in a secure, isolated environment - Maximum execution time: 60 seconds - Output limit: 100KB +- ⚠️ **IMPORTANT: Use print() to display results!** Only printed output is captured and shown to the user. ## 📦 **Package Management (Auto-Install)** The code interpreter can AUTOMATICALLY install missing packages when needed! @@ -43,18 +44,64 @@ import seaborn as sns # Will auto-install if missing import pandas as pd # Will auto-install if missing df = pd.DataFrame({'x': [1,2,3], 'y': [4,5,6]}) +print(df) # ⚠️ Use print() to show output! sns.scatterplot(data=df, x='x', y='y') plt.savefig('plot.png') +print("Chart saved!") # Confirm completion ``` +⚠️ **REMINDER: Only printed output is visible!** Always use print() for any data you want the user to see. + ## 📁 **File Management (48-Hour Lifecycle)** ### **User-Uploaded Files** - Users can upload files (CSV, Excel, JSON, images, etc.) 
- Files are stored with unique `file_id` -- Access files using: `df = load_file('file_id_here')` - Files expire after 48 hours automatically +### **CRITICAL: How to Load Files** + +**Option 1: load_file() - Returns data directly (RECOMMENDED)** +```python +# For CSV files - returns DataFrame directly, DO NOT pass to pd.read_csv()! +# ⚠️ Use the ACTUAL file_id from the upload message, NOT this example! +df = load_file('') +print(df.head()) # Works immediately! +``` + +**Option 2: get_file_path() - Returns path for manual loading** +```python +# If you need the actual file path: +path = get_file_path('') +df = pd.read_csv(path) +``` + +### **COMMON MISTAKES TO AVOID** +```python +# ❌ WRONG - load_file() returns a DataFrame, NOT a path! +file_path = load_file('') +df = pd.read_csv(file_path) # ERROR: Cannot read DataFrame as CSV! + +# ❌ WRONG - file_id is NOT a file path! +df = pd.read_csv('') # ERROR: File not found! + +# ❌ WRONG - Using example IDs from documentation! +df = load_file('example_from_docs') # ERROR: Use REAL file_id from upload! + +# ✅ CORRECT - use load_file() with the ACTUAL file_id from upload message +df = load_file('') # Copy exact ID from 📁 FILE UPLOADED +print(df.head()) # ⚠️ Use print() to show output! +print(df.describe()) + +# ✅ CORRECT - use get_file_path() if you need the path +path = get_file_path('') +df = pd.read_csv(path) +print(df.info()) # Always print results! +``` + +⚠️ CRITICAL: The file_id is shown in the conversation when a file is uploaded. +Look for: "📁 FILE UPLOADED" or "df = load_file('...')" in recent messages! + ### **Generated Files** - ANY file you create is captured and saved - Supported types: images, CSVs, text, JSON, HTML, PDFs, etc. 
(80+ formats) @@ -94,10 +141,14 @@ plt.savefig('plot.png') **Load uploaded file:** ```python -# User uploaded 'sales_data.csv' with file_id: 'user_123_1234567890_abc123' -df = load_file('user_123_1234567890_abc123') -print(df.head()) -print(f"Loaded {len(df)} rows") +# ⚠️ Find the ACTUAL file_id in the conversation's "📁 FILE UPLOADED" message! +# DO NOT copy this example - use the real file_id shown when the user uploaded! +df = load_file('') + +# ⚠️ CRITICAL: Always use print() to display results! +print(df.head()) # Show first rows +print(df.describe()) # Show statistics +print(f"Loaded {len(df)} rows, {len(df.columns)} columns") ``` **Create multiple output files:** diff --git a/src/config/config.py b/src/config/config.py index 7630ec6..d63c161 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -195,9 +195,35 @@ NORMAL_CHAT_PROMPT = """You're ChatGPT for Discord. Be concise, helpful, safe. R TOOLS: 1. google_search(query) - Web search for current info 2. scrape_webpage(url) - Extract webpage content -3. execute_python_code(code) - Run Python, packages auto-install. Use load_file('file_id') for user files. Save outputs to files. +3. execute_python_code(code) - Run Python, packages auto-install. **FILE ACCESS: See critical instructions below!** 4. set_reminder(content, time) / get_reminders() - Manage reminders +═══════════════════════════════════════════════════════════════ +⚠️ CRITICAL: FILE ACCESS IN CODE INTERPRETER +═══════════════════════════════════════════════════════════════ + +When users upload files, you will see a message like: + 📁 FILE UPLOADED - USE THIS FILE_ID: + Filename: data.csv + ⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE: + df = load_file('') + +**IMPORTANT: Copy the EXACT file_id from the file upload message - do NOT use examples!** + +✅ CORRECT: + df = load_file('') + print(df.head()) # Use print() to show output! 
+ +⚠️ IMPORTANT: Always use print() to display results - code output is only captured via print()! + +❌ WRONG - Using filename: + df = pd.read_csv('data.csv') # FAILS - file not found! + +❌ WRONG - Using example file_id from prompts: + df = load_file('example_id_from_docs') # FAILS - use the REAL ID! + +⚠️ CRITICAL: Look for the 📁 FILE UPLOADED message in this conversation and copy the EXACT file_id shown there! + ═══════════════════════════════════════════════════════════════ IMAGE GENERATION & EDITING TOOLS ═══════════════════════════════════════════════════════════════ diff --git a/src/module/message_handler.py b/src/module/message_handler.py index 5b161d8..d30fd38 100644 --- a/src/module/message_handler.py +++ b/src/module/message_handler.py @@ -1222,11 +1222,20 @@ print("\\n=== Correlation Analysis ===") user_message = message.content.strip() if message.content else "" file_context = ( - f"\n\n[User uploaded file: {filename}]\n" - f"[File ID: {file_id}]\n" - f"[File Type: {file_type}]\n" - f"[Size: {size_str}]\n" - f"[Available in code_interpreter via: load_file('{file_id}')]\n" + f"\n\n══════════════════════════════════════════════\n" + f"📁 FILE UPLOADED - USE THIS FILE_ID:\n" + f"══════════════════════════════════════════════\n" + f"Filename: {filename}\n" + f"File Type: {file_type}\n" + f"Size: {size_str}\n" + f"\n" + f"⚠️ TO ACCESS THIS FILE IN CODE, YOU MUST USE:\n" + f" df = load_file('{file_id}')\n" + f"\n" + f"❌ DO NOT use the filename directly (e.g., pd.read_csv('{filename}'))\n" + f"❌ DO NOT use file_id as a path (e.g., pd.read_csv('{file_id}'))\n" + f"✅ ONLY use: load_file('{file_id}')\n" + f"══════════════════════════════════════════════\n" ) if user_message: diff --git a/src/utils/code_interpreter.py b/src/utils/code_interpreter.py index 49e11d1..747bb17 100644 --- a/src/utils/code_interpreter.py +++ b/src/utils/code_interpreter.py @@ -197,7 +197,7 @@ BLOCKED_PATTERNS = [ r'gc\.', r'sys\.getsizeof', r'sys\.getrefcount', 
- r'id\s*\(', # Block id() which can leak memory addresses + r'\bid\s*\(', # Block id() which can leak memory addresses (\b ensures word boundary) ] # Additional patterns that log warnings but don't block @@ -1050,31 +1050,57 @@ import os FILES = {json.dumps(file_paths_map)} +def get_file_path(file_id): + ''' + Get the actual file path for a given file ID. + Use this to get the path for pd.read_csv(), open(), etc. + + Args: + file_id: The file ID provided when the file was uploaded + + Returns: + str: The actual file path on disk + + Example: + path = get_file_path('878573881449906208_1764556246_bdbaecc8') + df = pd.read_csv(path) + + Available files: Use list(FILES.keys()) to see available files + ''' + if file_id not in FILES: + raise ValueError(f"File '{{file_id}}' not found. Available: {{list(FILES.keys())}}") + return FILES[file_id] + def load_file(file_id): ''' - Load a file automatically based on its extension. - Supports 200+ file types with smart auto-detection. + Load a file automatically based on its extension and return the data directly. + DO NOT pass the result to pd.read_csv() - it already returns a DataFrame! Args: file_id: The file ID provided when the file was uploaded Returns: Loaded file data (varies by file type): - - CSV/TSV: pandas DataFrame + - CSV/TSV: pandas DataFrame (ready to use!) - Excel (.xlsx, .xls): pandas ExcelFile object - JSON: pandas DataFrame or dict - Parquet/Feather: pandas DataFrame - Text files: string content - Images: PIL Image object - - And 200+ more formats... - Excel file usage examples: - excel_file = load_file('file_id') - sheet_names = excel_file.sheet_names - df = excel_file.parse('Sheet1') - df2 = pd.read_excel(excel_file, sheet_name='Sheet1') + CORRECT usage for CSV: + df = load_file('file_id') # Returns DataFrame directly + print(df.head()) - Available files: {{', '.join(FILES.keys()) if FILES else 'None'}} + WRONG usage (DO NOT DO THIS): + file_path = load_file('file_id') # WRONG! 
This is a DataFrame, not a path + df = pd.read_csv(file_path) # This will FAIL! + + If you need the file path instead, use get_file_path(): + path = get_file_path('file_id') + df = pd.read_csv(path) + + Available files: Use list(FILES.keys()) to see available files ''' if file_id not in FILES: available_files = list(FILES.keys()) diff --git a/src/utils/monitoring.py b/src/utils/monitoring.py index a2ffd21..4cd623f 100644 --- a/src/utils/monitoring.py +++ b/src/utils/monitoring.py @@ -190,7 +190,7 @@ def capture_exception( """ logger.exception(f"Captured exception: {exception}") - if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client: + if SENTRY_AVAILABLE and sentry_sdk.is_initialized(): with sentry_sdk.push_scope() as scope: if context: for key, value in context.items(): @@ -219,7 +219,7 @@ def capture_message( log_method = getattr(logger, level, logger.info) log_method(message) - if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client: + if SENTRY_AVAILABLE and sentry_sdk.is_initialized(): with sentry_sdk.push_scope() as scope: if context: for key, value in context.items(): @@ -242,7 +242,7 @@ def set_user_context( username: Discord username guild_id: Discord guild ID """ - if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client: + if SENTRY_AVAILABLE and sentry_sdk.is_initialized(): sentry_sdk.set_user({ "id": str(user_id), "username": username, @@ -325,7 +325,7 @@ async def measure_async(name: str, **metadata): # Start Sentry transaction if available transaction = None - if SENTRY_AVAILABLE and sentry_sdk.Hub.current.client: + if SENTRY_AVAILABLE and sentry_sdk.is_initialized(): transaction = sentry_sdk.start_transaction( op="task", name=name