#!/usr/bin/env python
"""
Migrate Discord Image URLs to New Image Generation System

This script:
1. Scans codebase for Discord CDN URLs
2. Imports them to generated_images table as 'manual' provider
3. Optionally generates replacement images using AI
4. Updates files with new URLs

Usage:
    # Import Discord URLs to database
    python migrate_discord_images.py --import

    # Generate AI replacements for Discord images
    python migrate_discord_images.py --generate

    # Preview what would be changed (dry run)
    python migrate_discord_images.py --preview
"""

import os
import re
import json
import asyncio
import argparse
import logging
from typing import Dict, List, Tuple
from pathlib import Path
from database.db_operations import get_database_connection
from image_generation import generate_and_cache_image, save_generated_image

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

PROJECT_ROOT = Path(__file__).parent.parent


# ============================================================
# Discord URL Detection
# ============================================================

# Patterns to find Discord CDN URLs
DISCORD_PATTERNS = [
    r'https://cdn\.discordapp\.com/[^"\'\)\s]+\.(png|jpg|jpeg|gif)',
    r'https://media\.discordapp\.net/[^"\'\)\s]+\.(png|jpg|jpeg|gif)',
]

# Files to scan
FILES_TO_SCAN = [
    'main.js',
    'index.html',
    'ws/mockData.json',
    'ws/events.py',
    'ws/dayEvents.py',
    'ws/conversationEvents.py',
]


def find_discord_urls_in_file(file_path: Path) -> List[Tuple[str, str, int]]:
    """
    Find all Discord URLs in a file.

    Returns:
        List of (url, context_line, line_number) tuples
    """
    urls_found = []

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line_num, line in enumerate(lines, 1):
            for pattern in DISCORD_PATTERNS:
                matches = re.finditer(pattern, line)
                for match in matches:
                    url = match.group(0)
                    # Remove query parameters for comparison
                    base_url = url.split('?')[0]
                    urls_found.append((base_url, line.strip(), line_num))

    except Exception as e:
        logger.warning(f"Error reading {file_path}: {e}")

    return urls_found


def scan_all_files() -> Dict[str, List[Tuple[str, str, int]]]:
    """
    Scan all files for Discord URLs.

    Returns:
        Dict mapping file paths to list of (url, context, line_num) tuples
    """
    results = {}

    for file_rel_path in FILES_TO_SCAN:
        file_path = PROJECT_ROOT / file_rel_path

        if not file_path.exists():
            logger.warning(f"File not found: {file_path}")
            continue

        urls = find_discord_urls_in_file(file_path)

        if urls:
            results[file_rel_path] = urls
            logger.info(f"Found {len(urls)} Discord URL(s) in {file_rel_path}")

    return results


# ============================================================
# Image Identification & Prompts
# ============================================================

# Map Discord URLs to descriptions for AI generation
DISCORD_IMAGE_METADATA = {
    # Seasonal backgrounds (main.js)
    'craig_vg_spring_day_cozy_cartoon': {
        'prompt': 'beautiful spring day landscape with flowers blooming and blue sky',
        'event_type': 'season_spring',
        'event_category': 'seasonal',
        'description': 'Spring season background'
    },
    'craig_vg_summer_day_cozy_cartoon': {
        'prompt': 'sunny summer day landscape with bright sun and clear blue sky',
        'event_type': 'season_summer',
        'event_category': 'seasonal',
        'description': 'Summer season background'
    },
    'craig_vg_fall_day_cozy_cartoon': {
        'prompt': 'autumn fall day landscape with orange and red leaves',
        'event_type': 'season_autumn',
        'event_category': 'seasonal',
        'description': 'Autumn season background'
    },
    'craig_vg_winter_day_cozy_cartoon': {
        'prompt': 'winter day landscape with snow and evergreen trees',
        'event_type': 'season_winter',
        'event_category': 'seasonal',
        'description': 'Winter season background'
    },

    # School images
    'greenwood_elementary_school_exterior': {
        'prompt': 'large elementary school building exterior with playground',
        'event_type': 'elementary_school_exterior',
        'event_category': 'education',
        'description': 'Elementary school building'
    },
    'Eagle_Private_Academy_elementary': {
        'prompt': 'prestigious private elementary school building exterior',
        'event_type': 'private_school_exterior',
        'event_category': 'education',
        'description': 'Private academy building'
    },
}


def identify_image(url: str) -> Dict:
    """
    Identify what an image is based on URL keywords.

    Returns:
        Dict with prompt, event_type, category, description
    """
    # Check known images first
    for key, metadata in DISCORD_IMAGE_METADATA.items():
        if key in url:
            return metadata

    # Generic fallback based on URL patterns
    url_lower = url.lower()

    if 'school' in url_lower:
        return {
            'prompt': 'school building exterior',
            'event_type': 'school_generic',
            'event_category': 'education',
            'description': 'School building (generic)'
        }
    elif 'bedroom' in url_lower:
        return {
            'prompt': 'cozy bedroom interior',
            'event_type': 'bedroom_generic',
            'event_category': 'home',
            'description': 'Bedroom (generic)'
        }
    elif 'spring' in url_lower:
        return {
            'prompt': 'spring landscape',
            'event_type': 'season_spring',
            'event_category': 'seasonal',
            'description': 'Spring scene'
        }
    elif 'summer' in url_lower:
        return {
            'prompt': 'summer landscape',
            'event_type': 'season_summer',
            'event_category': 'seasonal',
            'description': 'Summer scene'
        }
    elif 'fall' in url_lower or 'autumn' in url_lower:
        return {
            'prompt': 'autumn landscape',
            'event_type': 'season_autumn',
            'event_category': 'seasonal',
            'description': 'Autumn scene'
        }
    elif 'winter' in url_lower:
        return {
            'prompt': 'winter landscape',
            'event_type': 'season_winter',
            'event_category': 'seasonal',
            'description': 'Winter scene'
        }

    # Ultimate fallback
    return {
        'prompt': 'scene from life simulation game',
        'event_type': 'generic',
        'event_category': 'misc',
        'description': 'Generic scene (please add prompt manually)'
    }


# ============================================================
# Import to Database
# ============================================================

async def import_discord_urls():
    """Import all Discord URLs to database as manual images"""
    logger.info("Scanning files for Discord URLs...")
    all_urls = scan_all_files()

    if not all_urls:
        logger.info("No Discord URLs found.")
        return

    # Flatten and deduplicate URLs
    unique_urls = set()
    for file_urls in all_urls.values():
        for url, _, _ in file_urls:
            unique_urls.add(url)

    logger.info(f"\nFound {len(unique_urls)} unique Discord images")
    logger.info(f"Importing to database...\n")

    db = get_database_connection()
    cursor = db.cursor()

    imported = 0
    skipped = 0

    for url in unique_urls:
        # Check if already exists
        cursor.execute("""
            SELECT id FROM generated_images
            WHERE image_url = %s
        """, (url,))

        if cursor.fetchone():
            logger.info(f"⊘ Already exists: {url[:60]}...")
            skipped += 1
            continue

        # Identify image
        metadata = identify_image(url)

        # Insert to database
        cursor.execute("""
            INSERT INTO generated_images
            (image_url, prompt, style_preset, event_type, event_category,
             provider, model_version, generation_cost)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            url,
            metadata['prompt'],
            'cozy_cartoon',
            metadata['event_type'],
            metadata['event_category'],
            'manual',
            'midjourney',
            0.00  # No cost, manually created
        ))

        db.commit()
        image_id = cursor.lastrowid

        logger.info(f"✓ Imported #{image_id}: {metadata['description']}")
        logger.info(f"  URL: {url[:70]}...")
        logger.info(f"  Type: {metadata['event_type']}")
        imported += 1

    logger.info(f"\n{'='*70}")
    logger.info(f"Import complete:")
    logger.info(f"  Imported: {imported}")
    logger.info(f"  Skipped: {skipped}")
    logger.info(f"{'='*70}\n")


# ============================================================
# Generate AI Replacements
# ============================================================

async def generate_replacements(provider: str = 'imagen4'):
    """Generate AI replacements for Discord images"""
    logger.info("Fetching manual Discord images from database...")

    db = get_database_connection()
    cursor = db.cursor(dictionary=True)

    cursor.execute("""
        SELECT * FROM generated_images
        WHERE provider = 'manual' AND is_active = TRUE
        ORDER BY event_category, event_type
    """)

    images = cursor.fetchall()

    if not images:
        logger.info("No manual images found. Run --import first.")
        return

    logger.info(f"Found {len(images)} Discord images to replace")
    logger.info(f"Generating AI replacements using {provider.upper()}...\n")

    for i, img in enumerate(images, 1):
        logger.info(f"[{i}/{len(images)}] {img['event_type']}")
        logger.info(f"  Original: {img['image_url'][:60]}...")
        logger.info(f"  Prompt: {img['prompt']}")

        # Generate new image
        new_url = await generate_and_cache_image(
            prompt=img['prompt'],
            event_type=f"{img['event_type']}_ai",
            event_category=img['event_category'],
            provider=provider,
            use_cache=False
        )

        if new_url:
            logger.info(f"  ✓ Generated: {new_url[:60]}...")

            # Store mapping for later reference
            cursor.execute("""
                INSERT INTO event_images (event_function_name, event_type, generated_image_id)
                SELECT %s, %s, id FROM generated_images WHERE image_url = %s
            """, (img['event_type'], img['event_type'], new_url))
            db.commit()
        else:
            logger.error(f"  ✗ Failed to generate replacement")

        logger.info("")

        # Rate limiting
        if i < len(images):
            await asyncio.sleep(3)

    logger.info(f"{'='*70}")
    logger.info(f"Replacement generation complete: {len(images)} images")
    logger.info(f"Estimated cost: ${len(images) * 0.04:.2f}")
    logger.info(f"{'='*70}\n")


# ============================================================
# Preview Mode
# ============================================================

def preview_changes():
    """Preview what files and URLs would be affected"""
    logger.info("Scanning files for Discord URLs...\n")
    all_urls = scan_all_files()

    if not all_urls:
        logger.info("No Discord URLs found.")
        return

    total_urls = 0

    for file_path, urls in all_urls.items():
        logger.info(f"\n{'='*70}")
        logger.info(f"File: {file_path}")
        logger.info(f"{'='*70}")

        for url, context, line_num in urls:
            total_urls += 1
            metadata = identify_image(url)

            logger.info(f"\nLine {line_num}:")
            logger.info(f"  Current URL: {url[:70]}...")
            logger.info(f"  Identified as: {metadata['description']}")
            logger.info(f"  AI Prompt: {metadata['prompt']}")
            logger.info(f"  Category: {metadata['event_category']}")
            logger.info(f"  Context: {context[:100]}...")

    logger.info(f"\n{'='*70}")
    logger.info(f"Total Discord URLs found: {total_urls}")
    logger.info(f"Files affected: {len(all_urls)}")
    logger.info(f"{'='*70}\n")


# ============================================================
# Main CLI
# ============================================================

async def main():
    parser = argparse.ArgumentParser(
        description="Migrate Discord images to new AI generation system",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )

    parser.add_argument(
        '--import',
        dest='do_import',
        action='store_true',
        help='Import Discord URLs to database as manual images'
    )

    parser.add_argument(
        '--generate',
        action='store_true',
        help='Generate AI replacements for Discord images'
    )

    parser.add_argument(
        '--provider',
        choices=['imagen4', 'flux', 'dalle3'],
        default='imagen4',
        help='Provider for generating replacements (default: imagen4)'
    )

    parser.add_argument(
        '--preview',
        action='store_true',
        help='Preview Discord URLs and what would be changed'
    )

    args = parser.parse_args()

    if args.preview:
        preview_changes()
    elif args.do_import:
        await import_discord_urls()
    elif args.generate:
        await generate_replacements(provider=args.provider)
    else:
        parser.print_help()


if __name__ == "__main__":
    asyncio.run(main())
