
from playwright.sync_api import sync_playwright
from .models import Product
from django.utils.timezone import now
import json
import re
from urllib.parse import quote, urljoin, urlparse, urlunparse
from decimal import Decimal
import time


# Storefront root: used to build search/product URLs and to resolve relative links.
BASE_URL = "https://www.temu.com"
# Backend host. NOTE(review): not referenced by the code visible in this section — confirm it is still needed.
BACKEND_API_URL = "https://backend.temu.com"
# API host probed first by try_direct_backend_api().
API_URL = "https://api.temu.com"
# Value stored in the 'site' field of every product dict built by parse_single_product().
SITE_NAME = "temu"


# Leading run of comma-separated transformation tokens followed by "/",
# e.g. "w_300,h_300/". Deliberately written without nested quantifiers: the
# earlier "(...+...)+/" form backtracked exponentially on long file names
# that contain no "/" (the match could effectively hang).
_TRANSFORMATION_PREFIX = re.compile(
    r'^[a-zA-Z_][a-zA-Z_\d]*(?:,[a-zA-Z_][a-zA-Z_\d]*)*/'
)


def get_base_image_url(image_url):
    """Return the base (untransformed) form of a kwcdn.com image URL.

    Two URL families are handled:

    * URLs containing ``img.kwcdn.com``: the path is rewritten to
      ``/product/fancy/<file>`` and the query string and fragment are dropped.
      A path that already contains ``/product/fancy/`` keeps everything after
      that prefix; otherwise a leading transformation segment after
      ``/product/`` is stripped.
    * Any other URL containing ``kwcdn.com``: only the query string and
      fragment are dropped.

    Args:
        image_url: Candidate image URL.

    Returns:
        The cleaned URL, or ``None`` when the input is not a string, cannot be
        parsed, or does not match a supported layout.
    """
    if not isinstance(image_url, str):
        return None

    try:
        parsed_url = urlparse(image_url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced IPv6 brackets).
        return None

    # Case 1: img.kwcdn.com
    if "img.kwcdn.com" in image_url:
        path = parsed_url.path

        # Preferred layout: keep whatever follows /product/fancy/.
        fancy_match = re.search(r'/product/fancy/(.*)$', path)
        if fancy_match:
            base_path = f"/product/fancy/{fancy_match.group(1)}"
            return urlunparse(parsed_url._replace(path=base_path, query='', fragment=''))

        # Fallback layout: /product/<transformations>/.../<file>
        product_index = path.find('/product/')
        if product_index == -1:
            return None

        remainder = path[product_index + len('/product/'):]
        # Drop a single version segment such as "v12/".
        remainder = re.sub(r'v\d+/', '', remainder, count=1)

        if not _TRANSFORMATION_PREFIX.match(remainder):
            return None

        # The real asset starts at the first segment that looks like a file
        # name or a known directory; everything before it is transformation noise.
        parts = remainder.split('/')
        start_index = 0
        for i, part in enumerate(parts):
            if '.' in part or 'media' in part or 'fancy' in part or 'product' in part:
                start_index = i
                break

        cleaned = '/'.join(parts[start_index:])
        if not cleaned:
            return None

        # Avoid producing /product/fancy/fancy/... when the prefix is already there.
        if cleaned.startswith('fancy/'):
            base_path = f"/product/{cleaned}"
        else:
            base_path = f"/product/fancy/{cleaned}"
        return urlunparse(parsed_url._replace(path=base_path, query='', fragment=''))

    # Case 2: any other kwcdn.com host — the original image is the URL without its query.
    if "kwcdn.com" in image_url:
        return urlunparse(parsed_url._replace(query='', fragment=''))

    # Unsupported host
    return None



def _first_int(text):
    """Return the first run of digits in *text* as an int, or 0 when there is none."""
    match = re.search(r'\d+', text or '')
    return int(match.group()) if match else 0


def _absolute_image_src(src):
    """Prefix a protocol-relative ``src`` with ``https:``; return other values unchanged.

    Callers only pass values starting with ``http`` or ``//``, so root-relative
    paths never reach this helper.
    """
    return 'https:' + src if src.startswith('//') else src


def _find_image_by_selectors(page):
    """Return the first plausible product image URL found via known CSS selectors, else None."""
    image_selectors = [
        'img[data-testid="product-image"]',
        '.product-image img',
        '.main-image img',
        '.product-gallery img',
        'img[alt*="product"]',
        'img[src*="product"]',
        '.image-container img',
        '.product-media img',
        'img[class*="product"]',
        'img[class*="main"]',
        # Generic fallbacks
        'img[width="400"]',
        'img[width="500"]',
        'img[height="400"]',
        'img[height="500"]'
    ]
    keywords = ('product', 'item', 'goods')
    extensions = ('.jpg', '.jpeg', '.png', '.webp')

    for selector in image_selectors:
        try:
            for element in page.query_selector_all(selector):
                src = element.get_attribute('src')
                if not src or not src.startswith(('http', '//')):
                    continue
                lowered = src.lower()
                # Accept anything that looks like a product asset or a regular image file.
                if any(k in lowered for k in keywords) or any(e in lowered for e in extensions):
                    return _absolute_image_src(src)
        except Exception:
            # Best effort: a failing selector must not prevent trying the next one.
            continue
    return None


def _find_largest_image(page):
    """Return the URL of the <img> with the largest declared width*height (> 10000), else None."""
    largest_src = None
    max_size = 0

    for img in page.query_selector_all('img'):
        src = img.get_attribute('src')
        if not src or not src.startswith(('http', '//')):
            continue
        try:
            width = _first_int(img.get_attribute('width'))
            height = _first_int(img.get_attribute('height'))
        except Exception:
            # Skip images whose attributes cannot be read.
            continue

        size = width * height
        if size > max_size and size > 10000:  # Minimum size threshold
            max_size = size
            largest_src = src

    return _absolute_image_src(largest_src) if largest_src else None


def _extract_image_with_context(url, browser_context):
    """Open *url* in a new page of *browser_context* and return the best image URL or None."""
    page = None
    try:
        page = browser_context.new_page()

        # Navigate to product page
        page.goto(url, timeout=30000, wait_until='domcontentloaded')
        page.wait_for_timeout(3000)  # Wait for images to load

        image_url = _find_image_by_selectors(page)
        if image_url:
            print(f"   ✅ Found image: {image_url[:80]}...")
        else:
            # If no specific product image found, try to get the largest image
            try:
                image_url = _find_largest_image(page)
                if image_url:
                    print(f"   📏 Using largest image: {image_url[:80]}...")
            except Exception as e:
                print(f"   ❌ Error finding largest image: {e}")

        if not image_url:
            print("   ❌ No suitable image found")
        elif "img.kwcdn.com" in image_url:
            # Reduce img.kwcdn.com URLs to their base form.
            processed_url = get_base_image_url(image_url)
            if processed_url:
                image_url = processed_url
                print(f"   img.kwcdn.com URL processed to base: {image_url[:80]}...")

        return image_url

    except Exception as e:
        print(f"   ❌ Error extracting image: {e}")
        return None
    finally:
        # Always release the page, otherwise failed extractions pile up pages
        # inside a context shared across many products.
        if page is not None:
            try:
                page.close()
            except Exception:
                # The browser may already be gone; nothing left to clean up.
                pass


def extract_image_from_product_page(url, browser_context=None):
    """Extract an image URL from an individual product page.

    Args:
        url: Product page URL; must contain ``temu.com``.
        browser_context: Optional Playwright browser context to reuse. When
            omitted, a headless Chromium browser is launched for this call and
            closed before returning.

    Returns:
        The image URL (reduced to its base form for img.kwcdn.com URLs), or
        ``None`` when the URL is rejected, no image is found, or an error occurs.
    """
    if not url or 'temu.com' not in url:
        return None

    print(f"🖼️  Extracting image from: {url}")

    if browser_context is not None:
        return _extract_image_with_context(url, browser_context)

    # No context supplied: Playwright must stay alive for the whole extraction.
    # Leaving the `with sync_playwright()` block stops Playwright, so the
    # context has to be used inside it, not after it.
    try:
        with sync_playwright() as playwright:
            browser = playwright.chromium.launch(headless=True)
            try:
                own_context = browser.new_context(
                    viewport={'width': 1920, 'height': 1080},
                    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
                )
                return _extract_image_with_context(url, own_context)
            finally:
                browser.close()
    except Exception as e:
        print(f"   ❌ Error extracting image: {e}")
        return None

def extract_images_for_products(products):
    """Fill in ``image_url`` for each product dict, visiting product pages when needed.

    Products that already carry a non-kwcdn http(s) image are left alone;
    img.kwcdn.com images are reduced to their base URL; everything else is
    resolved by opening the product page in a shared headless browser context.
    The (mutated) input is returned.
    """
    if not products:
        return products

    total = len(products)
    print(f"\n🖼️  Starting image extraction for {total} products...")

    launch_args = [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled'
    ]

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True, args=launch_args)
        context = browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
        )

        try:
            for position, product in enumerate(products, start=1):
                print(f"\n📷 Processing {position}/{total}: {product['name'][:50]}...")

                current = product.get('image_url')
                if current:
                    is_http = current.startswith('http')
                    on_kwcdn = "img.kwcdn.com" in current

                    if on_kwcdn:
                        # kwcdn images are normalised to their base URL first.
                        base = get_base_image_url(current)
                        if base:
                            product['image_url'] = base
                            print(f"   img.kwcdn.com URL processed to base: {product['image_url'][:80]}...")
                            continue  # A good base URL makes the page visit unnecessary
                    elif is_http:
                        # Any other absolute URL is trusted as-is.
                        print(f"   ✅ Image already exists: {current[:80]}...")
                        continue

                # Fall back to scraping the product page itself.
                found = extract_image_from_product_page(product['url'], context)
                product['image_url'] = found if found else ''

                # Small pause between page visits to be respectful.
                time.sleep(1)

        except Exception as e:
            print(f"❌ Error during image extraction: {e}")
        finally:
            browser.close()

    return products

def intercept_temu_api_calls(query):
    """Load the Temu search page and harvest products from the JSON responses it triggers.

    A headless Chromium page is opened on the search URL for *query*; every
    HTTP 200 response with a JSON content type is recorded while the page is
    loaded, scrolled and a few sort/filter controls are clicked. After the
    browser is closed, each recorded payload is passed to
    ``extract_products_from_api_data``.

    Args:
        query: Search phrase.

    Returns:
        A tuple ``(products, api_calls)`` where ``products`` is the list of
        extracted product dicts and ``api_calls`` is a list of dicts with the
        keys ``url``, ``method``, ``data`` and ``headers``.
    """
    api_calls = []
    products = []

    def handle_response(response):
        """Record JSON responses that may contain product data."""
        content_type = response.headers.get('content-type', '').lower()
        if 'json' not in content_type or response.status != 200:
            return

        try:
            data = response.json()
        except Exception as e:
            print(f"   ❌ Failed to parse JSON: {e}")
            return

        api_calls.append({
            'url': response.url,
            # Record the real verb of the originating request instead of assuming GET.
            'method': response.request.method,
            'data': data,
            'headers': dict(response.headers)
        })

        # Quick check if this looks like product data. ensure_ascii=False keeps
        # "₦" literal; with the default it is escaped and can never match.
        data_str = json.dumps(data, ensure_ascii=False)[:500].lower()
        if any(term in data_str for term in ['product', 'name', 'price', '₦', 'naira']):
            print(f"   🎯 Potentially contains product data!")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=[
                '--no-sandbox',
                '--disable-dev-shm-usage',
                '--disable-blink-features=AutomationControlled'
            ]
        )

        try:
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
            )

            page = context.new_page()
            page.on('response', handle_response)

            # Navigate to search page
            search_url = f"{BASE_URL}/search?search={quote(query)}"
            print(f"🌐 Navigating to: {search_url}")

            page.goto(search_url, timeout=60000, wait_until='domcontentloaded')

            # Wait for initial load
            print("⏳ Waiting for initial page load...")
            page.wait_for_timeout(8000)

            # Scroll to trigger lazy loading and more API calls
            print("📜 Scrolling to trigger more API calls...")
            for i in range(5):
                page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
                page.wait_for_timeout(3000)
                print(f"   Scroll {i+1}/5 completed")

            # Try interacting with filters/sorting to trigger more API calls
            print("🔄 Trying to trigger filter/sort API calls...")
            try:
                sort_elements = page.query_selector_all("select, button[data-testid*='sort'], .sort, .filter")
                for i, element in enumerate(sort_elements[:3]):
                    try:
                        element.click()
                        page.wait_for_timeout(2000)
                        print(f"   Clicked element {i+1}")
                    except Exception:
                        # Hidden or detached controls are expected; try the next one.
                        continue
            except Exception as e:
                print(f"Filter interaction failed: {e}")

            # Final wait for any delayed API calls
            print("⏳ Final wait for API calls...")
            page.wait_for_timeout(5000)

            print(f"\n📊 Captured {len(api_calls)} API calls")

        except Exception as e:
            print(f"Browser error: {e}")
        finally:
            browser.close()

    # Process captured API calls (also reached after a browser error, so
    # whatever was captured before the failure is still analysed).
    print("\n🔍 Analyzing captured API calls...")
    for i, call in enumerate(api_calls):
        print(f"\nAPI Call {i+1}: {call['url']}")
        try:
            extracted_products = extract_products_from_api_data(call['data'], call['url'])
            products.extend(extracted_products)
            if extracted_products:
                print(f"   ✅ Extracted {len(extracted_products)} products")
        except Exception as e:
            print(f"   ❌ Error processing: {e}")

    return products, api_calls

def extract_products_from_api_data(data, source_url):
    """Walk an arbitrary JSON payload and collect every product it appears to contain.

    ``data`` is traversed depth-first. Dicts whose key names hint at product
    data are handed to ``parse_single_product``; its non-empty results are
    returned in traversal order. ``source_url`` is accepted for the caller's
    benefit but not used here.
    """
    list_markers = ('name', 'title', 'price', 'cost', 'amount', 'product')
    dict_markers = ('name', 'title', 'price', 'cost', 'amount')

    def hints_at_product(lowered_keys, markers):
        """True when any marker occurs in the space-joined, lower-cased key names."""
        joined = ' '.join(lowered_keys)
        return any(marker in joined for marker in markers)

    def walk(node, path):
        """Yield parsed products found at or below *node*."""
        if isinstance(node, list):
            # Only the first 50 entries of a list are inspected.
            for index, entry in enumerate(node[:50]):
                if not isinstance(entry, dict):
                    continue
                entry_path = f"{path}[{index}]"
                lowered = [name.lower() for name in entry.keys()]
                if hints_at_product(lowered, list_markers):
                    parsed = parse_single_product(entry, entry_path)
                    if parsed:
                        yield parsed
                else:
                    # Not product-like itself: look inside it.
                    yield from walk(entry, entry_path)

        elif isinstance(node, dict):
            lowered = [name.lower() for name in node.keys()]
            # A dict counts as a product only when it has more than three keys.
            if len(lowered) > 3 and hints_at_product(lowered, dict_markers):
                parsed = parse_single_product(node, path)
                if parsed:
                    yield parsed

            # Nested containers are always explored, product or not.
            for key, value in node.items():
                if isinstance(value, (dict, list)):
                    yield from walk(value, f"{path}.{key}")

    return list(walk(data, "root"))



# Top-level keys that may hold a price, in priority order.
_PRICE_KEYS = (
    'price', 'current_price', 'selling_price', 'amount', 'cost', 'sale_price',
    'price_info', 'pricing', 'final_price', 'display_price', 'regular_price',
    'list_price', 'retail_price', 'unit_price', 'product_price',
)

# Keys looked up inside a price given as a dict, in priority order.
_NESTED_PRICE_KEYS = (
    'amount', 'value', 'price', 'current', 'final', 'display',
    'selling', 'regular', 'base', 'unit', 'cost',
)


def _price_from_scalar(value):
    """Convert a number or price string to a whole-number price; None if unreadable."""
    if isinstance(value, bool):
        # bool is an int subclass, but True/False is never a price.
        return None
    if isinstance(value, (int, float)):
        try:
            return int(value)
        except (ValueError, OverflowError):  # NaN / infinity
            return None
    if isinstance(value, Decimal):
        try:
            return int(value.quantize(Decimal('1')))
        except (ArithmeticError, ValueError):  # NaN / infinity
            return None
    if isinstance(value, str):
        # Strip currency symbols, thousands separators and any other noise.
        digits = re.sub(r'[^\d.]', '', value)
        if not digits:
            return None
        try:
            return int(float(digits))
        except (ValueError, OverflowError):
            # e.g. "1.234.567": fall back to the first run of digits.
            first_run = re.search(r'\d+', value)
            return int(first_run.group()) if first_run else None
    return None


def _price_from_value(value):
    """Read a price from a scalar, a dict of price fields, or a list (first entry)."""
    if isinstance(value, dict):
        for subkey in _NESTED_PRICE_KEYS:
            nested = _price_from_scalar(value.get(subkey))
            if nested is not None and nested > 0:
                return nested
        return None
    if isinstance(value, list):
        # Sometimes the price is a list (e.g. a price range); use its first entry.
        return _price_from_scalar(value[0]) if value else None
    return _price_from_scalar(value)


def _product_url(item, name, base_url):
    """Return the product URL from *item*, or construct one from its id/slug/name."""
    product_id = None
    for key in ('id', 'product_id', 'productId', 'entity_id', 'goods_id', 'item_id'):
        if item.get(key):
            product_id = str(item[key]).strip()
            break

    slug = None
    for key in ('slug', 'product_slug', 'url_key', 'goods_name_slug', 'name_slug'):
        if item.get(key):
            slug = str(item[key]).strip()
            break

    print(f"Debug - Product ID: {product_id}, Slug: {slug}, Name: {name[:30]}...")

    # Prefer an explicit URL; stop at the first one that looks like a product page.
    url = None
    for key in ('url', 'link', 'href', 'product_url', 'goods_url'):
        if not item.get(key):
            continue
        candidate = str(item[key])
        if candidate.startswith('http'):
            url = candidate
        elif candidate.startswith('/'):
            url = urljoin(base_url, candidate)
        if url and ('/ng/' in url or '/channel/' in url):
            break
    if url:
        return url

    if not slug:
        # Derive a slug from the name: lowercase, punctuation removed, hyphen-separated.
        slug = re.sub(r'[^\w\s-]', '', name.lower())
        slug = re.sub(r'[-\s]+', '-', slug).strip('-')
        print(f"Generated slug from name: {slug}")

    if product_id and slug:
        # URL pattern used by this module: /ng/{slug}-g-{product_id}.html
        url = f"{base_url}/ng/{slug}-g-{product_id}.html"
        print(f"Constructed URL with ID and slug: {url}")
        return url
    if slug:
        # No product id available: slug-only URL (less ideal).
        url = f"{base_url}/ng/{slug}.html"
        print(f"Constructed URL with slug only: {url}")
        return url
    if product_id:
        # The name produced an empty slug; build a minimal one from it.
        minimal_slug = re.sub(r'[^\w]', '-', name.lower())[:50]
        minimal_slug = re.sub(r'-+', '-', minimal_slug).strip('-')
        url = f"{base_url}/ng/{minimal_slug}-g-{product_id}.html"
        print(f"Constructed URL with minimal slug: {url}")
        return url

    # Last resort: a search URL for the product name.
    print(f"Warning: Falling back to search URL for '{name[:50]}...'")
    return f"{base_url}/search?q={quote(name)}"


def _image_candidate(value):
    """Pull an image URL string out of a str, a list (first entry) or a dict; else None."""
    if isinstance(value, str):
        return value
    if isinstance(value, list):
        return _image_candidate(value[0]) if value else None
    if isinstance(value, dict):
        for subkey in ('url', 'src', 'href'):
            nested = value.get(subkey)
            if isinstance(nested, str) and nested:
                return nested
    return None


def _product_image_url(item, base_url):
    """Return an absolute image URL for *item*, or None when it has none."""
    for key in ('image', 'image_url', 'thumbnail', 'photo', 'picture', 'images', 'goods_img'):
        if not item.get(key):
            continue
        image_url = _image_candidate(item[key])
        if not image_url:
            continue

        if image_url.startswith('//'):
            image_url = 'https:' + image_url
        elif image_url.startswith('/'):
            image_url = base_url + image_url

        # Reduce img.kwcdn.com URLs to their base form.
        if "img.kwcdn.com" in image_url:
            processed_url = get_base_image_url(image_url)
            if processed_url:
                image_url = processed_url
                print(f"   img.kwcdn.com URL processed to base: {image_url[:80]}...")
        return image_url
    return None


def parse_single_product(item, source_path):
    """Parse a single product from API data.

    Args:
        item: Candidate product; anything that is not a dict is rejected.
        source_path: Location of *item* inside the payload (e.g. ``root.a[0]``).
            Accepted for the caller's benefit; not used by the parser.

    Returns:
        A dict with the keys ``name``, ``site``, ``url``, ``price``,
        ``image_url``, ``stock``, ``return_policy`` and ``last_checked``, or
        ``None`` when the item has no usable name or no price greater than zero.
        ``price`` is the first positive value found under the known price keys,
        as a whole number.
    """
    if not isinstance(item, dict):
        return None

    # Extract name/title
    name = None
    for key in ('name', 'title', 'product_name', 'productName', 'display_name'):
        candidate = str(item.get(key) or '').strip()
        if candidate:
            name = candidate
            break
    if not name:
        return None

    # Extract price: first key that yields a positive value wins.
    price = 0
    for key in _PRICE_KEYS:
        if key not in item:
            continue
        candidate_price = _price_from_value(item[key])
        if candidate_price is not None and candidate_price > 0:
            price = candidate_price
            break

    print(f"Debug - Final extracted price: {price}")
    if not price > 0:
        print(f"Invalid product price detected: {price}")
        return None

    # The globals() guards keep this parser usable when the module-level
    # constants / Django helper are not available.
    base_url = BASE_URL if 'BASE_URL' in globals() and BASE_URL else "https://www.temu.com"

    url = _product_url(item, name, base_url)
    image_url = _product_image_url(item, base_url)

    return {
        'name': name[:200],
        'site': SITE_NAME if 'SITE_NAME' in globals() else 'Temu',
        'url': url,
        'price': price,
        'image_url': image_url or '',  # May be filled in later by image extraction
        'stock': True,
        'return_policy': "Return within 7 days",
        'last_checked': now() if 'now' in globals() else None
    }



def try_direct_backend_api(query):
    """Query a few candidate Temu search endpoints directly and return the first products found.

    The endpoints are tried in order (one on ``API_URL``, two on ``BASE_URL``);
    the first one that answers HTTP 200 with JSON from which
    ``extract_products_from_api_data`` extracts at least one product wins.

    Args:
        query: Search phrase.

    Returns:
        A list of product dicts, or an empty list when no endpoint yields any.
    """
    import requests

    encoded_query = quote(query)
    endpoints_to_try = [
        f"{API_URL}/search?q={encoded_query}",
        f"{BASE_URL}/api/search?query={encoded_query}",
        f"{BASE_URL}/backend/search?q={encoded_query}",
    ]

    # The session is used as a context manager so its connections are released.
    with requests.Session() as session:
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Referer': BASE_URL,
            'Origin': BASE_URL
        })

        for endpoint in endpoints_to_try:
            try:
                response = session.get(endpoint, timeout=10)
            except requests.RequestException as e:
                print(f"   ❌ Request failed: {e}")
                continue

            if response.status_code != 200:
                continue

            try:
                data = response.json()
            except ValueError:
                # Every JSON decode error raised by response.json() derives from
                # ValueError, whichever JSON backend requests is using.
                print(f"   ❌ Not JSON response")
                continue

            try:
                products = extract_products_from_api_data(data, endpoint)
            except Exception as e:
                print(f"   ❌ Error processing: {e}")
                continue

            if products:
                return products

    return []

def main_temu(query):
    """Search Temu for *query* and return up to ten products with images resolved.

    The direct API endpoints are tried first; if they yield nothing, the
    frontend's own API calls are intercepted in a headless browser. Whatever
    products are found are capped at ``MAX_PRODUCTS`` and then passed through
    ``extract_images_for_products``.

    Args:
        query: Search phrase.

    Returns:
        A list of product dicts (possibly empty).
    """
    start_time = time.time()
    MAX_PRODUCTS = 10  # Maximum number of products to return

    print("=== TRYING DIRECT BACKEND API ===")
    # Slicing is a no-op when fewer than MAX_PRODUCTS were found.
    products = (try_direct_backend_api(query) or [])[:MAX_PRODUCTS]

    if products:
        print(f"✅ Success with direct API! Found {len(products)} products")
    else:
        print("\n=== INTERCEPTING FRONTEND API CALLS ===")
        products, api_calls = intercept_temu_api_calls(query)
        products = products[:MAX_PRODUCTS]

        if products:
            print(f"✅ Success with API interception! Found {len(products)} products")
        else:
            print("\n❌ All API methods failed")

            # List the first few captured calls to help manual debugging.
            if api_calls:
                print(f"\n📝 Captured {len(api_calls)} API calls for manual analysis:")
                for i, call in enumerate(api_calls[:5], start=1):
                    print(f"{i}. {call['url']}")

    # Extract images for the (now potentially limited) products
    if products:
        print("\n=== EXTRACTING PRODUCT IMAGES ===")
        products = extract_images_for_products(products)
        # temu_db(products)
    end_time = time.time()
    print(f"\n✨ Script finished in {end_time - start_time:.2f} seconds.")

    return products

def temu_db(products):
    """Save products to the database.

    Each item is upserted through ``Product.objects.update_or_create`` keyed on
    name, site and URL. Items without a name, a URL or a price greater than
    zero are skipped; a failure on one item is reported and does not stop the
    others.

    Args:
        products: Iterable of product dicts as produced by ``parse_single_product``.

    Returns:
        None. Progress and the number of saved products are printed.
    """
    if not products:
        print("No products to store.")
        return

    success_count = 0
    for item in products:
        try:
            price = item.get('price')
            try:
                has_valid_price = price is not None and price > 0
            except TypeError:
                # A non-numeric price is invalid data, not a database error.
                has_valid_price = False

            if not item.get('name') or not item.get('url') or not has_valid_price:
                print(f"Skipping product due to missing name or URL or invalid price: {item}")
                continue

            product, created = Product.objects.update_or_create(
                name=item['name'][:200],
                site=item.get('site', SITE_NAME),
                url=item['url'],
                defaults={
                    'price': price,
                    # image_url may be missing or None; store an empty string then.
                    'image_url': (item.get('image_url') or '')[:500],
                    'stock': item.get('stock', True),
                    'return_policy': item.get('return_policy', 'Return within 7 days'),
                    'last_checked': now()
                }
            )

            action = "Created" if created else "Updated"
            image_status = "✅" if product.image_url else "❌"
            print(f"{action}: {product.name[:50]}... - ₦{product.price:,} {image_status}")
            success_count += 1

        except Exception as e:
            print(f"DB Error for product {item.get('name', 'N/A')} (URL: {item.get('url', 'N/A')}): {e}")

    print(f"Successfully saved {success_count} products")
   

