from playwright.sync_api import sync_playwright, TimeoutError as PWTimeoutError
import urllib.parse
import re
import time
import threading
from queue import Queue

# Root of the Konga storefront; the search URL is built from it and relative
# product links are resolved against it.
BASE_URL = "https://www.konga.com"

def clean_price(raw_price: str):
    """Convert a scraped price string such as "₦12,500" into a whole number.

    Every character except digits and the decimal point is discarded, so
    currency symbols, thousands separators and whitespace are ignored. A
    fractional part, if present, is truncated.

    Args:
        raw_price: Price text as scraped from the page; may be empty or None.

    Returns:
        The price as an int, or 0 when no number can be parsed.
    """
    if not raw_price:
        return 0
    cleaned = re.sub(r"[^\d.]", "", raw_price)
    try:
        return int(cleaned)
    except ValueError:
        # int() rejects a decimal point; fall through to float parsing so
        # prices like "1,234.50" are not silently reported as 0.
        pass
    try:
        return int(float(cleaned))
    except (ValueError, OverflowError):
        # Empty, dots only, several decimal points, or too large for a float.
        return 0

def scrape_product(page, url: str):
    """Scrape a single Konga product page.

    Args:
        page: Playwright page used to navigate to ``url`` and query its DOM.
        url: URL of the product page.

    Returns:
        A dict with the keys ``url``, ``name``, ``site``, ``price``,
        ``image_url``, ``stock`` and ``return_policy``, or ``None`` when the
        page fails to load, is marked out of stock, or has no product title.
        ``price`` is 0 when no price could be read; ``stock`` is always True
        because out-of-stock pages are skipped.
    """
    try:
        page.goto(url, wait_until="load", timeout=60000)
    except PWTimeoutError:
        print(f"Network timeout while loading {url}. Skipping.")
        return None
    except Exception as e:
        print(f"Failed to load {url}: {str(e)}. Skipping.")
        return None

    # --- 1. Out of Stock Check ---
    # Match on the class-name stem: a plain `div.<name>__` class selector
    # requires an exact class match and would miss a class that carries a
    # generated suffix after the double underscore.
    out_of_stock_selector = (
        "div[class*='emptyResult_emptyResultInfo__'] "
        "h3:has-text('Item is out of stock')"
    )

    if page.locator(out_of_stock_selector).count() > 0:
        print(f"Item at {url} explicitly marked 'Out of Stock' using new selector. Skipping.")
        return None

    # --- 2. Scrape Title (Required) ---
    title_locator = page.locator("h4[class^='productDetail_productName__']")
    # `.first` avoids a strict-mode error if the heading matches more than once.
    title = title_locator.first.inner_text().strip() if title_locator.count() > 0 else None

    # --- 3. Missing Title Check (Implied Out of Stock/Unavailable) ---
    if not title:
        # The title is required, so the product is skipped either way; the
        # page content is only inspected to print a more precise reason.
        content_text = ""
        try:
            content = page.locator("div[class^='productDetail_content__']")
            # Checking count() first avoids blocking for the locator timeout
            # when the container is absent.
            if content.count() > 0:
                content_text = content.first.inner_text()
        except Exception:
            content_text = ""

        if "Keep calm and try searching" in content_text:
            print(f"Item at {url} name missing, detected unavailable message. Skipping.")
        else:
            print(f"Could not find product title or parse page content for {url}. Skipping.")
        return None

    # --- 4. Continue scraping if the item is available ---

    # Price: matched on the class-name stem rather than one exact generated
    # class name, consistent with the other selectors in this function.
    price = None
    try:
        price_container = page.locator("div[class*='priceBox_priceBoxPrice__']")
        if price_container.count():
            price = price_container.first.inner_text().strip()
    except Exception:
        price = None

    # Image URL (optional), taken from the Open Graph meta tag.
    image_url = None
    try:
        og_image = page.locator("meta[property='og:image']")
        if og_image.count():
            image_url = og_image.first.get_attribute("content")
    except Exception:
        image_url = None

    # NOTE: SKU and feature bullets are not collected because the returned
    # record has no fields for them.
    return {
        "url": url,
        "name": title,
        "site": "Konga",
        "price": clean_price(price),
        "image_url": image_url,
        "stock": True,  # Out-of-stock pages were skipped above
        "return_policy": "Return within 7 days",
    }

def scrape_konga_batch(query: str, batch_size=3, max_products=20, batch_callback=None):
    """
    Search Konga for ``query`` and scrape the resulting product pages in batches.

    Args:
        query: Search query.
        batch_size: Number of products per batch; values below 1 are treated as 1.
        max_products: Maximum number of product pages to visit. Zero or a
            negative value visits none.
        batch_callback: Optional callable. After each batch it is invoked as
            ``batch_callback(batch, completed=<bool>)``; on failure it is
            invoked as ``batch_callback([], completed=True, error=<message>)``.
            When there is nothing to scrape it is invoked once as
            ``batch_callback([], completed=True)`` so that consumers always
            receive a terminal signal.

    Returns:
        A list of the product dicts that were scraped successfully, or an
        empty list when the search fails or an unexpected error occurs.
    """
    start_time = time.time()
    search_url = f"{BASE_URL}/search?search={urllib.parse.quote(query)}"

    def report_error(error_msg):
        # Print as well as notify, so failures are visible without a callback.
        print(error_msg)
        if batch_callback:
            batch_callback([], completed=True, error=error_msg)

    try:
        # range() rejects a zero step and yields nothing for a negative one.
        step = max(1, batch_size)

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                try:
                    page.goto(search_url, wait_until="load", timeout=90000)
                except PWTimeoutError:
                    report_error(f"Network timeout while searching for '{query}'")
                    return []
                except Exception as e:
                    report_error(f"Failed to load search page: {str(e)}")
                    return []

                try:
                    page.wait_for_selector("main a[href*='/product/']", timeout=30000)
                except PWTimeoutError:
                    report_error(f"No products found or network too slow for '{query}'")
                    return []

                # Collect unique product URLs in page order.
                urls = []
                seen = set()
                for link in page.query_selector_all("main a[href*='/product/']"):
                    href = link.get_attribute("href")
                    if not href or not href.startswith("/product/"):
                        continue
                    full_url = urllib.parse.urljoin(BASE_URL, href)
                    if full_url not in seen:
                        seen.add(full_url)
                        urls.append(full_url)

                print(f"Found {len(urls)} products on Konga")

                # Apply the cap before batching so the final batch cannot
                # run past max_products.
                targets = urls[:max(0, max_products)]
                all_results = []

                if not targets and batch_callback:
                    # Nothing to scrape: still send the terminal signal.
                    batch_callback([], completed=True)

                # Process in batches
                for start in range(0, len(targets), step):
                    batch_results = []

                    for url in targets[start:start + step]:
                        try:
                            product_data = scrape_product(page, url)
                            if product_data is None:
                                continue
                            if "error" in product_data:
                                # Tolerates a {'error': ...} dict result.
                                print(f"Error scraping {url}: {product_data['error']}")
                                continue
                            batch_results.append(product_data)
                        except Exception as e:
                            print(f"Error scraping product: {e}")
                            continue

                    all_results.extend(batch_results)

                    is_completed = start + step >= len(targets)
                    if batch_callback:
                        batch_callback(batch_results, completed=is_completed)

                    # Small delay between batches; pointless after the last one.
                    if not is_completed:
                        time.sleep(0.5)

                end_time = time.time()
                print(f"✨ Konga batch scraping finished in {end_time - start_time:.2f} seconds. Found {len(all_results)} products.")

                return all_results
            finally:
                # Runs on every exit path, including the early returns above.
                browser.close()

    except Exception as e:
        report_error(f"Unexpected error: {str(e)}")
        return []

def scrape_konga_async(query: str, batch_size=3, max_products=20):
    """
    Non-blocking scrape: runs ``scrape_konga_batch`` in a daemon thread.

    Args:
        query: Search query.
        batch_size: Number of products per batch.
        max_products: Maximum products to scrape.

    Returns:
        A ``Queue`` that receives one dict per batch with the keys
        ``'batch'`` (list of products), ``'completed'``, ``'error'`` (message
        or None) and ``'site'`` (always ``'konga'``). A message with
        ``'completed'`` set is always delivered before the worker exits, so a
        consumer can stop reading once it sees one.
    """
    def scraper_thread(query, batch_size, max_products, queue):
        finished = False

        def emit(batch, completed, error):
            nonlocal finished
            finished = finished or bool(completed)
            queue.put({
                'batch': batch,
                'completed': completed,
                'error': error,
                'site': 'konga'
            })

        def batch_handler(batch, completed=False, error=None):
            emit(batch, completed, error)

        try:
            scrape_konga_batch(query, batch_size, max_products, batch_handler)
        except Exception as e:
            emit([], True, str(e))
        else:
            if not finished:
                # The scrape returned without signalling completion; without
                # this terminal message a consumer would block forever.
                emit([], True, None)

    queue = Queue()
    thread = threading.Thread(
        target=scraper_thread,
        args=(query, batch_size, max_products, queue),
        daemon=True
    )
    thread.start()

    return queue

# Original function maintained for backward compatibility
def scrape_konga(query: str, limit: int = 100):
    """Blocking scrape that returns every product in a single list.

    Delegates to ``scrape_konga_batch`` with the batch size equal to the
    product cap and no callback.
    """
    single_batch = limit
    return scrape_konga_batch(query, max_products=limit, batch_size=single_batch)

# Enhanced scraper class with progress tracking
class KongaScraper:
    """Configurable wrapper around ``scrape_konga_batch`` that reports running totals."""

    def __init__(self, batch_size=3, max_products=20):
        # Stored as-is and forwarded to scrape_konga_batch on each scrape.
        self.max_products = max_products
        self.batch_size = batch_size

    def scrape_with_progress(self, query, progress_callback=None):
        """Scrape ``query`` and forward each batch to ``progress_callback``.

        On success the callback receives the batch plus the keyword arguments
        ``completed``, ``total_so_far``, ``site`` and ``batch_number``. On
        error it receives an empty list with ``completed=True``, ``error``
        and ``site``. Returns whatever ``scrape_konga_batch`` returns.
        """
        # Mutable tally shared with the nested handler.
        tally = {"batches": 0, "products": 0}

        def handle_batch(batch, completed=False, error=None):
            if error:
                # Errors are passed on without touching the counters.
                if progress_callback:
                    progress_callback([], completed=True, error=error, site='konga')
                return

            tally["products"] += len(batch)
            tally["batches"] += 1

            if not progress_callback:
                return

            progress_callback(
                batch,
                completed=completed,
                total_so_far=tally["products"],
                site='konga',
                batch_number=tally["batches"],
            )

        return scrape_konga_batch(
            query,
            batch_size=self.batch_size,
            max_products=self.max_products,
            batch_callback=handle_batch,
        )
