from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time
import json
import re
from urllib.parse import quote

def _load_embedded_json(page_source, anchor_pattern):
    """Decode the JSON value that starts right after *anchor_pattern*.

    The anchor regex only locates where the value begins; the value itself
    is parsed by the JSON decoder, so nested objects/arrays and braces
    inside strings are handled correctly (a lazy regex cannot do that).

    Args:
        page_source: Full HTML text of the page.
        anchor_pattern: Regex whose match ends exactly where the JSON
            value begins.

    Returns:
        The decoded value, or None when the anchor is absent or the text
        following it is not valid JSON.
    """
    anchor = re.search(anchor_pattern, page_source)
    if anchor is None:
        return None
    try:
        # raw_decode stops at the end of the first complete JSON document
        # and ignores whatever script code follows it.
        value, _ = json.JSONDecoder().raw_decode(page_source[anchor.end():])
    except json.JSONDecodeError as e:
        print(f"JSON decode error: {e}")
        return None
    return value


def scrape_temu_products(search_term, num_products=10):
    """
    Scrape Temu search results for *search_term* with headless Chrome.

    Three strategies are tried in order until one yields products:
      1. JSON embedded in the page source (state objects / product arrays).
      2. DOM scraping with a list of candidate CSS selectors.
      3. Plain-text extraction from the page body.

    Side effects: writes 'temu_search_results.png' and
    'temu_page_source.html' to the current working directory for debugging.

    Args:
        search_term: Text to search for; it is URL-quoted before use.
        num_products: Maximum number of products to return.

    Returns:
        A list of product dicts (at most *num_products*). An empty list is
        returned when nothing is found or when any error occurs while
        scraping.
    """
    # Set up Chrome options to avoid detection
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1400,900")
    chrome_options.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(options=chrome_options)

    products = []

    try:
        # Register the navigator.webdriver override through CDP so that it
        # is evaluated in every new document. A plain execute_script() call
        # only patches the initial blank page and is lost on driver.get().
        # Done inside the try block so a failure still quits the browser.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
        )

        # Build the search URL; refer_page_id carries the current time in ms
        search_url = f"https://www.temu.com/search_result.html?search_key={quote(search_term)}&search_method=user&refer_page_el_sn=200010&srch_enter_source=top_search_entrance_10005&refer_page_name=home&refer_page_id=10005_{int(time.time()*1000)}_dmleundgd2&refer_page_sn=10005&_x_sessn_id=q7wynd4dej"

        print(f"Navigating to: {search_url}")
        driver.get(search_url)

        # Wait for page to load
        time.sleep(8)

        # Scroll to load more products
        for i in range(1, 4):
            scroll_position = i * 800
            driver.execute_script(f"window.scrollTo(0, {scroll_position});")
            print(f"Scrolled to position: {scroll_position}")
            time.sleep(2)

        # Take screenshot for debugging
        driver.save_screenshot('temu_search_results.png')
        print("Screenshot saved as temu_search_results.png")

        # Save page source for analysis
        page_source = driver.page_source
        with open('temu_page_source.html', 'w', encoding='utf-8') as f:
            f.write(page_source)

        # METHOD 1: Look for product data embedded in script tags
        print("Searching for product data in script tags...")
        # Each pattern is an anchor that ends (via lookahead) exactly at
        # the opening bracket of the JSON value to decode.
        script_patterns = [
            r'window\.__APP_INITIAL_STATE__\s*=\s*(?=\{)',
            r'window\.initialState\s*=\s*(?=\{)',
            r'var\s+productData\s*=\s*(?=\{)',
            r'"productInfo":\s*(?=\[)',
            r'"goodsList":\s*(?=\[)',
            r'"items":\s*(?=\[)',
        ]

        for pattern in script_patterns:
            product_data = _load_embedded_json(page_source, pattern)
            if product_data is None:
                continue
            print(f"Found data with pattern: {pattern[:50]}...")
            extracted_products = extract_products_from_json(product_data, num_products)
            if extracted_products:
                print(f"Extracted {len(extracted_products)} products from JSON")
                return extracted_products

        # METHOD 2: Direct DOM scraping for product cards
        print("Attempting direct DOM scraping...")

        # Candidate selectors, tried in order; the first one that yields
        # at least one product wins.
        temu_selectors = [
            "div[data-product-id]",
            "div._1hVov",  # Product card
            "div._6-CkO",  # Product container
            "div._2Kn8j",  # Product info
            "div._3mKVV",  # Price container
            "div[class*='product-card']",
            "div[class*='goods-item']",
            "article[data-product]",
            "section[data-product-list]"
        ]

        for selector in temu_selectors:
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
                if elements:
                    print(f"Found {len(elements)} elements with selector: {selector}")
                    for i, element in enumerate(elements[:num_products]):
                        try:
                            product_info = extract_product_info(element)
                            if product_info:
                                products.append(product_info)
                        except Exception as e:
                            print(f"Error processing element {i}: {e}")
                            continue
                    if products:
                        break
            except Exception as e:
                print(f"Error with selector {selector}: {e}")
                continue

        # METHOD 3: Text-based extraction as fallback
        if not products:
            print("Using text-based extraction fallback...")
            body = driver.find_element(By.TAG_NAME, "body")
            body_text = body.text
            products = extract_products_from_text(body_text, num_products)

        return products[:num_products]

    except Exception as e:
        # Top-level boundary: report the failure and return an empty result
        print(f"Error during scraping: {e}")
        return []
    finally:
        # Always release the browser process, on success or failure
        driver.quit()

def extract_product_info(element):
    """Extract product information from a DOM element.

    Args:
        element: A Selenium element exposing ``.text``, ``find_element``
            and, on the located children, ``get_attribute``.

    Returns:
        A dict with keys 'name', 'price', 'image_url', 'product_url' and
        'snippet', or None when the element's text is shorter than 20
        characters or an unexpected error occurs.
    """
    max_name_len = 80
    # Lines containing any of these are prices or UI chrome, not a name
    skip_keywords = ('$', 'usd', 'buy', 'add to cart', 'sign in', 'login')

    def shorten(value):
        # Append the ellipsis only when something was actually cut off
        if len(value) > max_name_len:
            return value[:max_name_len] + "..."
        return value

    try:
        text = element.text.strip()
        if not text or len(text) < 20:
            return None

        # Extract name (first meaningful line)
        lines = [line.strip() for line in text.split('\n') if line.strip()]
        name = None
        for line in lines:
            lowered = line.lower()
            if len(line) > 3 and not any(keyword in lowered for keyword in skip_keywords):
                name = shorten(line)
                break

        if not name:
            # No line passed the filter: fall back to the first line as-is
            name = shorten(lines[0]) if lines else "Unknown Product"

        # Extract price
        price = extract_price(text)

        # Image and link are best-effort: a missing or stale child element
        # must not discard the whole product, so any lookup error is
        # reduced to an empty string.
        try:
            img = element.find_element(By.TAG_NAME, "img")
            image_url = img.get_attribute('src') or img.get_attribute('data-src') or ""
        except Exception:
            image_url = ""

        try:
            link = element.find_element(By.TAG_NAME, "a")
            product_url = link.get_attribute('href') or ""
        except Exception:
            product_url = ""

        return {
            'name': name,
            'price': price,
            'image_url': image_url,
            'product_url': product_url,
            'snippet': text[:100] + '...' if len(text) > 100 else text
        }

    except Exception as e:
        print(f"Error extracting product info: {e}")
        return None

def extract_price(text):
    """Extract the first price-looking substring from *text*.

    Patterns are tried in priority order, so a price with cents anywhere
    in the text wins over a whole-dollar amount that appears earlier.
    Thousands separators are accepted, so "$1,299.99" is returned whole
    rather than being cut at the comma.

    Args:
        text: Text to search.

    Returns:
        The matched price exactly as it appears in *text*, or the string
        "Price not found" when no pattern matches.
    """
    price_patterns = [
        r'\$\d+(?:,\d{3})*\.\d{2}',        # $12.99, $1,299.99
        r'\$\d+(?:,\d{3})*',               # $12, $1,299
        r'USD\s*\d+(?:,\d{3})*\.\d{2}',    # USD 12.99, USD 1,299.99
    ]

    for pattern in price_patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(0)

    return "Price not found"

def extract_products_from_json(data, max_products):
    """Recursively search decoded JSON data for products.

    A dict key whose lowercase form contains 'product', 'item', 'good' or
    'sku' is treated as product-bearing: a list value contributes one
    product per dict element and a dict value contributes one product.
    Every nested dict/list is then searched the same way, including the
    values already used as products.

    Args:
        data: Decoded JSON value (dict, list or scalar).
        max_products: Maximum number of products to collect.

    Returns:
        A list of at most *max_products* product dicts, in discovery
        order. Scalars yield an empty list.
    """
    product_keys = ('product', 'item', 'good', 'sku')
    products = []

    if isinstance(data, dict):
        for key, value in data.items():
            remaining = max_products - len(products)
            if remaining <= 0:
                # Quota reached: stop walking instead of collecting extras
                # that would be truncated anyway. This also keeps the slice
                # bound below from ever going negative.
                break

            key_lower = key.lower()
            if any(prod_key in key_lower for prod_key in product_keys):
                if isinstance(value, list):
                    for item in value[:remaining]:
                        if isinstance(item, dict):
                            product = create_product_from_dict(item)
                            if product:
                                products.append(product)
                elif isinstance(value, dict):
                    product = create_product_from_dict(value)
                    if product:
                        products.append(product)

            # Recursively search nested structures
            if isinstance(value, (dict, list)) and len(products) < max_products:
                products.extend(extract_products_from_json(value, max_products - len(products)))

    elif isinstance(data, list):
        for item in data:
            if len(products) >= max_products:
                break
            if isinstance(item, (dict, list)):
                products.extend(extract_products_from_json(item, max_products - len(products)))

    return products[:max_products]

def create_product_from_dict(item):
    """Create a product dict from a raw JSON dictionary.

    The first non-empty value among several alternative keys is used for
    each field. A numeric price of 0 counts as a real price instead of
    being reported as missing.

    Args:
        item: Mapping holding the raw product fields.

    Returns:
        A dict with keys 'name' (at most 100 characters), 'price',
        'image_url' and 'product_url', or None when *item* does not
        support ``.get(key)`` lookups (for example when it is not a dict).
    """
    try:
        name = item.get('name') or item.get('title') or item.get('productName') or 'Unknown Product'

        price = 'Price not found'
        for key in ('price', 'currentPrice', 'minPrice'):
            candidate = item.get(key)
            # A plain truthiness test would skip a legitimate price of 0
            is_zero = (
                isinstance(candidate, (int, float))
                and not isinstance(candidate, bool)
                and candidate == 0
            )
            if candidate or is_zero:
                price = candidate
                break

        # Format price if it's a number
        if isinstance(price, (int, float)):
            price = f"${price:.2f}"

        return {
            'name': str(name)[:100],
            'price': str(price),
            'image_url': item.get('imageUrl') or item.get('image') or item.get('imgUrl') or '',
            'product_url': item.get('url') or item.get('link') or item.get('productUrl') or ''
        }
    except (AttributeError, TypeError):
        # item is not mapping-like (no usable .get): nothing to build
        return None

def extract_products_from_text(text, max_products):
    """Extract products from plain text, one candidate per line.

    A line is a candidate when it is longer than 30 characters, contains
    '$' or 'USD', and mentions none of the navigation/account keywords.
    The product name is the line with the first occurrence of the
    detected price removed, truncated to 80 characters (with a trailing
    "..." only when something was cut).

    Args:
        text: Visible page text, lines separated by newlines.
        max_products: Maximum number of products to return.

    Returns:
        A list of at most *max_products* dicts with keys 'name', 'price'
        and 'snippet'.
    """
    skip_keywords = ('sign in', 'login', 'register', 'menu', 'navigation')
    products = []

    for raw_line in text.split('\n'):
        # Check the limit before appending so max_products=0 yields nothing
        if len(products) >= max_products:
            break

        line = raw_line.strip()
        if len(line) <= 30:
            continue
        if '$' not in line and 'USD' not in line:
            continue
        lowered = line.lower()
        if any(keyword in lowered for keyword in skip_keywords):
            continue

        price = extract_price(line)
        # Remove only the first occurrence: a blanket replace would also
        # eat the same digits inside another price on the line.
        name = line.replace(price, '', 1).strip() or line
        if len(name) > 80:
            name = name[:80] + "..."

        products.append({
            'name': name,
            'price': price,
            'snippet': line[:100] + '...' if len(line) > 100 else line
        })

    return products
