"""SQLAlchemy schema and engine setup for the scraped movie catalogue.

This is the content of ``database.py`` as introduced by commit
cdaec2da179b37e0504aa658f0d233198b13f574 ("added code").
"""

import sqlalchemy
from sqlalchemy import (
    Column,
    ForeignKey,
    Integer,
    MetaData,
    PickleType,
    String,
    Table,
    create_engine,
    insert,
)
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import sessionmaker

# NOTE: main.py does ``from database import *`` and relies on names imported
# above (e.g. ``insert`` and ``IntegrityError``), so keep them importable
# from this module even though they are not used here.

meta = MetaData()

# One row per movie.  The columns mirror fields of the Trakt movie payload
# (``movie["movie"][...]``); ``trakt_id`` comes from ``movie["movie"]["ids"]["trakt"]``
# and is used as the primary key, so it is never auto-generated.
movies_table = Table(
    "movies",
    meta,
    Column("trakt_id", Integer, primary_key=True, autoincrement=False),
    Column("title", String),
    Column("overview", String),
    Column("genres", String),
    Column("year", Integer),
    Column("released", String),
    Column("runtime", Integer),
    Column("country", String),
    Column("language", String),
    Column("rating", Integer),
    Column("votes", Integer),
    Column("comment_count", Integer),
    Column("tagline", String),
    # SECURITY: PickleType values are unpickled when rows are loaded.  Only
    # read this column from databases whose contents you trust.
    Column("embeddings", PickleType),
)


def init_db_stuff(database_url: str):
    """Create the engine, make sure the schema exists, and build a session factory.

    Args:
        database_url: SQLAlchemy database URL, e.g. ``"sqlite:///jlm.db"``.

    Returns:
        A ``(engine, Session)`` tuple where ``Session`` is a ``sessionmaker``
        bound to ``engine``.
    """
    engine = create_engine(database_url)
    # create_all only creates tables that are missing; existing ones are
    # left untouched.
    meta.create_all(engine)
    Session = sessionmaker(bind=engine)
    return engine, Session
"""Scrape movie metadata from the Trakt search API into a SQLite database.

This is the content of ``main.py``; run it as a script.  The ``TRAKT_ID``
environment variable is sent as the ``trakt-api-key`` request header.
"""

import os
import time
from datetime import datetime

import requests
from tqdm import tqdm

# Supplies init_db_stuff, insert, movies_table and IntegrityError.
from database import *

trakt_id = os.getenv("TRAKT_ID")
trakt_se = os.getenv("TRAKT_SE")  # read from the environment but unused below

max_requests = 5000  # How many requests do you want to make
start_page = 2990  # First page requested by the scrape loop
request_timeout = 30  # Seconds to wait for an HTTP response before giving up

# Rate limiting: after `requests_per_window` requests, wait until
# `rate_window_seconds` have elapsed since the window started.
requests_per_window = 999
rate_window_seconds = 300
fallback_sleep_seconds = 100  # used when the window has already elapsed

years = "1900-2021"
page = 1
extended = "full"  # Required to get additional information
limit = "10"  # No. of entries per request
languages = "en"  # Limit to particular language

api_base = "https://api.trakt.tv"
database_url = "sqlite:///jlm.db"

headers = {
    "Content-Type": "application/json",
    "trakt-api-version": "2",
    "trakt-api-key": trakt_id,
}

params = {
    "query": "",
    "years": years,
    "page": page,
    "extended": extended,
    "limit": limit,
    "languages": languages,
}


def _to_int(value):
    """Return ``int(value)``, or ``None`` when the API sent a null."""
    return None if value is None else int(value)


def create_movie_dict(movie: dict):
    """Flatten one search result into a dict keyed by ``movies`` column names.

    Args:
        movie: A single search-result item; the movie fields are read from
            its ``"movie"`` entry.

    Returns:
        A dict with the keys ``title``, ``overview``, ``genres``,
        ``language``, ``year``, ``trakt_id``, ``released``, ``runtime``,
        ``country``, ``rating``, ``votes``, ``comment_count`` and
        ``tagline``.  Numeric fields are converted with ``int`` and are
        ``None`` when the source value is null.

    Raises:
        KeyError: If an expected field is missing from the payload.
    """
    details = movie["movie"]
    return {
        "title": details["title"],
        "overview": details["overview"],
        "genres": details["genres"],
        "language": details["language"],
        "year": _to_int(details["year"]),
        "trakt_id": details["ids"]["trakt"],
        "released": details["released"],
        "runtime": _to_int(details["runtime"]),
        "country": details["country"],
        "rating": _to_int(details["rating"]),
        "votes": _to_int(details["votes"]),
        "comment_count": _to_int(details["comment_count"]),
        "tagline": details["tagline"],
    }


def _store_movies(engine, movies):
    """Insert each movie in its own transaction, skipping existing rows."""
    with engine.connect() as conn:
        for movie in movies:
            # The table stores genres as one space-separated string.
            row = {**movie, "genres": " ".join(movie["genres"] or [])}
            try:
                # The context manager commits on success and rolls back if
                # the body raises, so no explicit commit/rollback is needed.
                with conn.begin():
                    conn.execute(insert(movies_table).values(**row))
            except IntegrityError:
                # Deliberate best-effort: an IntegrityError here means the
                # row (same trakt_id) is already stored, so skip it.
                pass


def _wait_for_rate_limit(window_start):
    """Sleep out the remainder of the current rate-limit window."""
    elapsed = int((datetime.now() - window_start).total_seconds())
    seconds_to_sleep = rate_window_seconds - elapsed
    if seconds_to_sleep < 1:
        seconds_to_sleep = fallback_sleep_seconds
    print(f"Sleeping {seconds_to_sleep}s")
    # Need to respect their rate limiting
    time.sleep(seconds_to_sleep)


def main():
    """Fetch search-result pages and store every movie found."""
    search_url = f"{api_base}/search/movie"

    # A one-item probe request is enough to read the total from the
    # pagination header.
    probe = requests.get(
        search_url,
        headers=headers,
        params={**params, "limit": 1},
        timeout=request_timeout,
    )
    probe.raise_for_status()
    total_items = int(probe.headers["x-pagination-item-count"])

    print(f"There are {total_items} movies")
    print(f"Started from page {start_page}")

    # Spread all items over `max_requests` pages; never ask for zero items.
    per_page = max(1, total_items // max_requests)

    engine, _session_factory = init_db_stuff(database_url)

    req_count = 0
    window_start = datetime.now()

    for current_page in tqdm(range(start_page, max_requests + 10)):
        if req_count >= requests_per_window:
            _wait_for_rate_limit(window_start)
            window_start = datetime.now()
            req_count = 0

        res = requests.get(
            search_url,
            headers=headers,
            params={**params, "page": current_page, "limit": per_page},
            timeout=request_timeout,
        )
        req_count += 1

        if res.status_code == 500:
            break
        if res.status_code != 200:
            # Do not try to parse an error body as a list of movies.
            print(f"OwO Code {res.status_code}")
            continue

        movies = [create_movie_dict(item) for item in res.json()]
        _store_movies(engine, movies)


if __name__ == "__main__":
    main()