Invescoz committed
Commit 2795928 · verified · 1 Parent(s): 564e391

Update app.py

Files changed (1)
  1. app.py +38 -159
app.py CHANGED
@@ -1,163 +1,42 @@
- import gradio as gr
- import subprocess
- import sys
  import torch
- import requests
- from bs4 import BeautifulSoup
- from sentence_transformers import SentenceTransformer, util
- from duckduckgo_search import DDGS
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
- from typing import Generator

- # Install dependencies at runtime if not found
- try:
-     from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
- except ImportError:
-     print("Installing transformers...")
-     subprocess.check_call([sys.executable, "-m", "pip", "install", "transformers==4.44.2"])
-     from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
-
- # Initialize model and tokenizer
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
- try:
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         device_map="auto",  # Offload to CPU
-         torch_dtype=torch.float16,  # Optimize memory
-         trust_remote_code=True,
-         low_cpu_mem_usage=True  # Reduce memory overhead
-     )
- except Exception as e:
-     print(f"Error loading model: {e}")
-     raise
-
- # Initialize sentence-transformers for indexing
- embedder = SentenceTransformer('all-MiniLM-L6-v2')
-
- # In-memory index (list of documents and embeddings)
- document_index = []
- embeddings = []
-
- def crawl_website(url: str) -> str:
-     """Crawl a website and return text content."""
-     try:
-         response = requests.get(url, timeout=10)
-         response.raise_for_status()
-         soup = BeautifulSoup(response.text, 'html.parser')
-         text = soup.get_text(separator=' ', strip=True)
-         return text[:2000]  # Limit to 2000 chars for performance
-     except Exception as e:
-         return f"Error crawling {url}: {str(e)}"
-
- def index_data(text: str):
-     """Index crawled data for similarity search."""
-     document_index.append(text)
-     embeddings.append(embedder.encode(text))
-     return "Data indexed successfully."
-
- def search_index(query: str) -> str:
-     """Search indexed data using similarity."""
-     if not embeddings:
-         return "No data indexed yet."
-     query_emb = embedder.encode(query)
-     hits = util.semantic_search(query_emb, embeddings, top_k=1)[0]
-     if hits:
-         return document_index[hits[0]['corpus_id']][:500]  # Limit to 500 chars
-     return "No relevant data found."
-
- def web_search(query: str) -> str:
-     """Perform web search using DuckDuckGo."""
-     try:
-         with DDGS() as ddgs:
-             results = list(ddgs.text(query, max_results=3))
-         return "\n".join([f"{r['title']}: {r['body']}" for r in results])
-     except Exception as e:
-         return f"Error performing web search: {str(e)}"
-
- def extract_image_links(url: str) -> str:
-     """Extract image links from a webpage."""
-     try:
-         response = requests.get(url, timeout=10)
-         response.raise_for_status()
-         soup = BeautifulSoup(response.text, 'html.parser')
-         images = soup.find_all('img')
-         links = [img['src'] for img in images if 'src' in img.attrs]
-         return "\n".join(links[:5]) or "No images found."
-     except Exception as e:
-         return f"Error extracting images from {url}: {str(e)}"
-
- def generate_ai_reasoning(prompt: str) -> Generator[str, None, None]:
-     """
-     Uses TinyLlama for AI reasoning, integrating web crawling, indexing, search, and image links.
-     """
-     system_prompt = (
-         "You are an AI reasoning assistant like Grok, capable of logical analysis and web operations. "
-         "Given a user prompt, provide a reasoned response. You can crawl websites, index data, perform web searches, "
-         "and extract image links if requested. Stream the output line by line. Use bullet points for key insights. "
-         "If the prompt is vague (e.g., 'Hi'), request more details and provide a general response. "
-         "For astrology queries, offer vivid, optimistic predictions based on user-provided zodiac or birth date."
-     )
-
-     # Handle specific commands
-     response_prefix = ""
-     if "crawl" in prompt.lower():
-         url = prompt.split("crawl")[-1].strip().split()[0]
-         crawled_text = crawl_website(url)
-         index_data(crawled_text)
-         response_prefix = f"Crawled {url}:\n{crawled_text[:500]}\n\nIndexed data.\n\n"
-     elif "search index" in prompt.lower():
-         query = prompt.split("search index")[-1].strip()
-         response_prefix = f"Indexed search result:\n{search_index(query)}\n\n"
-     elif "search web" in prompt.lower():
-         query = prompt.split("search web")[-1].strip()
-         response_prefix = f"Web search results:\n{web_search(query)}\n\n"
-     elif "image links" in prompt.lower():
-         url = prompt.split("image links")[-1].strip().split()[0]
-         response_prefix = f"Image links from {url}:\n{extract_image_links(url)}\n\n"
-
-     full_prompt = f"<|SYSTEM|> {system_prompt}\n<|USER|> {prompt}\n<|ASSISTANT|> {response_prefix}"
-
-     # Tokenize input
-     inputs = tokenizer(full_prompt, return_tensors="pt").to("cpu")
-
-     # Stream output
-     streamer = TextStreamer(tokenizer, skip_prompt=True)
-     for token in model.generate(
-         **inputs,
-         max_length=1000,
-         temperature=0.7,
-         top_p=0.9,
-         do_sample=True,
-         streamer=streamer
-     ):
-         content = tokenizer.decode(token, skip_special_tokens=True)
-         if content:
-             yield content
-
- # Gradio interface with streaming
- def live_ai_reasoner(prompt: str):
-     """Handles streaming AI reasoning."""
-     output = ""
-     for chunk in generate_ai_reasoning(prompt):
-         output += chunk
-         yield output
-
- # Gradio app
- with gr.Blocks() as demo:
-     gr.Markdown("# Invescoz AI Studio: AI Reasoning with Web Tools")
-     prompt_input = gr.Textbox(
-         label="Enter your query",
-         placeholder="e.g., Crawl https://example.com for Scorpio predictions, Search web for AI trends, or Image links from https://example.com"
-     )
-     output_display = gr.Textbox(label="AI Response", interactive=False, lines=10)
-     submit_button = gr.Button("Reason")

-     submit_button.click(
-         fn=live_ai_reasoner,
-         inputs=prompt_input,
-         outputs=output_display
-     )

- # Launch the app (handled by Hugging Face Spaces)
- demo.launch()

+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch

+ app = FastAPI()
+
+ # Load Qwen2-1.5B-Instruct model and tokenizer
+ model_name = "Qwen/Qwen2-1.5B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     device_map="auto"
+ )
+
+ class ChatRequest(BaseModel):
+     message: str
+
+ @app.post("/chat")
+ async def chat(request: ChatRequest):
+     # Prepare input for Qwen model
+     messages = [{"role": "user", "content": request.message}]
+     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+     # Generate response
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=200,
+             temperature=0.7,
+             do_sample=True,
+             top_p=0.8
+         )

+     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+     return {"response": response.strip()}

+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
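
For reference, a minimal client sketch for exercising the new /chat endpoint after this commit; it is not part of the diff, and the example message, the requests library, and the localhost URL are assumptions based on the port configured in app.py:

import requests

# Hypothetical client call; assumes the FastAPI server from app.py is
# running locally on port 8000 as configured in the __main__ block.
resp = requests.post(
    "http://localhost:8000/chat",
    json={"message": "Summarize what this service does in one sentence."},
    timeout=60,
)
resp.raise_for_status()
# The endpoint returns a JSON object of the form {"response": "..."}.
print(resp.json()["response"])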